Export Related Compounds to an SDF

This script exports relational data in which the structure is held in a child entity that has a one-to-one or many-to-one relationship with the parent entity. In the many-to-one case, a structure shared by several parent rows is written once for each of those rows, so structures will be duplicated in the resulting SD file.

/** Export relational data to an SD file
*
* Usage:
* 1. create this script for a data tree in the project explorer. The parent entity
* will be a standard entity and it will contain a child structure entity.
* 2. edit the variables in the 'edit these settings' section
* 3. run a query to locate the rows you want to export (or do a 'Show All')
* 4. execute the script
*
* Defaults for this script are set for the Wombat (activities view) data tree in the
* sample project included in IJC. The variables in the 'edit these settings' section
* will need changing for your data.
*
* @author Tim Dudgeon ([email protected])
*/

import com.im.commons.progress.*
import chemaxon.formats.MolExporter
import chemaxon.struc.Molecule

// ---------- edit these settings ----------------------------------------------------

// Names of the parent-entity fields to export.
def FIELDS_FROM_PARENT = ['TYPE', 'VALUE', 'TARGET.NAME']

// Name of the child entity that holds the structures.
def MOL_ENTITY_NAME = 'Wombat structures'

// Name of the structure field in the child entity.
def STRUCTURE_FIELD = 'Structure'

// Names of the child-entity fields to export. Do not include the structure field.
def FIELDS_FROM_CHILD = ['CdId', 'Formula']

// Path of the SD file to create.
def FILE_NAME = 'C:/tmp/export.sdf'

// Renames applied in the SD file output: original field name -> exported property name.
def FIELD_NAMES = [
    'TARGET.NAME': 'Target',
    'CdId'       : 'CPD_ID'
]

// ------------probably no need to edit anything below here ---------------------------

// Resolve the entities and fields the export depends on. A configured name that
// matches nothing stops the script with a message naming the offending setting,
// instead of failing later with an opaque null dereference.

// root entity
def parent = dataTree.rootVertex.entity

// ID field
def fldId = parent.idField
println "found ID field ${fldId.id}"

// find the child entity with the mols
def molEdge = dataTree.rootVertex.edges.find { it.destination.entity.name == MOL_ENTITY_NAME }
if (molEdge == null) {
    // find() returns null when no edge leads to an entity with the configured name
    throw new IllegalStateException("Child entity '${MOL_ENTITY_NAME}' not found in the data tree; check MOL_ENTITY_NAME".toString())
}
def molEntity = molEdge.destination.entity
def fldFK = molEdge.relationshipDir.srcField
println "Found child entity: ${molEntity}"
println "Found FK field ${fldFK.id} ${fldFK.name}"

// mol field
def fldMol = molEntity.fields.items.find { it.name == STRUCTURE_FIELD }
if (fldMol == null) {
    // without the structure field there is nothing meaningful to export
    throw new IllegalStateException("Structure field '${STRUCTURE_FIELD}' not found in entity '${MOL_ENTITY_NAME}'; check STRUCTURE_FIELD".toString())
}
println "found MOL field ${fldMol.id}"

// Resolve each configured parent field name to its field object. Names that do
// not match any field of the parent entity are reported and left out.
def fieldsFromParent = []
for (name in FIELDS_FROM_PARENT) {
    def match = parent.fields.items.find { candidate -> candidate.name == name }
    if (!match) {
        println "WARNING: field $name not found"
        continue
    }
    fieldsFromParent << match
    println "Found parent field ${match.id} for $name"
}

// Resolve each configured child field name against the structure entity.
// Unknown names only produce a warning; the export continues without them.
def fieldsFromChild = []
for (wanted in FIELDS_FROM_CHILD) {
    def childField = molEntity.fields.items.find { f -> f.name == wanted }
    if (childField) {
        fieldsFromChild.add(childField)
        println "Found child field ${childField.id} for $wanted"
    } else {
        println "WARNING: field $wanted not found"
    }
}

// ResultSet and VertexStates
// Obtain the default result set for the data tree, then the vertex state of the
// root (parent) vertex and of the structure (child) vertex.
def rs = parent.schema.dataProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL)
def parentVS = rs.getVertexState(dataTree.rootVertex)
def molVS = rs.getVertexState(molEdge.destination)

// IDs of the parent rows that will be exported
def ids = parentVS.ids
// Call size() explicitly: "$ids.size" is property access, which Groovy resolves
// on a collection as a per-element property lookup rather than the element count.
println "Found ${ids.size()} parent IDs to export"

// now read the data
// Writes one SD record per parent ID: the structure of the first related child
// row (or an empty molecule when there is none) plus the selected parent and
// child field values as SD properties. Rows that fail are counted and skipped.
def good = 0
def bad = 0

def exporter = new MolExporter(FILE_NAME, 'sdf')

try {
    ids.each { id ->

        // stop if the script is terminated
        if (env.getFeedback().isCancelled()) {
            def msg = "Exporting data to file $FILE_NAME interrupted!"
            println msg
            // thrown outside the per-row try so it is not swallowed as a "bad" row
            throw new InterruptedException(msg)
        }

        try {
            // field object -> value, parent values first, then child values
            def data = parentVS.getData([id], DFEnvironmentRO.DEV_NULL)
            def values = [:]
            fieldsFromParent.each {
                values.put(it, data[id][it.id])
            }

            def mol = null
            def childIDs = molVS.getIdsForParentId(id, DFEnvironmentRO.DEV_NULL)
            if (childIDs.size() > 0) {
                // only the first child row is used
                def childID = childIDs[0]
                def molData = molVS.getData([childID], DFEnvironmentRO.DEV_NULL)
                mol = molData[childID][fldMol.id]
                fieldsFromChild.each {
                    values.put(it, molData[childID][it.id])
                }
            }

            println "Exporting ID $id"

            def expMol
            // work with a clone so we don't alter the original
            if (!mol || !mol.native) {
                expMol = new Molecule()
            } else {
                expMol = mol.native.cloneMolecule()
            }
            values.each { k, v ->
                // Test the text, not the value: Groovy truth treats 0, 0.0 and false
                // as "empty", which would silently drop legitimate data values.
                def text = v?.toString()
                if (text) {
                    // use the configured rename when there is one, else the field name
                    def pName = FIELD_NAMES[k.name] ?: k.name
                    expMol.setProperty(pName, text)
                }
            }
            exporter.write(expMol)

            good++

        } catch (Exception exc) {
            println "ERROR Failed to load ID $id ${exc.toString()}"
            bad++
        }
    }
} finally {
    // always close the file, even if the final flush fails
    try {
        exporter.flush()
    } finally {
        exporter.close()
    }
}

println "Finished exporting data to file $FILE_NAME"
println "good: $good bad: $bad"

Versions: This script has been tested on IJC version 5.6.

Copyright © 1999-2012 ChemAxon Ltd.    All rights reserved.