Export Related Compounds to an SD File

This script exports relational data where the structure is held in a child entity that has a one-to-one or many-to-one relationship with the parent entity. In the many-to-one case, the same structure is written once for each parent row that refers to it, so structures will be duplicated in the resulting SD file.


/** Export relational data to an SD file
 *
 * Usage:
 * 1. create this script for a data tree in the project explorer. The parent entity
 * will be a standard entity and it will contain a child structure entity.
 * 2. edit the variables in the 'edit these settings' section
 * 3. run a query to locate the rows you want to export (or do a 'Show All')
 * 4. execute the script
 *
 * Defaults for this script are set for the Wombat (activities view) data tree in the
 * sample project included in IJC. The variables in the 'edit these settings' section
 * will need changing for your data.
 *
 * @author Tim Dudgeon ([email protected])
 */

import com.im.commons.progress.*
import chemaxon.formats.MolExporter
import chemaxon.struc.Molecule

// ---------- edit these settings ----------------------------------------------------

// Names of the fields to export from the parent entity.
def FIELDS_FROM_PARENT = ['TYPE', 'VALUE', 'TARGET.NAME']

// Name of the child entity that holds the structures.
def MOL_ENTITY_NAME = 'Wombat structures'

// Name of the structure field within the child entity.
def STRUCTURE_FIELD = 'Structure'

// Names of the fields to export from the child entity.
// Do not include the structure field here.
def FIELDS_FROM_CHILD = ['CdId', 'Formula']

// Path of the SD file to create.
def FILE_NAME = 'C:/tmp/export.sdf'

// Optional renames applied in the SD file output: field name -> SD property name.
// Fields without an entry are written under their own name.
def FIELD_NAMES = [
    'TARGET.NAME': 'Target',
    'CdId'       : 'CPD_ID'
]

// ------------probably no need to edit anything below here ---------------------------

// root entity: the entity at the root vertex of the data tree this script is attached to
def parent = dataTree.rootVertex.entity

// ID field of the parent entity
def fldId = parent.idField
println "found ID field ${fldId.id}"

// find the child entity with the mols: the edge from the root whose destination
// entity is named MOL_ENTITY_NAME
def molEdge = dataTree.rootVertex.edges.find { it.destination.entity.name == MOL_ENTITY_NAME }
if (molEdge == null) {
    // find() returns null when nothing matches; fail here with an actionable message
    // instead of a NullPointerException on the next statement
    throw new IllegalStateException(
        "No child entity named '${MOL_ENTITY_NAME}' found below the root of the data tree - check the MOL_ENTITY_NAME setting")
}
def molEntity = molEdge.destination.entity
def fldFK = molEdge.relationshipDir.srcField
println "Found child entity: ${molEntity}"
println "Found FK field ${fldFK.id}  ${fldFK.name}"

// mol field: the field of the child entity named STRUCTURE_FIELD
def fldMol = molEntity.fields.items.find { it.name == STRUCTURE_FIELD }
if (fldMol == null) {
    // same reasoning as above: report the misconfigured setting explicitly
    throw new IllegalStateException(
        "No field named '${STRUCTURE_FIELD}' found in entity '${MOL_ENTITY_NAME}' - check the STRUCTURE_FIELD setting")
}
println "found MOL field ${fldMol.id}"

// Resolve the configured parent field names to field objects, in the configured order.
// Names that match no field are reported and left out of the export.
def fieldsFromParent = []
for (name in FIELDS_FROM_PARENT) {
    def fld = parent.fields.items.find { candidate -> candidate.name == name }
    if (!fld) {
        println "WARNING: field $name not found"
        continue
    }
    fieldsFromParent << fld
    println "Found parent field ${fld.id} for $name"
}

// Same resolution for the configured child (structure entity) field names.
def fieldsFromChild = []
for (name in FIELDS_FROM_CHILD) {
    def fld = molEntity.fields.items.find { candidate -> candidate.name == name }
    if (!fld) {
        println "WARNING: field $name not found"
        continue
    }
    fieldsFromChild << fld
    println "Found child field ${fld.id} for $name"
}

// ResultSet and VertexStates: one vertex state for the parent (root) vertex and one
// for the vertex of the structure entity
def rs = parent.schema.dataProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL)
def parentVS = rs.getVertexState(dataTree.rootVertex)
def molVS = rs.getVertexState(molEdge.destination)

// IDs of the parent rows to export
// NOTE(review): presumably these are the rows of the current query / 'Show All',
// as described in the usage notes - confirm against the IJC API
def ids = parentVS.ids

// Call size() explicitly: on a list, Groovy treats property-style access (ids.size)
// as a lookup of 'size' on every element rather than the list's own size.
println "Found ${ids.size()} parent IDs to export"

// now read the data
// Writes one SD record per parent ID: the structure of the first related child row
// (or an empty molecule when there is none) plus the selected parent and child field
// values as SD properties.
def good = 0   // parent rows written successfully
def bad = 0    // parent rows that failed and were skipped

def exporter = new MolExporter(FILE_NAME, 'sdf')

try {
    ids.each { id ->

        // stop if the script is terminated; thrown outside the per-row try below so
        // that it aborts the whole export instead of being counted as a bad row
        if (env.getFeedback().isCancelled()) {
            def msg = "Exporting data to file $FILE_NAME interupted!"
            println msg
            throw new InterruptedException(msg)
        }

        try {
            // values to export for this row, keyed by field object (insertion ordered)
            def data = parentVS.getData([id], DFEnvironmentRO.DEV_NULL)
            def values = [ : ]
            fieldsFromParent.each {
                values.put(it, data[id][it.id])
            }

            // only the first related child row supplies the structure and child values
            def mol = null
            def childIDs = molVS.getIdsForParentId(id, DFEnvironmentRO.DEV_NULL)
            if (childIDs.size() > 0) {
                def childID = childIDs[0]
                def molData = molVS.getData([childID], DFEnvironmentRO.DEV_NULL)
                mol = molData[childID][fldMol.id]
                fieldsFromChild.each {
                    values.put(it, molData[childID][it.id])
                }
            }

            println "Exporting ID $id"

            def expMol
            // work with a clone so we don't alter the original
            if (!mol || !mol.native ) {
                expMol = new Molecule()
            } else {
                expMol = mol.native.cloneMolecule()
            }
            values.each { k,v ->
                // Skip only missing values. A plain Groovy-truth test (if (v)) would also
                // drop legitimate values such as 0, 0.0 or false from the output.
                if (v != null) {
                    def text = v.toString()
                    if (text) {
                        def mapped = FIELD_NAMES[k.name]
                        def pName = (mapped == null ? k.name : mapped)
                        expMol.setProperty(pName, text)
                    }
                }
            }
            exporter.write(expMol)

            good++

        } catch (Exception exc) {
            // a failure on one row is reported and counted; the export continues
            println "EROROR Failed to load ID $id ${exc.toString()}"
            bad++
        }
    }
} finally {
    // always release the file, even when flushing fails
    try {
        exporter.flush()
    } finally {
        exporter.close()
    }
}

println "Finished exporting data to file $FILE_NAME"
println "good: $good bad: $bad"

Versions: This script has been tested on IJC version 5.6.



Copyright © 1999-2012 ChemAxon Ltd.    All rights reserved.