Simple SDF Exporter

This script exports data from a structure entity to an SD file. This can also be done via the export wizard, but the basic script provides an opportunity for customisation. Possibilities include exporting multiple times, applying certain constraints, adding additional data, or exporting only when certain conditions are met.

Defaults for this script are set for the PubChem demo data tree in the sample project included with IJC. The variables in the 'edit these settings' section will need to be changed to suit your own data.


/** Export data to a SD file
 *
 * Usage:
 * 1. create this script for a data tree in the project explorer. The parent entity will be a structure entity
 * 2. edit the variables in the 'edit these settings' section
 * 3. run a query to locate the rows you want to export (or do a 'Show All')
 * 4. execute the script
 *
 *
 * @author Tim Dudgeon ([email protected])
 */

import com.im.commons.progress.*
import chemaxon.formats.MolExporter
import chemaxon.struc.Molecule

// ---------- edit these settings ----------------------------------------------------

// Names of the parent-entity fields whose values are written to each SD record
def FIELDS_FROM_PARENT = [
    'Formula',
    'Donors',
    'Acceptors'
]

// Name of the field that holds the structure
def STRUCTURE_FIELD = 'Structure'

// Path of the SD file to create
def FILE_NAME = 'C:/tmp/export.sdf'

// Field name -> property name to use in the SD file output;
// fields not listed here are written under their own name
def FIELD_NAMES = [
    'Donors'   : 'HBD',
    'Acceptors': 'HBA'
]

// ------------probably no need to edit anything below here ---------------------------

// root entity: the entity at the root vertex of the data tree this script runs against
def parent = dataTree.rootVertex.entity

// ID field
def fldId = parent.idField
println "found ID field ${fldId.id}"

// mol field: looked up by name among the root entity's fields.
// Fail fast with a clear message when it is missing; otherwise the next
// statement would die with an uninformative NullPointerException.
def fldMol = parent.fields.items.find { it.name == STRUCTURE_FIELD }
if (fldMol == null) {
    throw new IllegalStateException(
        "Structure field '${STRUCTURE_FIELD}' not found in the root entity - check the STRUCTURE_FIELD setting")
}
println "found MOL field ${fldMol.id}"

// data fields from parent: resolve each configured name to its field.
// Names that do not match any field are reported and skipped, so the export
// still runs with the fields that were found.
def fieldsFromParent = [ ]
FIELDS_FROM_PARENT.each { name ->
    def fld = parent.fields.items.find { it.name == name }
    if (fld) {
        fieldsFromParent << fld
        println "Found parent field ${fld.id} for $name"
    } else {
        println "WARNING: field $name not found"
    }
}

// ResultSet and VertexStates: obtain the default result set for the data tree and
// the state of its root vertex; the IDs held by that state are the rows exported.
// NOTE(review): presumably these are the rows of the current query / 'Show All' - confirm
def rs = parent.schema.dataProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL)
def parentVS = rs.getVertexState(dataTree.rootVertex)
def ids = parentVS.ids
// Call size() explicitly: on a java.util.List the property form "ids.size" is
// resolved by Groovy against each element (GPath) rather than against the list.
println "Found ${ids.size()} parent IDs to export"

// now read the data
// Writes one SD record per ID: the structure (or an empty molecule when there is
// none) plus one property per non-null field value. Rows that fail are logged and
// counted, and the export carries on with the next row.
def good = 0   // rows written successfully
def bad = 0    // rows that raised an exception
def exporter = new MolExporter(FILE_NAME, 'sdf')

try {
    ids.each { id ->

        // stop if the script is terminated; thrown outside the per-row try below
        // so that it aborts the whole export instead of being counted as a bad row
        if (env.getFeedback().isCancelled()) {
            def msg = "Exporting data to file $FILE_NAME interrupted!"
            println msg
            throw new InterruptedException(msg)
        }

        try {
            // data is fetched one row at a time and is keyed by row ID, then by field ID
            def row = parentVS.getData([id], DFEnvironmentRO.DEV_NULL)[id]
            // get the mol
            def mol = row[fldMol.id]
            // get the other fields
            def values = [ : ]
            fieldsFromParent.each { fld ->
                values.put(fld, row[fld.id])
            }

            println "Exporting ID $id"

            // work with a clone so we don't alter the original;
            // rows without a structure are exported as an empty molecule
            def expMol = (!mol || !mol.native) ? new Molecule() : mol.native.cloneMolecule()

            values.each { fld, value ->
                if (value != null) {
                    // use the renamed property when one is configured, else the field name
                    def mapped = FIELD_NAMES[fld.name]
                    expMol.setProperty(mapped == null ? fld.name : mapped, value.toString())
                }
            }
            exporter.write(expMol)

            good++

        } catch (Exception exc) {
            println "ERROR Failed to export ID $id ${exc.toString()}"
            bad++
        }
    }
} finally {
    // always release the file, even if the final flush fails
    try {
        exporter.flush()
    } finally {
        exporter.close()
    }
}
println "Finished exporting data to file $FILE_NAME"
println "good: $good bad: $bad"



Versions: This script has been tested on IJC versions 5.6, 5.7 and 6.0.



Copyright © 1999-2012 ChemAxon Ltd.    All rights reserved.