Import all SDF files in a directory

This script finds and imports all SDF files in a directory (and its subdirectories) in one run. You can specify the path and the filename mask (*.sdf by default) in the "--- edit these settings ---" section of the script. There is also a dedicated forum topic.


/** Multiple SDF file importer
* 
* Imports all SDF files in a given source directory and all of its subdirectories.
* Automatically reads all fields in each SDF file and creates new String fields if necessary.
* All fields are saved as Strings regardless of their actual contents (integer, decimal number etc.)
*
* Usage:
* 1. Edit source directory containing SDF files
* 2. Run script
*
* @author Filip Zimandl <[email protected]>
*/

import chemaxon.formats.MolImporter
import chemaxon.struc.Molecule
import chemaxon.marvin.io.MPropHandler
import groovy.io.FileType

// ---------- edit these settings ----------------------------------------------------
String structureFieldName = 'Structure' // the name for the structure field
def pattern = ~/.*\.sdf/ // pattern for file to process
def sourceDir = new File('/Users/filip/Downloads/testScript') // dir to start at
// ---------- edit end section -------------------------------------------------------

// Fail fast on a wrong source path, before any lock is obtained.
if (!sourceDir.isDirectory()) {
    throw new IllegalArgumentException("Source directory does not exist or is not a directory: ${sourceDir}")
}

// The rows are inserted into the entity at the root of the current data tree.
def entity = dataTree.rootVertex.entity
def edp = entity.schema.dataProvider.getEntityDataProvider(entity)

// Look up the field that receives the structure of each record by its name.
def structureField = entity.fields.items.find { it.name == structureFieldName }
if (structureField == null) {
    // Without this check the script would only fail later, while building a row.
    throw new IllegalStateException("No field named '${structureFieldName}' found in the root entity")
}

def schema = dataTree.schema
// Declared before it is assigned so the recursive closure can refer to itself.
def traverse

// Schema-level lock and environment used whenever a new text field is created.
def lock = schema.lockable.obtainLock('create the new field')
def envRW = EnvUtils.createDefaultEnvironmentRW(lock, 'creating the new field', true)

/**
 * Imports every record of a single SDF file as a new row of the entity.
 *
 * For each record the grabbed molecule source is stored in the structure field
 * and every property of the record is stored in the text field of the same
 * name. A text field (length 1024) is created when no field with that name
 * exists yet.
 *
 * @param file the SDF file to import; its string form is handed to MolImporter
 */
def loadFile = { file ->
    MolImporter importer = new MolImporter(file.toString())
    try {
        // Grabbing keeps the source text of each record so it can be stored as-is.
        importer.grabbingEnabled = true
        Molecule mol = new Molecule()

        // Every successful read is processed, so the first record is imported too.
        while (importer.read(mol)) {
            // Fresh map per record, so values of a previous row cannot leak into this one.
            def values = [:]
            values[structureField.id] = importer.grabbedMoleculeString

            for (int i = 0; i < mol.getPropertyCount(); i++) {
                // Property name of the current record at this position.
                String key = mol.getPropertyKey(i)

                // Reuse the field with this name, or create it if it does not exist.
                def textField = entity.fields.items.find { it.name == key }
                if (textField == null) {
                    textField = DFFields.createTextField(entity, key, key, 1024, envRW)
                }

                // All property values are stored in their string form.
                values[textField.id] = MPropHandler.convertToString(mol.properties(), key)
            }

            // Insert the row under its own lock and environment.
            def insLock = edp.lockable.obtainLock('Inserting')
            def insEnvRW = null
            try {
                insEnvRW = EnvUtils.createDefaultEnvironmentRW(insLock, 'Inserting', true)
                edp.insert(values, null, insEnvRW)
            } finally {
                // Nested so that both clean-up steps run even if the first one throws.
                try {
                    insLock?.release()
                } finally {
                    insEnvRW?.feedback.finish()
                }
            }
        }
    } finally {
        // Close the importer so the underlying file is not left open.
        importer.close()
    }
}

// Recursive walk: import the matching files that sit directly in the given
// directory first, then descend into each of its subdirectories.
traverse = { folder ->
    folder.eachFileMatch(FileType.FILES, pattern) { sdf -> loadFile(sdf) }
    folder.eachDir { child -> traverse(child) }
}

// Run the import from the configured source directory and always clean up the
// schema-level environment and lock afterwards.
try {
    traverse(sourceDir)
} finally {
    try {
        envRW?.feedback.finish()
    } finally {
        // Released even when finishing the feedback throws.
        lock?.release()
    }
}

Versions: This script has been tested on IJC versions 5.11 and 6.0.



Copyright © 1999-2012 ChemAxon Ltd.    All rights reserved.