Table Standardizer

This script takes a table of structures and applies a pre-defined standardizer XML configuration to it. The resulting structures are written to a new BLOB column, and the indices and names of the standardizer tasks that were applied are written to two further new columns. After the script has completed, the user must manually change the new BLOB column so that it is displayed with a structure renderer. Doing this automatically from within a script would require reading the column's XML file and performing a complicated replacement; this is currently in development. In the meantime, a pop-up window at the end of the script prompts the user to make the change.


/** Table Standardizer
 *
 * @author Erin Bolstad ([email protected])
 * Dec 2011
 */
import chemaxon.formats.MolExporter
import chemaxon.reaction.Standardizer
import com.im.commons.db.ddl.DBDatabaseInfo.ColumnSQLType;
import com.im.commons.progress.*
import com.im.df.api.capabilities.*
import com.im.df.api.support.*
import com.im.df.api.util.DIFUtilities
import groovy.swing.SwingBuilder
import java.awt.*
import javax.swing.*

// Root entity of the data tree, plus the schema-level data provider that is
// used both for row access and for opening the result set.
def ety = dataTree.rootVertex.entity
def schemaProvider = ety.schema.dataProvider
def edp = schemaProvider.getEntityDataProvider(ety)
def rows = edp.getRowCount(env)

// Default result set for the tree; the root vertex state supplies the row IDs
// that the rest of the script iterates over.
def rs = schemaProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL)
def parentVS = rs.getVertexState(dataTree.rootVertex)
def ids = parentVS.ids
println "Entity has ${rows} rows"

// Prompt for name of structure column and output column.
//
// Builds a modal "Column Constructor" dialog with SwingBuilder. Every widget
// declared with an `id:` is read back later through `vars` (the builder's
// variables map), e.g. `vars.origStruc.text` and `vars.ssCheck.selected`.
// The two buttons store 'ok' or 'quit' under `vars.buttonResults` and then
// dispose the dialog.
def colInputs = new SwingBuilder()
colInputs.setVariable('properties',[:])
def vars = colInputs.variables
def frame = colInputs.dialog(title:'Column Constructor', modal:true) {
    panel () {
        gridBagLayout()
        // Rows 0-1: explanatory text spanning both grid columns.
        label(text:"Use this window to build or specify the input and output columns of the standardizing script.", constraints:gbc(
                gridy:0,
                gridwidth:2,
                insets:[20,10,0,10]))
        label(text:"You will then be prompted for a standardization configuration XML", constraints:gbc(
                gridy:1,
                gridwidth:2,
                insets:[5,10,30,10]))
        // Row 2: name of the existing column holding the input structures.
        label(text:"Input existing structure column name", constraints:gbc(
                gridx:0,
                gridy:2,
                insets:[2,10,2,10]))
        textField(id:'origStruc', text:"Structure", name:"Original Structure", constraints:gbc(
                gridx:1,
                gridy:2,
                fill:GridBagConstraints.HORIZONTAL,
                insets:[2,10,2,10]))
        // Rows 5-8: one check box + text field pair per output column.
        // A ticked box means "create this column"; unticked means the name
        // in the text field refers to a column that already exists.
        label(text:"Check the box if the column needs to be created, otherwise, enter the existing name", constraints:gbc(
                gridwidth:2,
                gridy:5,
                insets:[28,10,5,10]))
        // Output column for the standardized structures.
        checkBox(id:'ssCheck', text:"Standardized Structures",
                horizontalTextPosition: SwingConstants.LEFT, constraints:gbc(
                        gridx:0,
                        gridy:6,
                        anchor:LINE_END,
                        insets:[2,10,2,10]))
        textField(id:'ssName', text:"Standardized Structure", constraints:gbc(
                gridx:1,
                gridy:6,
                fill:GridBagConstraints.HORIZONTAL,
                insets:[2,10,2,10]))
        // Output column for the indices of the applied standardizer tasks.
        checkBox(id:'apTaskIndexCheck', text:"Applied Task Indices",
                horizontalTextPosition: SwingConstants.LEFT, constraints:gbc(
                        gridx:0,
                        gridy:7,
                        anchor:LINE_END,
                        insets:[2,10,2,10]))
        textField(id:'apTaskIndexName', text:"Applied Task Indicies", constraints:gbc(
                gridx:1,
                gridy:7,
                fill:GridBagConstraints.HORIZONTAL,
                insets:[2,10,2,10]))
        // Output column for the names/IDs of the applied standardizer tasks.
        checkBox(id:'apTaskCheck', text:"Applied Tasks",
                horizontalTextPosition: SwingConstants.LEFT, constraints:gbc(
                        gridx:0,
                        gridy:8,
                        anchor:LINE_END,
                        insets:[2,10,2,10]))
        textField(id:'apTaskName', text:"Applied Tasks", constraints:gbc(
                gridx:1,
                gridy:8,
                fill:GridBagConstraints.HORIZONTAL,
                insets:[2,10,2,10]))
        // Row 9: action buttons. Each records the user's choice in
        // vars.buttonResults before closing the dialog.
        button(id:'run', label: "Standardize!", constraints:gbc(
                gridx:0,
                gridy:9,
                anchor:LINE_END,
                insets:[10,0,10,0]),
                actionPerformed: {
                    vars.buttonResults = 'ok'
                    dispose()})
        button(id:'cancel', label: "Cancel", constraints:gbc(
                gridx:1,
                gridy:9,
                anchor:LINE_START,
                insets:[10,0,10,0]),
                actionPerformed: {
                    vars.buttonResults = 'quit'
                    dispose()})
    }
}
// Size the dialog to its contents, center it on screen, and display it.
// The dialog is modal, so the script resumes only after it has been closed.
// NOTE(review): Dialog.show() is deprecated in favor of setVisible(true).
frame.pack()
frame.setLocationRelativeTo(null)
frame.show()

// Read the user's choice from the dialog.
//
// vars.buttonResults is 'ok' only when "Standardize!" was pressed. It is
// 'quit' for "Cancel" and stays unset (null) when the dialog is dismissed
// through the window-close control, so anything other than 'ok' aborts the
// script here, before any lock is obtained or any column name is read.
def chosenAction = vars.buttonResults
if (chosenAction != 'ok') {
    return
}

// Column names and "create this column" flags, stored as script-binding
// variables (no `def`) because the following sections read them by name.
origStrucCol = vars.origStruc.text
ssColCheck = vars.ssCheck.selected
ssColName = vars.ssName.text
apTaskIndxCheck = vars.apTaskIndexCheck.selected
apTaskIndexColName = vars.apTaskIndexName.text
apTaskColCheck = vars.apTaskCheck.selected
apTaskColName = vars.apTaskName.text

// Resolve or create the fields, then ask for the standardizer XML.
//
// For each of the three output columns: when its check box was left unticked
// the column must already exist and is looked up by name; when ticked, a new
// field with that name is created. The resolved fields and the chosen XML file
// are published as script-binding variables (FLDSTSTRUC, FLDAPTASKS,
// FLDAPTASKINDEX, FILENAME, NAME) for the standardization step that follows.
// The lock is released in the finally block on every exit path, including the
// early returns below.
def lock = DIFUtilities.getLockable(dataTree).obtainLock('Standardizing')
def envRW = EnvUtils.createDefaultEnvironmentRW(lock, 'Standardizing', true)

def fldMol = ety.fields.items.find { it.name == origStrucCol }
try {
    // The input structure column must exist; without this check a mistyped
    // name only surfaces later as a null dereference while processing rows.
    if (fldMol == null) {
        print "Column $origStrucCol not found! Quitting...\n"
        return
    }

    // --- Standardized structure column ---
    if (!ssColCheck) {
        FLDSTSTRUC = ety.fields.items.find { it.name == ssColName }
        if (FLDSTSTRUC == null) {
            print "Column $ssColName not found! Quitting...\n"
            return
        }
    } else {
        def Class[] req1 = [ DBFieldCapability.class, DFFieldTextCapability.class ]
        def nt = DIFUtilities.findFirstAppropriateNewType(ety.getFields().getNewTypes(), false, req1, new Class[0]);
        nt.options.newDFItemNameSafe = ssColName
        def col = nt.options.columns[0];
        def nativeTypeBlob = null;

        // search for BLOB native type
        for (def nativeType : col.allNativeTypeDefinitions) {
            if (nativeType.columnType == ColumnSQLType.BLOB) {
                nativeTypeBlob = nativeType;
            }
        }
        if (nativeTypeBlob != null) {
            // resize default BLOB native type and assign it to column type definition
            col.nativeTypeDefinition = nativeTypeBlob.reSize(20000);
        }
        // Same validity check as for the two text columns below.
        assert nt.options.valid : 'Error: ' + nt.options.errorMessage
        nt.create(envRW)
        FLDSTSTRUC = ety.fields.items.find { it.name == ssColName }
    }

    // --- Applied task names column ---
    if (!apTaskColCheck) {
        FLDAPTASKS = ety.fields.items.find { it.name == apTaskColName }
        if (FLDAPTASKS == null) {
            print "Column $apTaskColName not found! Quitting... \n"
            return
        }
    } else {
        def Class[] req = [ DFFieldTextCapability.class ]
        def nt = DIFUtilities.findFirstAppropriateNewType(ety.getFields().getNewTypes(), false, req, new Class[0]);
        nt.options.newDFItemNameSafe = apTaskColName
        assert nt.options.valid : 'Error: ' + nt.options.errorMessage
        nt.create(envRW)
        FLDAPTASKS = ety.fields.items.find { it.name == apTaskColName }
    }

    // --- Applied task indices column ---
    if (!apTaskIndxCheck) {
        FLDAPTASKINDEX = ety.fields.items.find { it.name == apTaskIndexColName }
        if (FLDAPTASKINDEX == null) {
            // Was `Print` (capital P), which is not a method and threw
            // MissingMethodException instead of reporting the problem.
            print "Column $apTaskIndexColName not found! Quitting... \n"
            return
        }
    } else {
        def Class[] req2 = [ DFFieldTextCapability.class ]
        def nt = DIFUtilities.findFirstAppropriateNewType(ety.getFields().getNewTypes(), false, req2, new Class[0]);
        nt.options.newDFItemNameSafe = apTaskIndexColName
        assert nt.options.valid : 'Error: ' + nt.options.errorMessage
        nt.create(envRW)
        FLDAPTASKINDEX = ety.fields.items.find { it.name == apTaskIndexColName }
    }

    // Input the standardizer XML
    def chooser = new JFileChooser()
    chooser.setDialogTitle('Select XML file containing the standardization rules')
    if (chooser.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
        // User cancelled the file chooser: stop without standardizing.
        return
    }
    File fileName = chooser.getSelectedFile()
    FILENAME = fileName
    NAME = fileName.getCanonicalPath()

    print "Standardization file is $FILENAME \n"

}finally{
    lock?.release()
    envRW?.feedback.finish()
}

// Standardize every row and write the results back.
//
// For each row ID: read the input structure, run the standardizer on it, and
// store (a) the standardized structure exported as MOL text, (b) the
// comma-separated IDs of the applied tasks and (c) the comma-separated indices
// of the applied tasks into the three output fields. Updates are submitted one
// row at a time with undo disabled. The data lock is released in the finally
// block whether or not a row fails.
def lock2 = edp.lockable.obtainLock('inserting data')
def envRW2 = EnvUtils.createDefaultEnvironmentRW(lock2, 'Standardizing', true)

try {
    def standFile = FILENAME
    def fldStStruc = FLDSTSTRUC
    def fldApTasks = FLDAPTASKS
    def fldApTaskIndex = FLDAPTASKINDEX

    // The configuration is the same for every row, so build the standardizer
    // once instead of re-reading the XML file per row.
    Standardizer standardizer = new Standardizer(standFile)
    standardizer.setFinalClean()

    ids.each { id ->
        // Get mol from table
        def molData = parentVS.getData([id], DFEnvironmentRO.DEV_NULL)
        def getMol = molData[id][fldMol.id]
        if (getMol == null) {
            // No structure value in this row: nothing to standardize.
            print "No structure found for ID $id, skipping \n"
            return  // acts as `continue` inside the each-closure
        }
        def nativeMol = getMol.getNative()

        // Apply the standardization. This call was missing, so the exported
        // molecule was the unmodified input and the applied-task queries
        // below did not reflect any run.
        standardizer.standardize(nativeMol)

        def molStr = MolExporter.exportToFormat(nativeMol, "mol")
        def vals = [(fldStStruc.id): molStr]

        // Comma-separated list of applied task IDs; null when none applied.
        String[] stanIDs = standardizer.getAppliedTaskIDs()
        vals[fldApTasks.id] = stanIDs ? stanIDs.join(', ') : null

        // Comma-separated list of applied task indices; null when none applied.
        // The String[] declaration coerces the returned values to strings.
        String[] stanIndex = standardizer.getAppliedTaskIndexes()
        vals[fldApTaskIndex.id] = stanIndex ? stanIndex.join(', ') : null

        DFUpdateDescription ud = DFUpdateDescription.create(ety, id, vals)
        // singletonList is static; no Collections instance is needed.
        edp.update(Collections.singletonList(ud), DFUndoConfig.OFF, envRW2)
        print "Finished standardizing ID $id \n"
    }
}finally{
    lock2?.release()
    envRW2?.feedback.finish()
}

// Final notice: tells the user how to switch the new structure column to a
// structure renderer by hand, since the script cannot do that itself.
def message = "Standardization of table is complete! \n \n " +
        "You will need to convert the standardized structure column to a structure renderer: \n " +
        "Right click on the column title and select Customize Widget Settings. \n " +
        "In the table that pops up, Column Properties will be open. Change 'Cell Renderer' to 'Structure Renderer' \n \n " +
        "For ease of reading, you may want to change the Applied Tasks/ID columns to a multi-line renderer."
JOptionPane.showMessageDialog(null, message, "Finished!", JOptionPane.INFORMATION_MESSAGE)
  
        

Versions: This script has been tested on IJC versions 5.7 and 6.0



Copyright © 1999-2012 ChemAxon Ltd.    All rights reserved.