Summary: This script cycles through a structures table and decides if a structure has a pka that lies between pH 4.5 and 9. If so, it generates the microspecies and inserts it into another table, as well as the original. It also inserts the pH of the maximum occupation of that microspecies, and which pH range it fell in (pH 4.5 - 9.5 split into different ranges).
This script shows the use of chemJEP and Evaluator for calling chemical terms from Groovy.
/* Microspecies Populating Script * * Usage: This script needs two tables connected via a one to many relationship, with the microspecies table as the child. * * @author Erin Bolstad ([email protected]) * Dec 2011 */ import com.im.commons.progress.* import com.im.df.api.chem.MarvinStructure import chemaxon.jep.Evaluator import chemaxon.jep.context.MolContext import chemaxon.formats.MolExporter def ety = dataTree.rootVertex.entity def rs = ety.schema.dataProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL) def parentVS = rs.getVertexState(dataTree.rootVertex) def molIDs = parentVS.getIds() def molFld = ety.fields.items.find { it.name == "Structure" } def protoEdge = dataTree.rootVertex.edges.find { it.destination.entity.name == 'Microspecies' } def protoEntity = protoEdge.destination.entity def protoEdp = protoEntity.schema.dataProvider.getEntityDataProvider(protoEntity) def protoIdFld = protoEntity.fields.items.find { it.name == 'ID' } def protoHiFld = protoEntity.fields.items.find { it.name == 'PH_HI' } def protoMidFld = protoEntity.fields.items.find { it.name == 'PH_MID' } def protoLoFld = protoEntity.fields.items.find { it.name == 'PH_LO' } def protoMaxOcFld = protoEntity.fields.items.find { it.name == 'PH_MAX_OCCUPANCY' } def protoStFld = protoEntity.fields.items.find { it.name == 'Structure' } def protoRefFld = protoEntity.fields.items.find { it.name == 'PH_REF' } def lock = protoEdp.lockable.withLock('inserting data'){ envRW -> i=0 molIDs.each { id -> //Fetch in the molecule from the structures table def molData = parentVS.getData([id], DFEnvironmentRO.DEV_NULL) getMol = molData[id][molFld.id] nativeMol = getMol.getNative() def nativeStr = MolExporter.exportToFormat(nativeMol, "smiles") def pkaArray = [] // Calculate the first acidic pKa Evaluator evaluator = new Evaluator() chemJEP = evaluator.compile("pKa('acidic','1')", MolContext.class) MolContext context = new MolContext() context.setMolecule(nativeMol) aresult = chemJEP.evaluate(context) String nanCheck = aresult n=2 // pKa evaluator will return NaN if no more pKas to calculate, cycle through pKas till 9.5 hit or till NaN is returned while ((aresult < 9.5) && (nanCheck !="NaN")){ pkaArray << aresult chemJEP = evaluator.compile("pKa('acidic','$n')", MolContext.class) context.setMolecule(nativeMol) aresult = chemJEP.evaluate(context) nanCheck = aresult n++ } //Calculate the first basic pKa chemJEP = evaluator.compile("pKa('basic','1')", MolContext.class) context.setMolecule(nativeMol) bresult = chemJEP.evaluate(context) nanCheck = bresult n=2 //Cycle through basic pKas until 4.5 hit or Nan is returned while ((nanCheck != "NaN") && (bresult > 4.5)) { pkaArray << bresult chemJEP = evaluator.compile("pKa('basic','$n')", MolContext.class) context.setMolecule(nativeMol) bresult = chemJEP.evaluate(context) nanCheck = bresult n++ } i++ vals = [:] // Build insert array with original molecule for insert vals = [(protoStFld.id):getMol] vals.putAt(protoRefFld.id, true) vals.putAt(protoIdFld.id, id) //Insert the row into Microspecies table protoEdp.insert(vals, null, envRW) print "Inserted native structure \n" arraySize = pkaArray.size() // If there is a pKa in the array of pKas calculated between 4.5 and 9, enter the loop. if (arraySize > 0) { //Sort array of pKas sortedArray = pkaArray.sort() // Get the pKas at the halfway points for 4.5 and pKa1, pKax and 9 for microspecies calculation speciesPkas = [:] m=0 s= arraySize - 1 msLo = ((sortedArray[0] - 4.5) / 2) + 4.5 msHi = ((9.5 - sortedArray[s]) / 2) + sortedArray[s] // Calculate the SMILES for the microspecies at the pKa chemJEP = evaluator.compile("majorMicrospecies('$msLo')", MolContext.class) context.setMolecule(nativeMol) mLoresult = chemJEP.evaluate(context) nativeStr = MolExporter.exportToFormat(nativeMol, "smiles") def loMolStr = MolExporter.exportToFormat(mLoresult, "smiles") chemJEP = evaluator.compile("majorMicrospecies('$msHi')", MolContext.class) context.setMolecule(nativeMol) msHiresult = chemJEP.evaluate(context) nativeStr = MolExporter.exportToFormat(nativeMol, "smiles") hiMolStr = MolExporter.exportToFormat(msHiresult, "smiles") // Check that this microspecies is not the same one as the input molecule if (nativeStr != loMolStr) { def loMvMol = new MarvinStructure(loMolStr, false) speciesPkas.put(msLo, loMvMol) } if (nativeStr != hiMolStr) { def hiMvMol = new MarvinStructure(hiMolStr, false) speciesPkas.put(msHi, hiMvMol) } m++ // For arrays with more than one pKa, calculate the species at the halfway points of each pKa while (m < arraySize) { r = m - 1 msX = ((sortedArray[m] - sortedArray[r]) / 2) + sortedArray[r] chemJEP = evaluator.compile("majorMicrospecies('$msX')", MolContext.class) context.setMolecule(nativeMol) msXresult = chemJEP.evaluate(context) nativeStr = MolExporter.exportToFormat(nativeMol, "smiles") xMolStr = MolExporter.exportToFormat(msXresult, "smiles") if (nativeStr != xMolStr) { def xMvMol = new MarvinStructure(xMolStr, false) speciesPkas.put(msX, xMvMol) } m++ } //For each microspecies, determine which pH range it falls in and flag it. Put the max occupancy pH in the array. speciesPkas.each { maxph, struc -> vals = [(protoStFld.id):struc] vals.putAt(protoIdFld.id, id) def flMaxph = maxph.floatValue() if ((4.5 < maxph) && (maxph < 7)){ vals.putAt(protoLoFld.id, true) } if ((5.75 < maxph) && (maxph < 8.25)){ vals.putAt(protoMidFld.id, true) } if ((7 < maxph) && (maxph < 9.5)){ vals.putAt(protoHiFld.id, true) } vals.putAt(protoMaxOcFld.id, flMaxph) // Insert the microspecies protoEdp.insert(vals, null, envRW) print "Inserted microspecies \n" } } } }
Versions: This script has been tested on IJC versions 6.0