Select Representative Member of Clusters

Summary: This script works on a structure table in which each compound is assigned a cluster number. Once the table is sorted on some criterion (activity, for example), the script moves through the rows in order and takes the first instance of each cluster - effectively picking the cluster member with the highest activity, provided the most active compounds are sorted to the top. It then writes these IDs to a new list with a name of your choosing.

This script demonstrates the creation and building of a list object.


/** Fetch ID of the lowest affinity structure from each cluster
 *
 * Usage:
 * 1. Edit the CLUSTER_N_FIELD value below with the name of the column containing the cluster group number.
 * 2. Sort the grid by affinity from low to high
 * 3. Run the script
 *
 * @author Erin Bolstad ([email protected])
 * Dec 2011
 */

import javax.swing.*

// Edit the value below to the column with the cluster assignment

def CLUSTER_N_FIELD = 'DB name'

// NOTE(review): dataTree and DFEnvironmentRO are not defined in this script;
// presumably they are supplied by the host application - confirm.
def parent = dataTree.rootVertex.entity
def rs = parent.schema.dataProvider.getDefaultResultSet(dataTree, false, DFEnvironmentRO.DEV_NULL)
def parentVS = rs.getVertexState(dataTree.rootVertex)

// Row IDs, iterated later in the order returned here.
def ids = parentVS.ids

// Look up the cluster-number field by its display name.
def fldCN = parent.fields.items.find { it.name == CLUSTER_N_FIELD }

// find() returns null when no field matches. Stop here with a clear message
// instead of failing later with a null dereference after a list has been created.
if (fldCN == null) {
    throw new IllegalArgumentException(
            "No field named '" + CLUSTER_N_FIELD + "' was found; edit CLUSTER_N_FIELD to match the cluster column name")
}

def list
def createdList

// Appends the script-level NEW_VALUE to the values currently held by
// createdList and writes the extended collection back using the supplied
// environment (envRW).
def updateList = { envRW ->
    def currentValues = createdList.getValues()
    currentValues << NEW_VALUE
    createdList.setValues(currentValues, envRW)
}

// Ask the user for a list name, create a new list on the entity's ID field,
// then walk the rows in their current order and add the first row seen for
// each cluster value to that list.

parent.schema.userLockable.withLock('updating list') { envRW ->

    Object[] inputOptions = null
    String inputName = JOptionPane.showInputDialog(null,
            "Enter a name for the list of Cluster representatives",
            "List Name",
            JOptionPane.PLAIN_MESSAGE,
            null, inputOptions, "Cluster Reps")

    // showInputDialog returns null when the dialog is cancelled. Without a
    // usable name there is nothing to create, so leave the closure instead of
    // continuing with an unassigned USER_LIST_NAME.
    if ((inputName == null) || (inputName.length() == 0)) {
        // NOTE(review): setLabel is not defined in this script; presumably it is
        // provided by the host application - confirm.
        setLabel("Can't do a blank list name, try again!")
        print "string not accepted"
        return
    }
    USER_LIST_NAME = inputName
    print "string accepted \n"

    // NOTE(review): the list is seeded with the placeholder value "n"; whether
    // that value remains in the finished list is not visible here - confirm.
    def firstValues = ["n"]
    def nt = parent.getLists().getNewTypes().get(0)
    def options = nt.getOptions()
    options.setField(parent.getIdField())
    options.setValues(firstValues)
    options.setNewDFItemNameSafe(USER_LIST_NAME)
    createdList = nt.create(envRW).iterator().next()

    println "Checking for new Cluster Numbers in the sorted grid"

    // Cycle through the rows and pick out the first instance of each cluster.
    // A set keeps the "already seen" check cheap however many clusters exist.
    def seenClusters = new HashSet()
    ids.each { id ->
        def data = parentVS.getData([id], DFEnvironmentRO.DEV_NULL)
        def clusterNum = data[id][fldCN.id]
        // add() returns true only the first time a cluster value is encountered.
        if (seenClusters.add(clusterNum)) {
            // updateList reads the ID to append from NEW_VALUE.
            NEW_VALUE = id
            print "ID $id is representative of cluster $clusterNum. Added to list \n"
            updateList(envRW)
        }
    }
}

println "Finished searching grid"

Versions: This script has been tested on IJC version 6.0.



Copyright © 1999-2013 ChemAxon Ltd.    All rights reserved.