This script recursively searches a directory for PDF files, extracts the chemical structures found in each file, and loads those structures into the structure entity at the root of the data tree.
/**
 * PDF file loader.
 * Trawls a directory and all its subdirectories, and looks for PDF files.
 * Extracts the individual molecules from each file and loads them into the
 * structure entity at the root of the data tree.
 *
 * Usage:
 * 1. Create a structure entity in the project explorer
 * 2. Add fields named 'Filename' and 'Name'
 * 3. Edit the settings in the 'adjust these variables' section (the defaults are for the
 *    Pubchem demo data tree in the sample project)
 * 4. Run the script
 *
 * The script expects the IJC script bindings 'dataTree' and 'env' to be available.
 *
 * @author Tim Dudgeon
 */
import groovy.io.FileType
import chemaxon.formats.MolImporter
import com.im.commons.progress.*
import com.im.df.api.chem.MarvinStructure

// --------- adjust these variables --------------
def pattern = ~/.*\.pdf/ // pattern for file to process (case-sensitive as written)
def root = new File('C:/Documents/chemaxon/pdfs') // dir to start at
def STRUCTURE_FIELD = 'Structure' // name of structure field
def FILE_FIELD = 'Filename' // name of file field
def NAME_FIELD = 'Name' // name of the name field
// ---------- end of variables -------------------

def structF
def filenameF
def nameF
def edp
def traverse

// ---------- this is the routine that processes the file and loads it
// file  : the PDF file to read molecules from
// envRW : the environment handed to the closure by edp.lockable.withLock
def perform = { file, envRW ->
    println "processing file $file"
    MolImporter importer = new MolImporter(file, "pdf")
    try {
        def mol = null
        int count = 0
        // Test for null explicitly so end-of-input is the only thing that
        // terminates the loop, independent of Groovy truth rules.
        while ((mol = importer.read()) != null) {
            count++
            println "loading $count $mol"
            def vals = [
                (structF.id)   : new MarvinStructure(mol),
                (filenameF.id) : file.path,
                (nameF.id)     : mol.name
            ]
            edp.insert(vals, null, envRW)
        }
    } finally {
        // Always release the importer, even when reading or inserting fails;
        // otherwise one handle is leaked per processed file.
        try {
            importer.close()
        } catch (IOException e) {
            println "could not close importer for $file: ${e.message}"
        }
    }
}

def ety = dataTree.rootVertex.entity
edp = ety.schema.dataProvider.getEntityDataProvider(ety)

// Looks up a field of the root entity by name and fails with a clear message
// when it is missing, instead of a NullPointerException later on.
def requireField = { fieldName ->
    def field = ety.fields.items.find { it.name == fieldName }
    if (field == null) {
        throw new IllegalStateException("Field '$fieldName' not found in the root entity of the data tree")
    }
    field
}

structF = requireField(STRUCTURE_FIELD)
filenameF = requireField(FILE_FIELD)
nameF = requireField(NAME_FIELD)
println "Found fields ${structF.id} and ${filenameF.id}"

// Processes every matching file in 'dir', then recurses into its subdirectories.
traverse = { dir ->
    println "Looking at dir $dir"
    dir.eachFileMatch(FileType.FILES, pattern) { file ->
        // stop if the script is terminated
        if (env.getFeedback().isCancelled()) {
            def msg = "Importing molecules from $root interupted!"
            println msg
            throw new InterruptedException(msg)
        }
        // each file is loaded under its own 'loading' lock
        edp.lockable.withLock('loading') { envRW ->
            perform(file, envRW)
        }
    }
    dir.eachDir(traverse)
}

// start the process off
traverse(root)
Versions: This script has been tested on IJC version 6.0