package d2sguide;

import java.io.*;
import java.util.*;

import chemaxon.naming.DocumentExtractor;
import chemaxon.naming.DocumentExtractor.Hit;

import chemaxon.formats.MolExporter;

import org.apache.http.client.ResponseHandler;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;


/**
 * Demo: downloads a web page, runs it through {@link DocumentExtractor} and
 * writes the structure of every hit to a file with {@link MolExporter}.
 *
 * <p>Each exported structure carries two properties taken from its hit:
 * {@code "hit text"} and {@code "hit position"}.
 */
public class Demo5 {

    /** Page that is downloaded and processed. */
    private static final String PAGE_URL = "http://en.wikipedia.org/wiki/Penicillin";

    /** Name of the file the hits are exported to. */
    private static final String OUTPUT_FILE = "test.mrv";

    /** Format string handed to {@link MolExporter} (alternative hinted by the original: "sdf"). */
    private static final String OUTPUT_FORMAT = "mrv";

    /**
     * Fetches the given URL with an HTTP GET and returns the response body.
     *
     * <p>The HTTP client's connection manager is always shut down before
     * returning, whether or not the request succeeded.
     *
     * @param url address to download
     * @return the response body as produced by {@link BasicResponseHandler}
     * @throws IOException if executing the request fails
     */
    private static String loadURL(String url) throws IOException {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            HttpGet httpget = new HttpGet(url);
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(httpget, responseHandler);
        } finally {
            // Release the client's connections even when the request failed.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Downloads {@link #PAGE_URL}, extracts the hits from its HTML and exports
     * their structures to {@link #OUTPUT_FILE}.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if downloading, processing or exporting fails
     */
    public static void main(String[] args) throws Exception {

        String pagecontent = loadURL(PAGE_URL);

        // Process the page and get the results.
        DocumentExtractor x = new DocumentExtractor();
        x.processHTML(new StringReader(pagecontent));
        List<Hit> hits = x.getHits();

        // Alternative output: new MolExporter("test.sdf", "sdf")
        MolExporter exporter = new MolExporter(OUTPUT_FILE, OUTPUT_FORMAT);

        try {
            for (Hit hit : hits) {
                // Attach where the hit came from to the exported structure.
                hit.structure.setProperty("hit text", hit.text);
                hit.structure.setProperty("hit position", "" + hit.position);

                // Further properties that could be attached (left disabled as in the original):
                //hit.structure.setProperty("pagenumber", "" + hit.getPageNumber());
                //hit.structure.setProperty("corrected text", hit.structure.getName());
                exporter.write(hit.structure);
            }
        } finally {
            // Nested finally: close() must run even if flush() throws,
            // otherwise the output file would be left open.
            try {
                exporter.flush();
            } finally {
                exporter.close();
            }
        }

        // NOTE(review): explicit exit kept from the original; presumably needed
        // because a library leaves non-daemon threads running — confirm.
        System.exit(0);

    }

}
