package d2sguide;

import java.io.*;
import java.sql.*;
import java.util.*;
import java.util.regex.*;
import java.util.concurrent.*;

import chemaxon.naming.DocumentExtractor;
import chemaxon.naming.DocumentExtractor.Hit;

import org.apache.http.client.ResponseHandler;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;


/**
 * Demo: downloads a web page, cuts it into large fragments that end on an
 * HTML tag, and runs a {@link DocumentExtractor} over every fragment in
 * parallel on a fixed-size thread pool.
 */
public class Demo7 {

    // HTML markup for the individual hits. Neither field is referenced inside
    // this class; they are kept public and non-final so that any external code
    // reading or reassigning them keeps working.
    // NOTE(review): "%n" looks like a placeholder for the structure name -- confirm.
    public static String prefix = "<span data-structurename=\"%n\">";
    public static String suffix = "</span>";

    /**
     * Splits a document into large fragments: either at least 2000 characters
     * followed by the shortest run of text that ends on one of the listed
     * opening/closing tags, or -- when that cannot match -- the rest of the input.
     * Compiled once because the expression is constant.
     */
    private static final Pattern FRAGMENT_PATTERN = Pattern.compile(
            "(.{2000}.*?</?(a|abbr|blockquote|" +
            "caption|code|dd|div|dl|dt|h1|h2|h3|h4|h5|h6|hr|img|label|" +
            "legend|li|ol|p|pre|table|td|th|tr|ul)[^>]*>|.*$)",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);


    /**
     * Task that returns the list of hits found in one piece of HTML text.
     */
    private static class DocumentExtractorTask implements Callable<List<Hit>> {

        private final String content;

        /**
         * @param c the HTML fragment to process
         */
        public DocumentExtractorTask(String c) {
            this.content = c;
        }

        /**
         * Runs a fresh {@link DocumentExtractor} over the fragment.
         *
         * @return the hits reported by the extractor; if reading the fragment
         *         fails, whatever the extractor collected up to that point
         */
        @Override
        public List<Hit> call() {
            DocumentExtractor extractor = new DocumentExtractor();

            try {
                extractor.processHTML(new StringReader(this.content));
            } catch (IOException e) {
                // Best effort: report the problem instead of hiding it, and
                // still hand back the hits gathered so far.
                System.err.println("Failed to process document fragment: " + e);
            }

            return extractor.getHits();
        }
    }


    /**
     * Fetches the given URL with an HTTP GET and returns the response body.
     *
     * @param url the address to download
     * @return the response body as produced by {@link BasicResponseHandler}
     * @throws IOException if the request fails
     */
    private static String loadURL(String url) throws IOException {

        HttpClient httpclient = new DefaultHttpClient();
        try {
            ResponseHandler<String> responseHandler = new BasicResponseHandler();
            return httpclient.execute(new HttpGet(url), responseHandler);
        } finally {
            // Always release the client's connections, even when the request fails.
            httpclient.getConnectionManager().shutdown();
        }
    }


    /**
     * Downloads the sample page, submits one extraction task per fragment and
     * waits for all of them to complete.
     *
     * @param args ignored
     * @throws Exception if the download fails, a task fails, or the wait is interrupted
     */
    public static void main(String[] args) throws Exception {

        int availableProcessors = Runtime.getRuntime().availableProcessors();
        ExecutorService exec = Executors.newFixedThreadPool(availableProcessors);

        try {

            String pagecontent = loadURL("http://en.wikipedia.org/wiki/Penicillin");

            CompletionService<List<Hit>> completion =
                    new ExecutorCompletionService<List<Hit>>(exec);
            int pending = 0;

            // split up the document into large fragments
            Matcher m = FRAGMENT_PATTERN.matcher(pagecontent);
            while (m.find()) {
                String fragment = m.group(1);

                // The ".*$" alternative also yields one final zero-length match
                // at the end of the input; there is nothing to extract from it.
                if (fragment.isEmpty()) {
                    continue;
                }

                // create and submit new callable instance
                completion.submit(new DocumentExtractorTask(fragment));
                pending++;
            }

            while (pending-- > 0) {

                // wait until the next fragment is finished; get() rethrows a
                // task failure wrapped in an ExecutionException
                List<Hit> hits = completion.take().get();

                // Output is currently disabled; re-enable the line below to
                // print the name of each recognized structure.
                for (Hit hit : hits) {
                    //System.out.println(hit.text);
                }
            }

        } finally {
            exec.shutdownNow();
        }

        // NOTE(review): presumably forces the JVM to exit even if some library
        // thread is still alive -- confirm before removing.
        System.exit(0);

    }

}
