import java.util.*;
import java.util.regex.*;
import java.lang.Character;
import java.sql.*;
import java.text.SimpleDateFormat;
import java.nio.channels.*;

import java.io.*;
import iotools.*;

import javax.xml.bind.*;
import javax.xml.transform.stream.StreamSource;

import UMLS.*;

import config.*;

public class KiPar
{
// ------------------------------------------------------------------------------------
// --- constants
// ------------------------------------------------------------------------------------
// --- URL prefixes of external resources. Each one ends in '=' or '+',
// --- so presumably a record identifier is appended by the code that
// --- uses them (not visible in this part of the file - TODO confirm).
private static final String BASE_PubMed   = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=";
private static final String BASE_PMC      = "http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=";
private static final String BASE_KEGG_EC  = "http://www.genome.jp/dbget-bin/www_bget?enzyme+";
private static final String BASE_KEGG_CPD = "http://www.genome.jp/dbget-bin/www_bget?compound+";
private static final String BASE_KEGG_SCE = "http://www.genome.jp/dbget-bin/www_bget?sce+";
private static final String BASE_GO       = "http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&depth=0&query=";
private static final String BASE_SBO      = "http://www.ebi.ac.uk/compneur-srv/sbo-main/search.do?searchTerm=";

// --- NOTE(review): looks like an upper bound on the number of EC numbers
// --- handled at once; its use is outside this part of the file - confirm.
private static final int    MAX_ECs       = 20;
// ------------------------------------------------------------------------------------


// --- (global) database connection variable ---
// --- static, hence shared by every KiPar instance: assigned in open(),
// --- closed in close(), used by all methods that talk to the database
private static Connection con;

// --- NOTE(review): presumably accumulates BibTeX output; its users are
// --- outside this part of the file - confirm
private static String bibtexGlobal = "";

// --- constructor ---
/** Creates a KiPar instance and immediately opens the database connection via {@link #open()}. */
public KiPar()
{
  this.open();
}
// -------------------


// ************************************************************************************
// ************************* D A T A B A S E   M E T H O D S **************************
// ************************************************************************************

// ----------------------------------------------------------
// --- open database connection
// ----------------------------------------------------------
/**
 * Opens the shared database connection.
 *
 * Driver class, URL, user name and password are read from a fresh
 * {@code Configuration}; SSL is requested and auto-commit is switched on.
 * Failures are reported on the console and not rethrown.
 */
public void open()
{
  System.out.println("\nKiPar.open(): open database connection");

  try
  {
    final Configuration settings = new Configuration();

    // --- register the JDBC driver named in the configuration
    Class.forName(settings.driver());

    final String jdbcUrl = settings.url();

    final Properties credentials = new Properties();
    credentials.setProperty("user",     settings.username());
    credentials.setProperty("password", settings.password());
    credentials.setProperty("ssl",      "true");

    con = DriverManager.getConnection(jdbcUrl, credentials);
    con.setAutoCommit(true);
  }
  catch (SQLException sqlProblem)
  {
    this.explain(sqlProblem);
  }
  catch (java.lang.Exception otherProblem)
  {
    otherProblem.printStackTrace();
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- close database connection
// ----------------------------------------------------------
/**
 * Closes the shared database connection.
 *
 * Does nothing beyond the console message when no connection exists,
 * e.g. because open() failed. SQL errors raised while closing are
 * reported through explain() and not rethrown.
 */
public void close()
{
  System.out.println("\nKiPar.close(): close database connection");

  // --- open() may have failed and left no connection behind
  if (con == null) return;

  try {con.close();}
  catch (SQLException ex) {this.explain(ex);}
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- explain the SQL exception caught
// ----------------------------------------------------------
/**
 * Prints a report of an SQL exception and of every exception chained
 * to it (SQL state, message and vendor error code) to standard output.
 *
 * @param ex first exception of the chain; null prints the header only
 */
public void explain(SQLException ex)
{
  System.out.println ("\n*** SQLException caught ***\n");

  // --- walk the chain of linked exceptions
  for (SQLException link = ex; link != null; link = link.getNextException())
  {
    System.out.println("SQLState: " + link.getSQLState());
    System.out.println("Message:  " + link.getMessage());
    System.out.println("Vendor:   " + link.getErrorCode());
    System.out.println("");
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- fix apostrophe, a special character in SQL, 
// --- so it can be imported (' --> '')
// ----------------------------------------------------------
/**
 * Doubles every apostrophe so the text can be placed inside a
 * single-quoted SQL string literal (' --> '').
 *
 * @param inputString text to escape; must not be null
 * @return the text with each ' replaced by ''
 */
private String fixApostrophe(String inputString)
{
  // --- one library call instead of per-character string concatenation,
  // --- which was quadratic in the input length
  return inputString.replace("'", "''");
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- SQL query to empty the given table
// ----------------------------------------------------------
/**
 * Builds the SQL statement that deletes all rows of a table.
 *
 * @param table table name, inserted verbatim
 * @return the DELETE statement text
 */
private String empty(String table)
{
  final StringBuilder sql = new StringBuilder("DELETE FROM ");
  sql.append(table);
  sql.append(";");
  return sql.toString();
}
// ----------------------------------------------------------
// --- SQL query to count the rows in the given table
// ----------------------------------------------------------
/**
 * Builds the SQL query that counts the rows of a table.
 *
 * @param table table name, inserted verbatim
 * @return the SELECT COUNT(*) query text
 */
public String countQuery(String table)
{
  final StringBuilder sql = new StringBuilder("SELECT COUNT(*) FROM ");
  sql.append(table);
  sql.append(";");
  return sql.toString();
}
// ----------------------------------------------------------
// --- SQL query to select concept IDs and the NCBI queries 
//    (based on concept synonyms) from the given source/table
// ----------------------------------------------------------
/**
 * Builds the SQL query that selects the ID and the cached query of
 * every concept whose block flag is FALSE.
 *
 * @param source suffix of the concept table (CON_source)
 * @param db     prefix of the query and block columns (db_query, db_block)
 * @return the SELECT query text
 */
public String nonblockedConcepts(String source, String db)
{
  final StringBuilder sql = new StringBuilder();
  sql.append("SELECT ID, ").append(db).append("_query");
  sql.append(" FROM CON_").append(source);
  sql.append(" WHERE ").append(db).append("_block = FALSE;");
  return sql.toString();
}
// ----------------------------------------------------------
// --- SQL query to update a concept in the DB with its
//     synonyms-based PubMed/PMC query
// ----------------------------------------------------------
/**
 * Builds the SQL statement that stores a concept's literature query,
 * its document count and its block flag in the concept's row.
 *
 * @param source  suffix of the concept table (CON_source)
 * @param db      prefix of the updated columns (db_query, db_docs, db_block)
 * @param query   query text to cache; apostrophes are doubled
 * @param docs    value written to db_docs
 * @param block   value written to db_block
 * @param concept ID of the row to update, inserted verbatim
 * @return the UPDATE statement text
 */
public String updateQuery(String source, String db, String query, int docs, boolean block, String concept)
{
  final String escapedQuery = this.fixApostrophe(query);

  final StringBuilder sql = new StringBuilder();
  sql.append("UPDATE CON_").append(source).append("\n");
  sql.append("SET ").append(db).append("_query = '").append(escapedQuery).append("',").append("\n");
  sql.append("    ").append(db).append("_docs  = ").append(docs).append(",").append("\n");
  sql.append("    ").append(db).append("_block = ").append(block).append("\n");
  sql.append("WHERE ID = '").append(concept).append("';");
  return sql.toString();
}
// ----------------------------------------------------------
// --- SQL query to select the synonyms for the given concept
// ----------------------------------------------------------
/**
 * Builds the SQL query that selects the distinct terms stored for a
 * concept, leaving out terms that contain "&lt;/".
 *
 * @param source  suffix of the term table (TERM_source)
 * @param concept concept ID, inserted verbatim
 * @return the SELECT query text
 */
public String selectTerms(String source, String concept)
{
  final StringBuilder sql = new StringBuilder();
  sql.append("SELECT DISTINCT term FROM TERM_").append(source);
  sql.append(" WHERE ID = '").append(concept).append("'");
  sql.append(" AND term NOT LIKE '%</%';");
  return sql.toString();
}
// ----------------------------------------------------------
// --- SQL query to calculate the number of matching concepts
//     in a given document
// ----------------------------------------------------------
/**
 * Computes the ratio between the number of index rows that link a
 * document to a concept type and the number of rows in that concept
 * type's table.
 *
 * @param db      prefix of the index table (db_INDEX); also selects the
 *                document ID column through docID()
 * @param id      document ID
 * @param concept value of the index table's concept column and, at the
 *                same time, suffix of the concept table (CON_concept)
 * @return hits / total, or 0 when the concept table is empty or a
 *         query failed (SQL errors are reported through explain())
 */
public double countHits(String db, String id, String concept)
{
  final String idColumn = docID(db);

  int hits  = 0;
  int total = 0;

  Statement stmt = null;
  ResultSet rs   = null;

  try
  {
    stmt = con.createStatement();

    // --- # of index rows for this document and concept ---
    rs = stmt.executeQuery(
           "SELECT COUNT(*)"                          + "\n" +
           "FROM   " + db + "_INDEX"                  + "\n" +
           "WHERE  " + idColumn + "= '" + id + "'"    + "\n" +
           "AND    concept = '" + concept + "';");
    if (rs.next()) hits = rs.getInt(1);
    rs.close();
    rs = null;

    // --- # of rows in the concept table (blocked ones included) ---
    rs = stmt.executeQuery("SELECT COUNT(*) FROM CON_" + concept + ";");
    if (rs.next()) total = rs.getInt(1);
  }
  catch (SQLException ex) {this.explain(ex);}
  finally
  {
    // --- release JDBC resources even when a query failed
    if (rs != null)
    {
      try {rs.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }

  if (total == 0) return 0;
  return (double) hits / total;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- empty all database tables
// ----------------------------------------------------------
  /**
   * Deletes all rows from the concept, relation, term, index, hit,
   * citation and TMP2/TMP3 tables, in the order listed below.
   *
   * The first SQL error stops the run; it is reported through explain()
   * and the remaining tables are left untouched.
   */
  private void empty()
  {
    System.out.println("\nKiPar.empty(): empty a local database used for information retrieval");

    // --- tables to empty, in execution order
    final String[] tables =
    {
      "CONCEPTS", "CON_CPD", "CON_EC", "CON_GO", "CON_SBO", "CON_SCE",
      "REL_EC_CPD", "REL_EC_SCE",
      "TERM_CPD", "TERM_EC", "TERM_GO", "TERM_SBO", "TERM_SCE",
      "PMC_INDEX", "PMC_GENERAL_HITS", "PMC_SPECIFIC_HITS", "PMC_CITATION",
      "PubMed_INDEX", "PubMed_GENERAL_HITS", "PubMed_SPECIFIC_HITS", "PubMed_CITATION",
      "TMP2", "TMP3"
    };

    Statement stmt = null;

    try
    {
      stmt = con.createStatement();

      for (int i = 0; i < tables.length; i++)
      {
        stmt.execute(empty(tables[i]));
      }
    }
    catch (SQLException ex) {this.explain(ex);}
    finally
    {
      // --- release the statement even when a DELETE failed
      if (stmt != null)
      {
        try {stmt.close();}
        catch (SQLException ex) {this.explain(ex);}
      }
    }
  }
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- stopword? (based on the given list in the DB)
// ----------------------------------------------------------
/**
 * Tells whether a word is on the stop list kept in table STOP_LIST.
 * The word is lower-cased before the lookup.
 *
 * @param word word to look up; must not be null
 * @return true when at least one STOP_LIST row holds the word; false
 *         otherwise or when the lookup failed (SQL errors are reported
 *         through explain())
 */
private boolean stopWord(String word)
{
  boolean   stop = false;
  Statement stmt = null;
  ResultSet rs   = null;

  try
  {
    stmt = con.createStatement();
    String query = "SELECT COUNT(*) FROM STOP_LIST WHERE token = '" + this.fixApostrophe(word.toLowerCase()) + "';";
    rs = stmt.executeQuery(query);

    // --- any positive count is a match: a token stored more than once
    // --- must still be recognised as a stopword
    if (rs.next()) stop = rs.getInt(1) > 0;
  }
  catch (SQLException ex) {this.explain(ex);}
  finally
  {
    // --- release JDBC resources even when the lookup failed
    if (rs != null)
    {
      try {rs.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }

  return stop;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- upload the file to the temp table TMP1
// ----------------------------------------------------------
/**
 * Replaces the content of temp table TMP1 with the IDs read from a
 * file, one ID per read; empty values are skipped.
 *
 * Errors are reported on the console and not rethrown; rows inserted
 * before the error stay in the table.
 *
 * @param file_name name of the file to upload
 */
public void tmp1(String file_name)
{
  EasyReader file = null;
  Statement  stmt = null;

  try
  {
    file = fileIn(file_name);
    stmt = con.createStatement();

    stmt.execute(empty("TMP1"));

    while (!file.eof())
    {
      String ID = file.readString();

      if (ID.length() > 0) // --- skip empty lines
      {
        // --- double apostrophes so the ID cannot break the SQL literal
        stmt.execute("INSERT INTO TMP1(ID) VALUES('" + ID.replace("'", "''") + "');");
      }
    }
  }
  catch (SQLException ex)        {this.explain(ex);}
  catch (java.lang.Exception ex) {ex.printStackTrace();}
  finally
  {
    // --- release file and statement even when the upload failed
    if (file != null)
    {
      try {file.close();}
      catch (java.lang.Exception ex) {ex.printStackTrace();}
    }
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- upload the file to the temp table TMP2
// ----------------------------------------------------------
/**
 * Replaces the content of temp table TMP2 with the (ID, term) pairs
 * read from a file. Each non-empty line is split at its first tab:
 * the part before it is the ID, the rest (passed through fixComma())
 * is the term.
 *
 * Errors are reported on the console and not rethrown. A line without
 * a tab raises an exception at the split and therefore ends the upload;
 * rows inserted before the error stay in the table.
 *
 * @param file_name name of the file to upload
 */
public void tmp2(String file_name)
{
  EasyReader file = null;
  Statement  stmt = null;

  try
  {
    file = fileIn(file_name);
    stmt = con.createStatement();

    stmt.execute(empty("TMP2"));

    while (!file.eof())
    {
      String line = file.readString();

      if (line.length() > 0) // --- skip empty lines
      {
        int    t    = line.indexOf("\t");
        String ID   = line.substring(0, t);
        String term = this.fixComma(line.substring(t+1));

        // --- both values are escaped before they enter the SQL literal
        stmt.execute("INSERT INTO TMP2(ID, term) VALUES('" + this.fixApostrophe(ID) + "', '" + this.fixApostrophe(term) + "');");
      }
    }
  }
  catch (SQLException ex)        {this.explain(ex);}
  catch (java.lang.Exception ex) {ex.printStackTrace();}
  finally
  {
    // --- release file and statement even when the upload failed
    if (file != null)
    {
      try {file.close();}
      catch (java.lang.Exception ex) {ex.printStackTrace();}
    }
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- upload the file to the temp table TMP3
// ----------------------------------------------------------
/**
 * Replaces the content of temp table TMP3 with the rows read from a
 * file. Each non-empty line holds three tab-separated fields: ID,
 * link1 and link2. A link field equal to the text NULL is left unset
 * in the table.
 *
 * Errors are reported on the console and not rethrown. A line with
 * fewer than two tabs raises an exception at the split and therefore
 * ends the upload; rows inserted before the error stay in the table.
 *
 * @param file_name name of the file to upload
 */
public void tmp3(String file_name)
{
  EasyReader file = null;
  Statement  stmt = null;

  try
  {
    file = fileIn(file_name);
    stmt = con.createStatement();

    stmt.execute(empty("TMP3"));

    while (!file.eof())
    {
      String line = file.readString();

      if (line.length() > 0) // --- skip empty lines
      {
        int    t1    = line.indexOf("\t", 0);
        int    t2    = line.indexOf("\t", t1+1);
        String ID    = line.substring(0, t1);
        String link1 = line.substring(t1+1, t2);
        String link2 = line.substring(t2+1);

        // --- double apostrophes so no value can break its SQL literal
        String sqlID = ID.replace("'", "''");

        stmt.execute("INSERT INTO TMP3(ID) VALUES('" + sqlID + "');");

        if (!"NULL".equals(link1))
        {
          stmt.execute("UPDATE TMP3 SET link1 = '" + link1.replace("'", "''") + "' WHERE ID = '" + sqlID + "';");
        }

        if (!"NULL".equals(link2))
        {
          stmt.execute("UPDATE TMP3 SET link2 = '" + link2.replace("'", "''") + "' WHERE ID = '" + sqlID + "';");
        }
      }
    }
  }
  catch (SQLException ex)        {this.explain(ex);}
  catch (java.lang.Exception ex) {ex.printStackTrace();}
  finally
  {
    // --- release file and statement even when the upload failed
    if (file != null)
    {
      try {file.close();}
      catch (java.lang.Exception ex) {ex.printStackTrace();}
    }
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- INIT database - upload input files
// ----------------------------------------------------------
/**
 * Initialises the database from the input files named in the
 * configuration: empties all tables, then uploads enzyme numbers,
 * SBO and GO terms, KEGG compounds, genes, names and mappings, and
 * finally the concept weights.
 *
 * Files are staged through the temp tables TMP2/TMP3 (see tmp2(),
 * tmp3()) and copied from there into the target tables.
 * Errors are reported on the console and not rethrown; the first
 * error raised in this method ends the run.
 */
public void seed()
{
  System.out.println("\nKiPar.seed(): populate a local database used for information retrieval");

  Statement  stmt = null;
  String     query;
  String     dir_name;
  String     file_name;

  try
  {
    this.empty();                  // --- discard previous content
    stmt = con.createStatement();  // --- create statement to pass queries

    Configuration config = new Configuration();


    // --- NOTE(review): the directory names below use Windows path separators
    dir_name = config.path() + "\\input\\";

    file_name = dir_name + config.input_EC();
    System.out.print("Uploading enzyme numbers... ");
    EasyReader file = fileIn(file_name);
    try
    {
      while (!file.eof())
      {
        String ID = file.readString();

        if (ID.length() > 0) // --- skip empty lines, as tmp1() does
        {
          query = "INSERT INTO CON_EC(ID) VALUES('" + ID.replace("'", "''") + "');";
          stmt.execute(query);
        }
      }
    }
    finally
    {
      file.close();                // --- also when an insert failed
    }
    System.out.println("Done.\n");


    dir_name = config.path() + "\\output\\";

    file_name = dir_name + config.output_SBO();
    System.out.print("Uploading SBO IDs and terms... ");
    this.tmp2(file_name);
    query = "INSERT INTO CON_SBO(ID)"                           + "\n" +
            "SELECT DISTINCT ID FROM TMP2;";
    stmt.execute(query);
    query = "INSERT INTO TERM_SBO(ID, term)"                    + "\n" +
            "SELECT DISTINCT ID, term FROM TMP2;";
    stmt.execute(query);
    System.out.println("Done.\n");


    file_name = dir_name + config.output_GO();
    System.out.print("Uploading GO IDs and terms... ");
    this.tmp2(file_name);
    query = "INSERT INTO CON_GO(ID)"                            + "\n" +
            "SELECT DISTINCT ID FROM TMP2;";
    stmt.execute(query);
    query = "INSERT INTO TERM_GO(ID, term)"                     + "\n" +
            "SELECT DISTINCT ID, term FROM TMP2;";
    stmt.execute(query);
    System.out.println("Done.\n");
    // --- NOTE(review): this second pass re-reads the same GO file; the
    // --- EXCEPT clause keeps it from inserting duplicates. Possibly a
    // --- different file was intended here - confirm.
    file_name = dir_name + config.output_GO();
    System.out.print("Uploading file: " + file_name + " ... ");
    this.tmp2(file_name);
    query = "INSERT INTO TERM_GO(ID, term)"                     + "\n" +
            "(SELECT DISTINCT ID, term FROM TMP2)"              + "\n" +
            "EXCEPT"                                            + "\n" +
            "(SELECT ID, term FROM TERM_GO);";
    stmt.execute(query);
    System.out.println("Done.\n");


    dir_name += "KEGG\\";

    file_name = dir_name + config.compound();
    System.out.print("Uploading compound IDs... ");
    this.tmp3(file_name);
    query = "INSERT INTO CON_CPD(ID, sid, chebi)"          + "\n" +
            "(SELECT DISTINCT ID, link1, link2 FROM TMP3)" + "\n" +
            "EXCEPT"                                       + "\n" +
            "(SELECT ID, sid, chebi FROM CON_CPD);";
    stmt.execute(query);
    this.reactions();
    System.out.println("Done.\n");


    file_name = dir_name + config.gene();
    System.out.print("Uploading gene IDs... ");
    this.tmp3(file_name);
    query = "INSERT INTO CON_SCE(ID, sgd, mips)"               + "\n" +
            "(SELECT DISTINCT ID, link1, link2 FROM TMP3)" + "\n" +
            "EXCEPT"                                       + "\n" +
            "(SELECT ID, sgd, mips FROM CON_SCE);";
    stmt.execute(query);
    System.out.println("Done.\n");


    file_name = dir_name + config.enzyme_name();
    System.out.print("Uploading enzyme names... ");
    this.tmp2(file_name);
    query = "INSERT INTO TERM_EC(ID, term)"                + "\n" +
            "(SELECT DISTINCT ID, term FROM TMP2)"         + "\n" +
            "EXCEPT"                                       + "\n" +
            "(SELECT ID, term FROM TERM_EC);";
    stmt.execute(query);
    System.out.println("Done.\n");


    file_name = dir_name + config.compound_name();
    System.out.print("Uploading compound names ... ");
    this.tmp2(file_name);
    query = "INSERT INTO TERM_CPD(ID, term)"               + "\n" +
            "(SELECT DISTINCT ID, term FROM TMP2)"         + "\n" +
            "EXCEPT"                                       + "\n" +
            "(SELECT ID, term FROM TERM_CPD);";
    stmt.execute(query);
    System.out.println("Done.\n");


    file_name = dir_name + config.gene_name();
    System.out.print("Uploading gene names... ");
    this.tmp2(file_name);
    query = "INSERT INTO TERM_SCE(ID, term)"               + "\n" +
            "(SELECT DISTINCT ID, term FROM TMP2)"         + "\n" +
            "EXCEPT"                                       + "\n" +
            "(SELECT ID, term FROM TERM_SCE);";
    stmt.execute(query);
    System.out.println("Done.\n");


    file_name = dir_name + config.enzyme2compound();
    System.out.print("Uploading enzyme-to-compound mappings... ");
    this.tmp2(file_name);
    query = "INSERT INTO REL_EC_CPD(ec, cpd)"              + "\n" +
            "(SELECT DISTINCT ID, term FROM TMP2)"         + "\n" +
            "EXCEPT"                                       + "\n" +
            "(SELECT ec, cpd FROM REL_EC_CPD);";
    stmt.execute(query);
    System.out.println("Done.\n");


    file_name = dir_name + config.enzyme2gene();
    System.out.print("Uploading enzyme-to-gene mappings... ");
    this.tmp2(file_name);
    query = "INSERT INTO REL_EC_SCE(ec, sce)"              + "\n" +
            "(SELECT DISTINCT ID, term FROM TMP2)"         + "\n" +
            "EXCEPT"                                       + "\n" +
            "(SELECT ec, sce FROM REL_EC_SCE);";
    stmt.execute(query);
    System.out.println("Done.\n");


    // --- the staging tables are no longer needed
    query = empty("TMP2"); stmt.execute(query);
    query = empty("TMP3"); stmt.execute(query);


    System.out.print("Uploading weights... ");
    query = "INSERT INTO CONCEPTS(concept, weight) VALUES('EC', "   + config.weight_EC()   + ");"; stmt.execute(query);
    query = "INSERT INTO CONCEPTS(concept, weight) VALUES('CPD', "  + config.weight_CPD()  + ");"; stmt.execute(query);
    query = "INSERT INTO CONCEPTS(concept, weight) VALUES('SCE', "  + config.weight_SCE()  + ");"; stmt.execute(query);
    query = "INSERT INTO CONCEPTS(concept, weight) VALUES('RN', "   + config.weight_RN()   + ");"; stmt.execute(query);
    query = "INSERT INTO CONCEPTS(concept, weight) VALUES('GO', "   + config.weight_GO()   + ");"; stmt.execute(query);
    query = "INSERT INTO CONCEPTS(concept, weight) VALUES('PATH', " + config.weight_PATH() + ");"; stmt.execute(query);
    query = "INSERT INTO CONCEPTS(concept, weight) VALUES('SBO', "  + config.weight_SBO()  + ");"; stmt.execute(query);
    System.out.println("Done.\n");
  }
  catch (SQLException ex)        {this.explain(ex);}
  catch (java.lang.Exception ex) {ex.printStackTrace();}
  finally
  {
    // --- release the statement even when an upload failed
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }
}
// ----------------------------------------------------------


// ************************************************************************************
// ************************************************************************************
// ************************************************************************************



// ************************************************************************************
// *********** P U B M E D [ C E N T R A L ] - R E L A T E D   M E T H O D S **********
// ************************************************************************************

// ----------------------------------------------------------
// --- which database, PubMed or PMC?
// ----------------------------------------------------------
/**
 * Tells whether the given database name denotes PubMed.
 *
 * @param db database name, compared case-insensitively with "pubmed";
 *           must not be null
 * @return true for PubMed, false for anything else
 */
private boolean isPubMed(String db)
{
  final String lowered = db.toLowerCase();
  return "pubmed".equals(lowered);
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- which ID, PMID or PMCID?
// ----------------------------------------------------------
/**
 * Names the document ID column that goes with a literature database.
 *
 * @param db database name
 * @return "PMID" when isPubMed(db) holds, "PMCID" otherwise
 */
private String docID(String db)
{
  return isPubMed(db) ? "PMID" : "PMCID";
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- formulate a PubMed query for each concept
// --- cache the query in a DB table (the concept row)
// --- (un)mask the concept for the use in retrieval based
//     on the number of documents returned by the query
// ----------------------------------------------------------
/**
 * Formulates a PubMed query for every concept of the sources GO, SBO,
 * SCE (genes), EC (enzymes) and CPD (compounds), in that order.
 *
 * For each concept the stored terms are OR-ed together:
 *   - GO, SBO, CPD: quoted terms longer than 4 characters that are not
 *     stopwords;
 *   - SCE: all terms, unquoted, AND-ed with the configured organism;
 *   - EC: the quoted concept ID plus the filtered quoted terms, AND-ed
 *     with the configured organism.
 * The query, AND-ed with the configured keywords, is sent to Entrez.
 * Query text, document count and block flag are then written to the
 * concept's row; a concept is blocked unless 0 < docs < 50000.
 *
 * A concept without any usable term is stored with an empty query,
 * 0 documents and the block flag set; Entrez is not contacted for it.
 *
 * Errors are reported on the console and not rethrown; the first
 * error ends the run.
 */
public void queryPubMed()
{
  System.out.println("\nKiPar.queryPubMed(): formulate a PubMed query for each concept");

  // --- PubMed search parameters
  final String db     = "pubmed";
  final String retMax = "50000";
  final int    max    =  50000;

  // --- concept sources, in processing order
  final String[] sources = {"GO", "SBO", "SCE", "EC", "CPD"};

  Statement stmt = null;

  try
  {
    stmt = con.createStatement();

    Configuration config   = new Configuration();
    Entrez        entrez   = new Entrez();
    String        keywords = config.keywords();
    String        organism = null;

    for (int s = 0; s < sources.length; s++)
    {
      String  source = sources[s];
      boolean gene   = source.equals("SCE");
      boolean enzyme = source.equals("EC");

      // --- the organism restricts gene and enzyme queries only;
      // --- it is read when first needed
      if ((gene || enzyme) && organism == null) organism = config.organism();

      List<String> conceptList = this.concepts(source, "pubmed");

      for (Iterator<String> c = conceptList.iterator(); c.hasNext(); )
      {
        String concept = c.next();

        // --- collect the OR-ed terms; enzymes start with their EC number
        StringBuilder terms = new StringBuilder();
        if (enzyme) terms.append("\"").append(concept).append("\"[TEXT:noexp]");

        ResultSet rs = stmt.executeQuery(selectTerms(source, concept));
        try
        {
          while (rs.next())
          {
            String name = rs.getString(1);
            String term;

            if (gene)                                      term = name + "[TEXT:noexp]";
            else if (name.length() > 4 && !stopWord(name)) term = "\"" + name + "\"[TEXT:noexp]";
            else                                           continue;

            // --- the separator goes between terms only, so nothing has
            // --- to be cut off afterwards
            if (terms.length() > 0) terms.append(" OR ");
            terms.append(term);
          }
        }
        finally
        {
          rs.close();
        }

        String PubMed_query = terms.toString();
        int    docs         = 0;

        if (PubMed_query.length() > 0)
        {
          if (gene || enzyme) PubMed_query = "(" + PubMed_query + ") AND " + organism + "[ALL]";
          docs = entrez.response(db, "(" + PubMed_query + ") AND (" + keywords + ")", retMax);
        }

        // --- block concepts that match nothing or too many documents
        boolean block = !(0 < docs && docs < max);

        stmt.execute(updateQuery(source, db, PubMed_query, docs, block, concept));
      }
    }
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release the statement even when the run was cut short
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- formulate a PMC query for each concept
// --- cache the query in a DB table (the concept row)
// --- (un)mask the concept for the use in retrieval based
//     on the number of documents returned by the query
// ----------------------------------------------------------
/**
 * Formulates a PMC query for every concept of the sources GO, SBO,
 * SCE (genes), EC (enzymes) and CPD (compounds), in that order.
 *
 * For each concept the stored terms are OR-ed together:
 *   - GO, SBO, CPD: quoted terms longer than 4 characters that are not
 *     stopwords;
 *   - SCE: all terms, quoted, AND-ed with the configured organism;
 *   - EC: the quoted concept ID tagged [RN] plus the filtered quoted
 *     terms, AND-ed with the configured organism.
 * The query, AND-ed with the configured keywords, is sent to Entrez.
 * Query text, document count and block flag are then written to the
 * concept's row; a concept is blocked unless 0 < docs < 50000.
 *
 * A concept without any usable term is stored with an empty query,
 * 0 documents and the block flag set; Entrez is not contacted for it.
 *
 * Errors are reported on the console and not rethrown; the first
 * error ends the run.
 */
public void queryPMC()
{
  System.out.println("\nKiPar.queryPMC(): formulate a PMC query for each concept");

  // --- PMC search parameters
  final String db     = "pmc";
  final String retMax = "50000";
  final int    max    =  50000;

  // --- concept sources, in processing order
  final String[] sources = {"GO", "SBO", "SCE", "EC", "CPD"};

  Statement stmt = null;

  try
  {
    stmt = con.createStatement();

    Configuration config   = new Configuration();
    Entrez        entrez   = new Entrez();
    String        keywords = config.keywords();
    String        organism = null;

    for (int s = 0; s < sources.length; s++)
    {
      String  source = sources[s];
      boolean gene   = source.equals("SCE");
      boolean enzyme = source.equals("EC");

      // --- the organism restricts gene and enzyme queries only;
      // --- it is read when first needed
      if ((gene || enzyme) && organism == null) organism = config.organism();

      List<String> conceptList = this.concepts(source, "pmc");

      for (Iterator<String> c = conceptList.iterator(); c.hasNext(); )
      {
        String concept = c.next();

        // --- collect the OR-ed terms; enzymes start with their EC number
        StringBuilder terms = new StringBuilder();
        if (enzyme) terms.append("\"").append(concept).append("\"[RN]");

        ResultSet rs = stmt.executeQuery(selectTerms(source, concept));
        try
        {
          while (rs.next())
          {
            String name = rs.getString(1);

            // --- gene names are taken as they are; all other terms must
            // --- be longer than 4 characters and not be stopwords
            if (!gene && !(name.length() > 4 && !stopWord(name))) continue;

            // --- the separator goes between terms only, so nothing has
            // --- to be cut off afterwards
            if (terms.length() > 0) terms.append(" OR ");
            terms.append("\"").append(name).append("\"[TEXT:noexp]");
          }
        }
        finally
        {
          rs.close();
        }

        String PMC_query = terms.toString();
        int    docs      = 0;

        if (PMC_query.length() > 0)
        {
          if (gene || enzyme) PMC_query = "(" + PMC_query + ") AND " + organism + "[ORGN:noexp]";
          docs = entrez.response(db, "(" + PMC_query + ") AND (" + keywords + ")", retMax);
        }

        // --- block concepts that match nothing or too many documents
        boolean block = !(0 < docs && docs < max);

        stmt.execute(updateQuery(source, db, PMC_query, docs, block, concept));
      }
    }
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release the statement even when the run was cut short
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- formulate PubMed and PMC queries for each concept
// --- cache the query in a DB table (the concept row)
// --- (un)mask the concept for the use in retrieval based
//     on the number of documents returned by the query
// ----------------------------------------------------------
public void queryLiterature()
{
  // --- announce the step
  System.out.println("\nKiPar.queryLiterature(): query PubMed and PMC");

  try
  {
    // --- the configured literature database option, lower-cased
    final String option = new Configuration().options_database().toLowerCase();

    // --- NB: substring test - a database is selected when the option occurs
    //     anywhere inside the literal naming the values accepted for it
    final boolean usePubMed = "pubmed or both".contains(option);
    final boolean usePMC    = "pmc or both".contains(option);

    if (usePubMed) this.queryPubMed();
    if (usePMC)    this.queryPMC();
  }
  catch (Exception ex) {System.out.println(ex.toString());}
}
// ----------------------------------------------------------

// ************************************************************************************
// ************************************************************************************
// ************************************************************************************




// ************************************************************************************
// *********************** U M L S - R E L A T E D   M E T H O D S ********************
// ************************************************************************************

// ----------------------------------------------------------
// --- query UMLS for synonyms of the given term
//    (within its semantic type)
// --- NB:
//   - query template read from a file
//   - output (synonyms) written to a file
// ----------------------------------------------------------
private void UMLS_query(String term, String stn)
{
  // --- Steps:
  //   (1) instantiate the UMLS query template for (term, stn) into a tmp file
  //   (2) create the UMLS client (presumably this runs the query - TODO confirm)
  //   (3) unmarshal the returned term list (XML) and write one term per line
  //       to the UMLS output file
  // --- Any failure is reported on stdout; open files are always released.
  System.out.println("\nKiPar.UMLS_query(" + term + ", " + stn + "): find synonyms in UMLS for the given term and its semantic type");

  System.out.println("\n*Nota Bene*: Running this option without previously obtaining a UMLS licence will generate a RemoteException, because of an invalid client IP address. Alternatively, you may choose to skip this option.\n");

  // --- kept outside the try block so that the finally block can release them;
  //     a variable is reset to null as soon as its file has been closed
  EasyReader file_in  = null;
  FileWriter file_out = null;

  try
  {
    Configuration config   = new Configuration();
    String        dir_name = config.path();

    // ----------------------------------------------------------
    // --- formulate an UMLS query for the given term and stn ---
    // ----------------------------------------------------------
    file_in  = fileIn (dir_name + "\\template\\" + config.template_UMLS());
    file_out = fileOut(dir_name + "\\tmp\\"      + config.tmp_UMLS());

    while (!file_in.eof())
    {
      String line = file_in.readString();

      // --- replace the two placeholder lines of the template;
      //     NOTE(review): term/stn are inserted without XML escaping
      if (line.indexOf("<rhs><string>...</string></rhs>") >= 0) line = "<rhs><string>" + term + "</string></rhs>";
      else if (line.indexOf("<like>...%</like>") >= 0)          line = "<like>" + stn + "%</like>";

      file_out.write(line + "\n");
    }

    file_in.close();  file_in  = null;
    file_out.close(); file_out = null;
    System.out.println("Done.\n");
    // ----------------------------------------------------------


    // --- run the UMLS query
    ExpertClientUMLS umls = new ExpertClientUMLS();


    // --- extract terms from XML and write them to the output file
    file_out = fileOut(dir_name + "\\output\\" + config.output_UMLS());

    // --- the UMLS term list is read in the following XML format:
    //     +-------------------------+
    //     | <?xml version = '1.0'?> |
    //     | <termList>              |
    //     |  <term>                 |
    //     |     <str>...</str>      |
    //     |     <sty>...</sty>      |
    //     |  </term>                |
    //     |  ...                    |
    //     | </termList>             |
    //     +-------------------------+

    // --- prepare for XML unmarshalling
    JAXBContext  jc           = JAXBContext.newInstance("UMLS"); // --- entry point to JAXB API
    Unmarshaller unmarshaller = jc.createUnmarshaller();         // --- controls unmarshalling

    UMLS.TermList root = (UMLS.TermList) unmarshaller.unmarshal(new File(dir_name + "\\tmp\\" + config.tmp_terms()));

    List<TermList.Term> termList = root.getTerm();

    for (Iterator<TermList.Term> i = termList.iterator(); i.hasNext(); )
    {
      UMLS.TermList.Term termElement = i.next();
      file_out.write(termElement.getStr() + "\n");
    }

    // --- the template reader was already closed above: only the writer is open here
    file_out.close(); file_out = null;
    System.out.println("Done.\n");
  }
  catch (Exception ex) {System.out.println(ex.toString());}
  finally
  {
    // --- release whatever is still open after a failure
    if (file_in  != null) try {file_in.close();}  catch (Exception ex) {System.out.println(ex.toString());}
    if (file_out != null) try {file_out.close();} catch (Exception ex) {System.out.println(ex.toString());}
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- for the given concept (table.ID), 
//     add synonyms previously retrieved from UMLS
//     (and stored in a file)
// ----------------------------------------------------------
private void expandConcept(String table, String ID)
{
  // --- table : target term table (ID, term) to be expanded
  // --- ID    : concept the synonyms are attached to
  // --- The synonyms are read, one per line, from the UMLS output file,
  //     staged in the temp table TMP2 and then copied to the target table,
  //     leaving out (ID, term) pairs the target table already holds.
  System.out.println("\nKiPar.expand(" + table + ", " + ID + "): for the given concept, add synonyms found previously in UMLS");

  // --- declared outside the try block so that finally can release them
  Statement  stmt = null;
  EasyReader file = null;

  try
  {
    stmt = con.createStatement();
    stmt.execute(empty("TMP2"));

    // --------------------------------------------------------
    // --- load synonyms from a file to the temp table TMP2 ---
    // --------------------------------------------------------
    Configuration config = new Configuration();
    file = fileIn(config.path() + "\\output\\" + config.output_UMLS());

    while (!file.eof())
    {
      String term = file.readString();

      if (term == null || term.length() == 0) continue; // --- skip empty lines

      // --- the statement has to be executed, otherwise TMP2 stays empty
      //     and nothing is ever copied to the target table
      stmt.execute("INSERT INTO TMP2(ID, term) VALUES('" + ID + "', '" + this.fixApostrophe(term) + "');");
    }

    file.close(); file = null;
    System.out.println("Done.\n");
    // --------------------------------------------------------


    // --------------------------------------------------------
    // --- copy from the temp table TMP2 to the target table,
    //     except for the pairs already stored in the target table
    // --------------------------------------------------------
    String query = "INSERT INTO " + table + "(ID, term)"      + "\n" +
                   "(SELECT DISTINCT ID, term FROM TMP2)"     + "\n" +
                   "EXCEPT"                                   + "\n" +
                   "(SELECT ID, term FROM " + table + ");";
    stmt.execute(query);
    // --------------------------------------------------------

    stmt.execute(empty("TMP2"));
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release the reader and the statement even after a failure
    if (file != null) try {file.close();} catch (Exception ex)    {System.out.println(ex.toString());}
    if (stmt != null) try {stmt.close();} catch (SQLException ex) {this.explain(ex);}
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- for the given table, add synonyms from UMLS for all 
//     concepts within a table given the semantic type for 
//     the table concepts
// ----------------------------------------------------------
private void expandTable(String table, String stn)
{
  // --- table : term table holding (ID, term) rows
  // --- stn   : semantic type passed on to the UMLS query for every term
  System.out.println("\nKiPar.expand(" + table + ", " + stn + "): for the given semantic type, add synonyms from UMLS for all of its concepts");

  try
  {
    Statement statement = con.createStatement();
    ResultSet rows      = statement.executeQuery("SELECT ID, term FROM " + table + ";");

    // --- one UMLS round trip per (ID, term) row of the table
    while (rows.next())
    {
      String conceptID   = rows.getString(1);
      String conceptTerm = rows.getString(2);

      UMLS_query(conceptTerm, stn);       // --- synonyms are written to the UMLS output file
      expandConcept(table, conceptID);    // --- ... and loaded from there into the table
    }

    rows.close();
    statement.close();
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- add synonyms from UMLS for all considered concepts
// ----------------------------------------------------------
public void expand()
{
  System.out.println("\nKiPar.expand(): for all considered concepts, add synonyms from UMLS");

  // --- (term table, semantic type) pairs, expanded in the listed order
  final String[][] targets =
  {
    {"TERM_EC" , "A1.4"},
    {"TERM_CPD", "A1.4"},
    {"TERM_GO" , "B2.2"}
  };

  for (int i = 0; i < targets.length; i++) expandTable(targets[i][0], targets[i][1]);
}
// ----------------------------------------------------------

// ************************************************************************************
// ************************************************************************************
// ************************************************************************************




// ************************************************************************************
// ******************************* I N D E X I N G ************************************
// ************************************************************************************

// ----------------------------------------------------------
// --- index the given database (Pubmed or PMC) with the 
//     concepts from the given source/table
// ----------------------------------------------------------
private void indexTable(String db, String retMax, String source)
{
  // --- db     : literature database to search
  // --- retMax : passed through to the Entrez search
  // --- source : concept type; its non-blocked concepts are searched for
  System.out.println("\nKiPar.indexTable(...): " + db + "_INDEX with concepts from table CON_" + source);

  try
  {
    // --- temp file the search results are written to before being loaded
    Configuration settings = new Configuration();
    final String  tmpDir   = settings.path() + "\\tmp\\";
    final String  idsFile  = isPubMed(db) ? tmpDir + settings.tmp_PubMed()
                                          : tmpDir + settings.tmp_PMC();

    // --- prepare for Entrez search
    Entrez       searcher = new Entrez();
    final String filter   = settings.keywords();

    // --- each row: (concept ID, cached literature query) of a non-blocked concept
    Statement statement = con.createStatement();
    ResultSet selected  = statement.executeQuery(nonblockedConcepts(source, db));

    while (selected.next())
    {
      String conceptID   = selected.getString(1);
      String cachedQuery = selected.getString(2);

      // --- search, then load the result file into the index table
      searcher.search(db, "(" + cachedQuery + ") AND (" + filter + ")", retMax, idsFile);
      this.loadIndex(db, source, conceptID, idsFile);
    }

    selected.close();
    statement.close();
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- index the given database (Pubmed or PMC) with the 
//     considered concepts
// ----------------------------------------------------------
public void indexLiteratureDB(String db, String retMax)
{
  // --- db     : literature database whose <db>_INDEX table is rebuilt
  // --- retMax : passed through to indexTable for every concept type
  System.out.println("\nKiPar.indexLiteratureDB(...): map each concept to the IDs of the matching documents in " + db);

  try
  {
    // --- start from an empty index table
    Statement cleaner = con.createStatement();
    cleaner.execute(empty(db + "_INDEX"));
    cleaner.close();

    // --- concept types, indexed in the listed order
    final String[] sources = {"SCE", "EC", "CPD", "GO", "SBO"};
    for (int i = 0; i < sources.length; i++) this.indexTable(db, retMax, sources[i]);
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- index documents in PubMed and PMC
// ----------------------------------------------------------
  public void indexLiterature()
  {
    System.out.println("\nKiPar.indexLiterature(): index documents (abstracts/articles) in:");
    System.out.println("  (1) PubMed\n  (2) PubMed Central");

    try
    {
      // --- the configured literature database option, lower-cased
      final String option = new Configuration().options_database().toLowerCase();
      final String limit  = "50000";   // --- retMax handed to both indexing runs

      // --- NB: substring test - a database is selected when the option occurs
      //     anywhere inside the literal naming the values accepted for it
      if ("pubmed or both".contains(option)) this.indexLiteratureDB("pubmed", limit);
      if ("pmc or both".contains(option))    this.indexLiteratureDB("pmc",    limit);
    }
    catch (Exception ex) {System.out.println(ex.toString());}
  }
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- extract concept IDs from the given source/table
// ----------------------------------------------------------
List<String> concepts(String table, String db)
{
  // --- returns the distinct IDs of table con_<table> whose <db>_query column
  //     is NULL; the list is empty (or partial) if the database access fails
  System.out.println("\nKiPar.concepts(" + table + ", " + db + "): extract all concepts from the given table");
  System.out.println("that do not have a " + db + "-query formulated yet");

  List<String> pending = new ArrayList<String>();

  try
  {
    Statement statement = con.createStatement();
    ResultSet rows      = statement.executeQuery("SELECT DISTINCT ID FROM con_" + table + " WHERE " + db + "_query IS NULL;");

    while (rows.next()) pending.add(rows.getString(1));

    rows.close();
    statement.close();
  }
  catch (SQLException ex) {this.explain(ex);}

  return pending;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- for a given concept (ID) (from the given source) 
//     upload the IDs of matching documents
// ----------------------------------------------------------
public void loadIndex(String db, String source, String ID, String file_of_IDs)
{
  // --- db          : literature database; rows go to table <db>_INDEX
  // --- source      : stored in the column "concept"
  // --- ID          : stored in the column "ID"
  // --- file_of_IDs : file with one document ID per line
  System.out.println("\nKiPar.loadIndex(...): load table " + db + "_INDEX");

  final String idColumn = docID(db); // --- PMID or PMCID

  try
  {
    EasyReader reader   = fileIn(file_of_IDs);
    Statement  inserter = con.createStatement();

    // --- everything of the INSERT statement that does not depend on the document
    final String prefix = "INSERT INTO " + db + "_INDEX(concept, ID, " + idColumn + ") VALUES('" + source + "', '" + ID + "', '";

    while (!reader.eof())
    {
      String docId = reader.readString();

      if (docId.length() == 0) continue; // --- skip empty lines

      String insert = prefix + docId + "');";
      System.out.println(insert);
      inserter.execute(insert);
    }

    reader.close();
    inserter.close();
  }
  catch (SQLException ex)        {this.explain(ex);}
  catch (java.lang.Exception ex) {ex.printStackTrace();}
}
// ----------------------------------------------------------

// ************************************************************************************
// ************************************************************************************
// ************************************************************************************




// ************************************************************************************
// ******************************** S C O R I N G *************************************
// ************************************************************************************

// ----------------------------------------------------------
// --- calculate the numbers of enzyme(EC)-specific hits for 
//     each document in the given database (PubMed or PMC)
// ----------------------------------------------------------
public void hits(String db, String EC)
{
  // --- db : literature database; <db>_INDEX is read, <db>_SPECIFIC_HITS is written
  // --- EC : ID of the enzyme the hits are calculated for
  // --- For every document indexed with the enzyme itself, one of its compounds
  //     (REL_EC_CPD) or one of its genes (REL_EC_SCE), one row is inserted:
  //       EC_hit   - number of index rows linking the document to the enzyme
  //       CPD_hits - number of index rows linking the document to the enzyme's
  //                  compounds, divided by the number of the enzyme's compounds
  //                  whose <db>_BLOCK flag is FALSE (if that number is not 0)
  //       SCE_hits - the same for the enzyme's genes
  System.out.println("\nKiPar.hits(db, EC): for each indexed document from " + db + " calculate the numbers of enzyme(" + EC + ")-specific hits");

  String ID = docID(db);

  String    query = "";
  Statement stmt  = null;
  ResultSet rs    = null;
  Statement stmt1 = null;
  ResultSet rs1;

  try
  {
    // --- rs.last()/rs.beforeFirst() below require a scrollable result set;
    //     a default (forward-only) statement is not guaranteed to support them
    stmt  = con.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
    stmt1 = con.createStatement();

    System.out.println("");
    System.out.println("  0%                                                100%");
    System.out.println("  |--------------------------------------------------|");
    System.out.print("  |");

    int n = 0, pre = 0;

    // -----------------------------------------------------------------------
    // --- the normalisation totals depend on the enzyme only, not on the
    //     document, so they are determined once, before the document loop
    // -----------------------------------------------------------------------

    // --- How many enzyme-related compounds?
    int CPD_total = 0;
    query = "SELECT COUNT(CPD)"                     + "\n" +
            "FROM   REL_EC_CPD EC, CON_CPD C"       + "\n" +
            "WHERE  EC.EC = '" + EC + "'"           + "\n" +
            "AND    EC.CPD = C.ID"                  + "\n" +
            "AND    C." + db + "_BLOCK = FALSE;";
    rs1 = stmt1.executeQuery(query);
    if (rs1.next()) CPD_total = rs1.getInt(1);
    rs1.close();

    // --- How many enzyme-related genes?
    int SCE_total = 0;
    query = "SELECT COUNT(SCE)"                     + "\n" +
            "FROM   REL_EC_SCE ES, CON_SCE S"       + "\n" +
            "WHERE  ES.EC = '" + EC + "'"           + "\n" +
            "AND    ES.SCE = S.ID"                  + "\n" +
            "AND    S." + db + "_BLOCK = FALSE;";
    rs1 = stmt1.executeQuery(query);
    if (rs1.next()) SCE_total = rs1.getInt(1);
    rs1.close();

    // -----------------------------------------------------------------------
    // --- select the documents that have anything to do with the given enzyme
    // -----------------------------------------------------------------------
    query = "SELECT DISTINCT " + ID                   + "\n" +
            "FROM   " + db + "_INDEX"                 + "\n" +
            "WHERE  " + ID + " IN"                    + "\n" +
            "("                                       + "\n" +
            "    SELECT " + ID                        + "\n" +
            "    FROM   " + db + "_INDEX"             + "\n" +
            "    WHERE  concept = 'EC'"               + "\n" +
            "    AND    ID = '" + EC + "'"            + "\n" +
            "  UNION"                                 + "\n" +
            "    SELECT " + ID                        + "\n" +
            "    FROM   " + db + "_INDEX"             + "\n" +
            "    WHERE  concept = 'CPD'"              + "\n" +
            "    AND    ID IN"                        + "\n" +
            "    ("                                   + "\n" +
            "      SELECT CPD"                        + "\n" +
            "      FROM   REL_EC_CPD"                 + "\n" +
            "      WHERE  EC = '" + EC + "'"          + "\n" +
            "    )"                                   + "\n" +
            "  UNION"                                 + "\n" +
            "    SELECT " + ID                        + "\n" +
            "    FROM   " + db + "_INDEX"             + "\n" +
            "    WHERE  concept = 'SCE'"              + "\n" +
            "    AND    ID IN"                        + "\n" +
            "    ("                                   + "\n" +
            "      SELECT SCE"                        + "\n" +
            "      FROM   REL_EC_SCE"                 + "\n" +
            "      WHERE  EC = '" + EC + "'"          + "\n" +
            "    )"                                   + "\n" +
            ");";
    rs   = stmt.executeQuery(query);

    // --- number of selected documents (needed to scale the progress bar)
    rs.last();
    int all = rs.getRow();
    rs.beforeFirst();

    while (rs.next())
    {
      String id = rs.getString(1);

      // --- progress bar: 50 bars correspond to 100%
      n++;
      int p = (n*50)/all;
      for (int bars = 0; bars < p-pre; bars++) System.out.print("|");
      pre = p;

      // --- EC hit?
      int EC_hit = 0;
      query = "SELECT COUNT(*)"                       + "\n" +
              "FROM   " + db + "_INDEX"               + "\n" +
              "WHERE  " + ID + " = '" + id + "'"      + "\n" +
              "AND    concept = 'EC'"                 + "\n" +
              "AND    ID = '" + EC + "';";
      rs1 = stmt1.executeQuery(query);
      if (rs1.next()) EC_hit = rs1.getInt(1);
      rs1.close();


      // --- calculate the number of EC's CPD-specific hits
      double CPD_hits = 0.0;
      query = "SELECT COUNT(*)"                       + "\n" +
              "FROM   " + db + "_INDEX"               + "\n" +
              "WHERE  " + ID + " = '" + id + "'"      + "\n" +
              "AND    concept = 'CPD'"                + "\n" +
              "AND    ID IN"                          + "\n" +
              "("                                     + "\n" +
              "  SELECT CPD"                          + "\n" +
              "  FROM   REL_EC_CPD"                   + "\n" +
              "  WHERE  EC = '" + EC + "'"            + "\n" +
              ");";
      rs1 = stmt1.executeQuery(query);
      if (rs1.next()) CPD_hits = rs1.getInt(1) + 0.0;
      rs1.close();

      // --- normalise by the number of enzyme-related compounds
      if (CPD_total != 0) CPD_hits = CPD_hits / CPD_total;


      // --- calculate the number of EC's SCE-specific hits
      double SCE_hits = 0.0;
      query = "SELECT COUNT(*)"                       + "\n" +
              "FROM   " + db + "_INDEX"               + "\n" +
              "WHERE  " + ID + "= '" + id + "'"       + "\n" +
              "AND    concept = 'SCE'"                + "\n" +
              "AND    ID IN"                          + "\n" +
              "("                                     + "\n" +
              "  SELECT SCE"                          + "\n" +
              "  FROM   REL_EC_SCE"                   + "\n" +
              "  WHERE  EC = '" + EC + "'"            + "\n" +
              ");";
      rs1 = stmt1.executeQuery(query);
      if (rs1.next()) SCE_hits = rs1.getInt(1) + 0.0;
      rs1.close();

      // --- normalise by the number of enzyme-related genes
      if (SCE_total != 0) SCE_hits = SCE_hits / SCE_total;


      query = "INSERT INTO " + db + "_SPECIFIC_HITS(" + ID + ", EC, EC_hit, CPD_hits, SCE_hits)"    + "\n" +
              "VALUES ('" + id + "', '" + EC + "', " + EC_hit + ", " + CPD_hits + ", " + SCE_hits + ");";
      stmt1.execute(query);
    }

    // --- no documents at all: print a complete bar
    if (n == 0) System.out.print("||||||||||||||||||||||||||||||||||||||||||||||||||");
    System.out.println("|");
    // -----------------------------------------------------------------------
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release the JDBC resources even when a query failed half-way
    try {if (rs    != null) rs.close();}    catch (SQLException ex) {this.explain(ex);}
    try {if (stmt  != null) stmt.close();}  catch (SQLException ex) {this.explain(ex);}
    try {if (stmt1 != null) stmt1.close();} catch (SQLException ex) {this.explain(ex);}
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- calculate the numbers of general term hits for 
//     each document in the given database (PubMed or PMC)
// ----------------------------------------------------------
public void hits(String db)
{
  // --- db : literature database; <db>_INDEX is read, <db>_GENERAL_HITS is written
  // --- Steps:
  //   (1) insert one row with zero hits for every indexed document
  //   (2) for every document indexed with a GO or SBO concept, store the
  //       values returned by countHits for "GO" and "SBO"
  //   (3) delete the documents whose SBO_hits is 0
  System.out.println("\nKiPar.hits(" + db + "): calculate the numbers of SBO- and GO-hits");

  String ID = docID(db);

  String    query = "";
  Statement stmt  = null;
  ResultSet rs    = null;
  Statement stmt1 = null;

  try
  {
    // --- rs.last()/rs.beforeFirst() below require a scrollable result set;
    //     a default (forward-only) statement is not guaranteed to support them
    stmt  = con.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
    stmt1 = con.createStatement();

    query = "INSERT INTO " + db + "_GENERAL_HITS(" + ID + ", go_hits, sbo_hits)" + "\n" +
            "SELECT DISTINCT " + ID + ", 0, 0 FROM " + db + "_INDEX;";
    stmt.execute(query);

    System.out.println("");
    System.out.println("  0%                                                100%");
    System.out.println("  |--------------------------------------------------|");
    System.out.print("  |");

    int n = 0, pre = 0;
    query = "SELECT DISTINCT " + ID + " FROM " + db + "_INDEX WHERE concept in ('GO', 'SBO');";
    rs    = stmt.executeQuery(query);

    // --- number of selected documents (needed to scale the progress bar)
    rs.last();
    int all = rs.getRow();
    rs.beforeFirst();

    while (rs.next())
    {
      String id = rs.getString(1);

      // --- progress bar: 50 bars correspond to 100%
      n++;
      int p = (n*50)/all;
      for (int bars = 0; bars < p-pre; bars++) System.out.print("|");
      pre = p;

      double GO_hits  = countHits(db, id, "GO");
      double SBO_hits = countHits(db, id, "SBO");

      query = "UPDATE " + db + "_GENERAL_HITS"           + "\n" +
              "SET     GO_hits = " +  GO_hits  + ","     + "\n" +
              "       SBO_hits = " + SBO_hits            + "\n" +
              "WHERE " + ID + " = '" + id + "';";
      stmt1.execute(query);
    }

    // --- no documents at all: print a complete bar
    if (n == 0) System.out.print("||||||||||||||||||||||||||||||||||||||||||||||||||");
    System.out.println("|");
    rs.close();

    // --- documents without any SBO hit are dropped
    query = "DELETE FROM " + db + "_GENERAL_HITS WHERE SBO_hits = 0;";
    stmt.execute(query);

    // -----------------------------------------------------------------------
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release the JDBC resources even when a query failed half-way
    try {if (rs    != null) rs.close();}    catch (SQLException ex) {this.explain(ex);}
    try {if (stmt1 != null) stmt1.close();} catch (SQLException ex) {this.explain(ex);}
    try {if (stmt  != null) stmt.close();}  catch (SQLException ex) {this.explain(ex);}
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- calculate the number of hits for all documents and
//     all enzymes
// ----------------------------------------------------------
public void hits()
{
  // --- for every selected database: empty both hit tables and calculate the
  //     general hits; then calculate the enzyme-specific hits for each
  //     enzyme listed in CON_EC
  System.out.println("\nKiPar.hits(): for each document calculate the numbers of hits");

  try
  {
    // --- the configured literature database option, lower-cased
    final String option = new Configuration().options_database().toLowerCase();

    // --- NB: substring test - a database is selected when the option occurs
    //     anywhere inside the literal naming the values accepted for it
    final boolean withPubMed = "pubmed or both".contains(option);
    final boolean withPMC    = "pmc or both".contains(option);

    Statement statement = con.createStatement();

    // --- determine general hits
    if (withPubMed)
    {
      statement.execute(empty("PubMed_GENERAL_HITS"));
      statement.execute(empty("PubMed_SPECIFIC_HITS"));

      this.hits("PubMed");
    }

    if (withPMC)
    {
      statement.execute(empty("PMC_GENERAL_HITS"));
      statement.execute(empty("PMC_SPECIFIC_HITS"));

      this.hits("PMC");
    }

    // --- determine enzyme-specific hits
    ResultSet enzymes = statement.executeQuery("SELECT ID FROM CON_EC;");
    while (enzymes.next())
    {
      String enzyme = enzymes.getString(1);

      if (withPubMed) this.hits("PubMed", enzyme);
      if (withPMC)    this.hits("PMC",    enzyme);
    }

    enzymes.close();
    statement.close();
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- score each indexed document from the given DB
// ----------------------------------------------------------
public void score(String db)
{
  // --- db : literature database; <db>_GENERAL_HITS and <db>_SPECIFIC_HITS are
  //          read, the column "score" of <db>_GENERAL_HITS is written
  // --- The score of a document combines, using the concept weights,
  //       - its GO hits,
  //       - the maximum over its enzyme-specific rows of the weighted sum of
  //         EC, CPD and SCE hits,
  //       - its SBO hits;
  //     see the SELECT statement below for the exact formula.
  // --- Documents left without a score are deleted from <db>_GENERAL_HITS.
  System.out.println("\nKiPar.score(" + db + "): score the documents from " + db);

  String    query;
  Statement stmt  = null;
  ResultSet rs    = null;
  Statement stmt1 = null;

  // --- obtain the weights for the considered concepts
  double w_EC   = weight("EC");
  double w_CPD  = weight("CPD");
  double w_SCE  = weight("SCE");
  double w_RN   = weight("RN");
  double w_GO   = weight("GO");
  double w_PATH = weight("PATH");
  double w_SBO  = weight("SBO");

  try
  {
    // --- rs.last()/rs.beforeFirst() below require a scrollable result set;
    //     a default (forward-only) statement is not guaranteed to support them
    stmt  = con.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
    stmt1 = con.createStatement();

    String doc_ID = docID(db);

    System.out.println("");
    System.out.println("  0%                                                100%");
    System.out.println("  |--------------------------------------------------|");
    System.out.print("  |");

    int n = 0, pre = 0;

    // --- one row per document: (document ID, score)
    query = "SELECT G." + doc_ID + ","                                                     + "\n" +
            "       100.0 / (" + w_PATH + " + " + w_SBO + ") *"                            + "\n" +
            "       ("                                                                     + "\n" +
            "         " + w_PATH + " / (" + w_GO + " + " + w_RN + ") *"                    + "\n" +
            "           ("                                                                 + "\n" +
            "             " + w_GO + " * G.GO_hits + "                                     + "\n" +
            "             " + w_RN + " / (" + w_EC + " + " + w_CPD + " + " + w_SCE + ") *" + "\n" +
            "             MAX("                                                            + "\n" +
            "                  " + w_EC  + " * S.EC_hit   + "                              + "\n" +
            "                  " + w_CPD + " * S.CPD_hits + "                              + "\n" +
            "                  " + w_SCE + " * S.SCE_hits   "                              + "\n" +
            "                )"                                                            + "\n" +
            "           )"                                                                 + "\n" +
            "         +"                                                                   + "\n" +
            "         " + w_SBO + " * G.SBO_hits"                                          + "\n" +
            "       )"                                                                     + "\n" +
            "FROM   " + db + "_GENERAL_HITS G,"                                            + "\n" +
            "       " + db + "_SPECIFIC_HITS S"                                            + "\n" +
            "WHERE  S." + doc_ID + " = G." + doc_ID                                        + "\n" +
            "GROUP BY G." + doc_ID + ", G.GO_hits, G.SBO_hits;";
    rs = stmt.executeQuery(query);

    // --- number of scored documents (needed to scale the progress bar)
    rs.last();
    int all = rs.getRow();
    rs.beforeFirst();

    while (rs.next())
    {
      // --- progress bar: 50 bars correspond to 100%
      n++;
      int p = (n*50)/all;
      for (int bars = 0; bars < p-pre; bars++) System.out.print("|");
      pre = p;

      String id    = rs.getString(1);
      double score = rs.getDouble(2);

      query = "UPDATE " + db + "_GENERAL_HITS"       + "\n" +
              "SET    score = " + score              + "\n" +
              "WHERE " + doc_ID + " = '" + id + "';";
      stmt1.execute(query);
    }

    // --- no documents at all: print a complete bar
    if (n == 0) System.out.print("||||||||||||||||||||||||||||||||||||||||||||||||||");
    System.out.println("|");

    // --- documents that did not receive a score are dropped
    query = "DELETE FROM " + db + "_GENERAL_HITS WHERE score IS NULL;";
    stmt.execute(query);
  }
  catch (SQLException ex) {this.explain(ex);}
  finally
  {
    // --- release the JDBC resources even when a query failed half-way
    try {if (rs    != null) rs.close();}    catch (SQLException ex) {this.explain(ex);}
    try {if (stmt  != null) stmt.close();}  catch (SQLException ex) {this.explain(ex);}
    try {if (stmt1 != null) stmt1.close();} catch (SQLException ex) {this.explain(ex);}
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- upload the configured concept weights, calculate the hits
//     and score the indexed documents of each selected database
// ----------------------------------------------------------
public void score()
{
  // --- Purpose: copy the configured concept weights into the CONCEPTS table,
  //     recompute the hits (this.hits()) and then score the documents of each
  //     literature database selected in the configuration.
  // --- Errors are reported (explain / console) and not propagated.
  System.out.println("\nKiPar.score(): score all indexed documents");

  Statement stmt = null;

  try
  {
    Configuration config = new Configuration();

    stmt = con.createStatement();

    System.out.print("Uploading weights... ");
    stmt.execute("UPDATE CONCEPTS SET weight = " + config.weight_EC()   + " WHERE concept = 'EC';");
    stmt.execute("UPDATE CONCEPTS SET weight = " + config.weight_CPD()  + " WHERE concept = 'CPD';");
    stmt.execute("UPDATE CONCEPTS SET weight = " + config.weight_SCE()  + " WHERE concept = 'SCE';");
    stmt.execute("UPDATE CONCEPTS SET weight = " + config.weight_RN()   + " WHERE concept = 'RN';");
    stmt.execute("UPDATE CONCEPTS SET weight = " + config.weight_GO()   + " WHERE concept = 'GO';");
    stmt.execute("UPDATE CONCEPTS SET weight = " + config.weight_PATH() + " WHERE concept = 'PATH';");
    stmt.execute("UPDATE CONCEPTS SET weight = " + config.weight_SBO()  + " WHERE concept = 'SBO';");
    System.out.println("Done.\n");

    // --- release the statement before the (long-running) scoring starts
    stmt.close();
    stmt = null;

    this.hits();

    // --- the configured value is matched as a substring of the accepted
    //     values, so "both" selects PubMed as well as PMC
    String database = config.options_database();
    database = database.toLowerCase();
    if ("pubmed or both".indexOf(database) >= 0) this.score("PubMed");
    if (   "pmc or both".indexOf(database) >= 0) this.score("PMC");
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- only reached with an open statement when uploading the weights failed
    try {if (stmt != null) stmt.close();} catch (SQLException ex) {this.explain(ex);}
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- obtain the WEIGHT for the given concept (TYPE)
// ----------------------------------------------------------
private double weight(String concept)
{
  // --- Purpose: look up the weight stored for the given concept in the
  //     CONCEPTS table.
  // --- Returns:  the stored weight, or the default 1.0 when there is no
  //     matching row or the lookup fails (the SQL error is reported by explain).
  double w = 1.0;

  PreparedStatement stmt = null;
  ResultSet         rs   = null;

  try
  {
    // --- bind the concept as a parameter instead of splicing it into the SQL
    stmt = con.prepareStatement("SELECT weight FROM concepts WHERE concept = ?");
    stmt.setString(1, concept);
    rs   = stmt.executeQuery();
    if (rs.next()) w = rs.getDouble(1);
  }
  catch (SQLException ex) {this.explain(ex);}
  finally
  {
    // --- release the JDBC resources even if the query failed
    try {if (rs   != null) rs.close();}   catch (SQLException ex) {this.explain(ex);}
    try {if (stmt != null) stmt.close();} catch (SQLException ex) {this.explain(ex);}
  }

  return w;
}
// ----------------------------------------------------------

// ************************************************************************************
// ************************************************************************************
// ************************************************************************************



// ************************************************************************************
// ************************** F I N A L   R E S U L T S *******************************
// ************************************************************************************

// ----------------------------------------------------------
// --- retrieve citation details and abstract from
//     PubMed or PMC for all indexed documents scoring
//     more than a given threshold
// ----------------------------------------------------------
public void retrieve(String db, double threshold)
{
  // --- Purpose: empty the <db>_citation table and refill it with the citation
  //     details (authors, title, reference, abstract) fetched through Entrez
  //     for the documents scoring above the threshold.
  // --- db:        literature database name used as table prefix ("PubMed" or "PMC")
  // --- threshold: only documents with score > threshold are retrieved
  // --- Errors are reported (explain / console) and not propagated.
  System.out.println("\nKiPar.retrieve(" + db + ", " + threshold + "): retrieve info (citation details and abstract)");

  String    query;
  Statement stmt  = null;   // --- selects the documents to retrieve
  Statement stmt1 = null;   // --- stores the retrieved citations
  ResultSet rs    = null;

  try
  {
    stmt  = con.createStatement();
    stmt1 = con.createStatement();

    Entrez entrez = new Entrez();

    Configuration config = new Configuration();

    int max = config.output_HTML_max();   // --- max number of documents retrieved/exported

    String doc_ID = docID(db);

    query = empty(db + "_citation"); stmt.execute(query);

    // --- which documents to retrieve?
    //     The rows are ranked before LIMIT is applied: without ORDER BY the
    //     database may return any <max> rows above the threshold, so the
    //     best-scoring documents could end up without a citation.
    query = "SELECT " + doc_ID                                        + "\n" +
            "FROM   " + db + "_GENERAL_HITS"                          + "\n" +
            "WHERE  score > " + threshold                             + "\n" +
            "ORDER BY score DESC"                                     + "\n" +
            "LIMIT  " + max + ";";
    rs = stmt.executeQuery(query);
    while (rs.next())
    {
      // --- given a PM[C]ID, retrieve the citation details using Entrez

      String id         = rs.getString(1);
      Citation citation = entrez.getCitation(db, id);

      String authors  = "'" + fixApostrophe(citation.authors)  + "'";
      String title    = "'" + fixApostrophe(citation.title)    + "'";
      String ref      = "'" + fixApostrophe(citation.ref)      + "'";
      String Abstract = "'" + fixApostrophe(citation.Abstract) + "'";

      String row      = "'" + id + "', " + authors + ", " + title + ", " + ref + ", " + Abstract;

      query = "INSERT INTO " + db + "_citation(" + doc_ID + ", authors, title, ref, abstract)"  + "\n" +
              "VALUES (" + row + ");";
      System.out.println("\n" + query + "\n");
      stmt1.execute(query);

      if (!isPubMed(db))
      {
        // --- map PMCID to PMID
        query = "UPDATE PMC_CITATION SET PMID = '" + entrez.PMID(id) + "' WHERE PMCID = '" + id + "';";
        stmt1.execute(query);
      }
    }
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release the JDBC resources even if a retrieval step failed
    try {if (rs    != null) rs.close();}    catch (SQLException ex) {this.explain(ex);}
    try {if (stmt1 != null) stmt1.close();} catch (SQLException ex) {this.explain(ex);}
    try {if (stmt  != null) stmt.close();}  catch (SQLException ex) {this.explain(ex);}
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- retrieve citation details and abstract from a
//     user-given list of documents
// ----------------------------------------------------------
public void retrieve(String db)
{
  // --- Purpose: load the user-given document IDs (file named in the config)
  //     into USER_<db> and fetch, through Entrez, the citation details of
  //     every listed document that is not yet present in <db>_CITATION.
  // --- db: literature database name used as table prefix ("PubMed" or "PMC")
  // --- Errors are reported (explain / console) and not propagated.
  System.out.println("\nKiPar.retrieve(" + db + "): retrieve info (citation details and abstract)");

  String    query;
  Statement stmt  = null;   // --- uploads the IDs and selects the documents to retrieve
  Statement stmt1 = null;   // --- stores the retrieved citations
  ResultSet rs    = null;

  try
  {
    stmt  = con.createStatement();
    stmt1 = con.createStatement();
    String doc_ID = docID(db);


    // --- get the file with user-selected document IDs
    Configuration config = new Configuration();
    String file_name;

    if (isPubMed(db)) file_name = config.input_PubMed();
    else              file_name = config.input_PMC();


    // --- upload the document IDs into the database
    query = empty("USER_" + db); stmt.execute(query);
    this.tmp1(file_name);
    query = "INSERT INTO USER_" + db + "(" + doc_ID + ")"             + "\n" +
            "SELECT DISTINCT ID FROM TMP1;";
    stmt.execute(query);


    Entrez entrez = new Entrez();

    // --- which documents to retrieve?
    query = "SELECT " + doc_ID                                        + "\n" +
            "FROM   USER_" + db                                       + "\n" +
            "WHERE " + doc_ID + " NOT IN "                            + "\n" +
            "(SELECT " + doc_ID + " FROM " + db + "_CITATION);";
    rs = stmt.executeQuery(query);
    while (rs.next())
    {
      // --- given a PM[C]ID, retrieve the citation details using Entrez

      String id         = rs.getString(1);
      Citation citation = entrez.getCitation(db, id);

      String authors  = "'" + fixApostrophe(citation.authors)  + "'";
      String title    = "'" + fixApostrophe(citation.title)    + "'";
      String ref      = "'" + fixApostrophe(citation.ref)      + "'";
      String Abstract = "'" + fixApostrophe(citation.Abstract) + "'";

      String row      = "'" + id + "', " + authors + ", " + title + ", " + ref + ", " + Abstract;

      query = "INSERT INTO " +db + "_citation(" + doc_ID + ", authors, title, ref, abstract)"  + "\n" +
              "VALUES (" + row + ");";
      System.out.println("\n" + query + "\n");
      stmt1.execute(query);

      if (!isPubMed(db))
      {
        // --- map PMCID to PMID
        query = "UPDATE PMC_CITATION SET PMID = '" + entrez.PMID(id) + "' WHERE PMCID = '" + id + "';";
        stmt1.execute(query);
      }
    }
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release the JDBC resources even if a retrieval step failed
    try {if (rs    != null) rs.close();}    catch (SQLException ex) {this.explain(ex);}
    try {if (stmt1 != null) stmt1.close();} catch (SQLException ex) {this.explain(ex);}
    try {if (stmt  != null) stmt.close();}  catch (SQLException ex) {this.explain(ex);}
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- retrieve citation details and abstract
// ----------------------------------------------------------
public void retrieve()
{
  // --- Dispatcher: for each literature database selected in the config,
  //     retrieve citations either for the documents scoring above the
  //     average (search mode) or for the user-given list of documents.
  System.out.println("\nKiPar.retrieve(): retrieve info (citation details and abstract)");

  try
  {
    Configuration cfg        = new Configuration();
    String        selected   = cfg.options_database();
    boolean       searchMode = cfg.options_search().equals("yes");

    selected = selected.toLowerCase();

    // --- the configured value is matched as a substring of the accepted values
    boolean wantPubMed = "pubmed or both".indexOf(selected) >= 0;
    boolean wantPMC    =    "pmc or both".indexOf(selected) >= 0;

    if (wantPubMed)
    {
      if (searchMode) this.retrieve("PubMed", threshold("pubmed"));
      else            this.retrieve("PubMed");
    }

    if (wantPMC)
    {
      if (searchMode) this.retrieve("PMC", threshold("pmc"));
      else            this.retrieve("PMC");
    }
  }
  catch (Exception ex) {System.out.println(ex.toString());}
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- the average score for indexed documents from 
//     the given DB (PubMed or PMC)
// ----------------------------------------------------------
public double threshold(String db)
{
  // --- Purpose: compute AVG(score) over the <db>_GENERAL_HITS table.
  // --- db:      literature database name used as table prefix
  // --- Returns: the average score, or 0.0 when the query yields no value
  //     or fails (the SQL error is reported by explain).
  System.out.println("\nKiPar.threshold(" + db + "): the AVG score for indexed documents");

  double t = 0.0;

  Statement stmt = null;
  ResultSet rs   = null;

  try
  {
    stmt = con.createStatement();
    rs   = stmt.executeQuery("SELECT AVG(score) FROM " + db + "_GENERAL_HITS;");
    if (rs.next()) t = rs.getDouble(1);
  }
  catch (SQLException ex) {this.explain(ex);}
  finally
  {
    // --- release the JDBC resources even if the query failed
    try {if (rs   != null) rs.close();}   catch (SQLException ex) {this.explain(ex);}
    try {if (stmt != null) stmt.close();} catch (SQLException ex) {this.explain(ex);}
  }

  return t;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- generate the HTML report (index.html) summarising
//     the input, the configuration and the export
// ----------------------------------------------------------
public void report(int totalPubMed, int totalPMC)
{
  // --- Purpose: write <path>\output\html\index.html, a summary page linking
  //     to the exported result pages and describing the input concepts and
  //     the configuration used; config.xml is copied next to it.
  // --- totalPubMed: number of exported PubMed abstracts shown in the report
  // --- totalPMC:    number of exported PMC articles shown in the report
  // --- Errors are printed to the console and not propagated.
  System.out.println("\nKiPar.report(): generate an HTML report summarising the export");

  try
  {
    Configuration config = new Configuration();

    // --- the configured value is matched as a substring of the accepted
    //     values, so "both" selects PubMed as well as PMC
    String database = config.options_database();
    database = database.toLowerCase();
    boolean PubMed = "pubmed or both".indexOf(database) >= 0;
    boolean PMC    =    "pmc or both".indexOf(database) >= 0;
    boolean search = (config.options_search()).equals("yes");

    StringBuilder html = new StringBuilder();

    html.append("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n" +
                "<html>\n" +
                "<head>\n" +
                "<title>KiPar: information retrieval for kinetic parameters</title>\n" +
                "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n" +
                "<link rel=\"stylesheet\" type=\"text/css\" href=\"KiPar.css\" />\n" +
                "</head>\n" +
                "<body>\n" +
                "<div align=\"Center\">\n" +
                "<TABLE WIDTH=\"80%\">\n" +
                "<TR><TH class=\"title\">\n" +
                "<span class=\"kipar\">KiPar</span>: a text mining toolbox for systematic screening of yeast\n" +
                "<span class=\"kipar\">Ki</span>netic <span class=\"kipar\">Par</span>ameters for metabolic models\n" +
                "</TH></TR>\n" +
                "<TR><TD>\n" +
                "<BR><HR><BR>\n");


    // --- links to the first result page of each exported database
    html.append("<p>The <strong>output</strong> is given as a list of documents annotated with <a href=\"#concepts\">concepts</a> \n" +
                "(legend: \n" +
                "  <span class=\"enzyme\">enzyme</span>, \n" +
                "  <span class=\"compound\">compound</span>, \n" +
                "  <span class=\"gene\">gene</span>, \n" +
                "  <span class=\"pathway\">pathway</span>, \n" +
                "  <span class=\"kinetics\">kinetics</span>) \n" +
                "relevant for parametrising metabolic models:</p>\n" +
                "<ul>\n");

    if (PubMed)
    {
      String first_page = reportFirstPage(config.output_HTML_PubMed());
      html.append("<li> " + totalPubMed + " <a href=\"" + first_page + "\">abstracts</a> from <a href=\"http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed\">PubMed</a>\n");
    }
    if (PMC)
    {
      String first_page = reportFirstPage(config.output_HTML_PMC());
      html.append("<li> " + totalPMC    + " <a href=\"" + first_page + "\">articles</a>  from <a href=\"http://www.pubmedcentral.nih.gov/\">PubMed Central</a>\n");
    }

    html.append("</ul>\n" +
                "<BR><HR><BR>\n");


    // --- the input concepts, each rendered as a link to its source database
    String path = config.path();

    String enzymes  = reportLinks(path + "\\input\\" + config.input_EC(),  BASE_KEGG_EC, "",     0);
    String pathway  = reportLinks(path + "\\input\\" + config.input_GO(),  BASE_GO,      "",     0);
    // --- SBO identifiers are written as "SBO:" followed by a 7-digit number
    String kinetics = reportLinks(path + "\\input\\" + config.input_SBO(), BASE_SBO,     "SBO:", 7);

    html.append("<p>The output was generated for this <strong><a href=\"../../input\">input</a></strong>:</p>\n" +
                "<ul>\n" +
                "<li> <a name=\"concepts\"></a><strong>Concepts</strong> to search and/or annotate:\n" +
                "  <ul>\n" +
                "  <li> <a name=\"enzymes\"></a> <strong><a href=\"../../input/" + config.input_EC()  + "\">enzymes</a></strong>: "  + enzymes  + "\n" +
                "  <li> <a name=\"pathway\"></a> <strong><a href=\"../../input/" + config.input_GO()  + "\">pathway</a></strong>: "  + pathway  + "\n" +
                "  <li> <a name=\"kinetics\"></a><strong><a href=\"../../input/" + config.input_SBO() + "\">kinetics</a></strong>: " + kinetics + "\n" +
                "  </ul>\n" +
                "</ul>\n");

    if (!search)
    {
      html.append("<ul>\n" +
                  "<li> <a name=\"docs\"></a><strong>Documents</strong> to annotate:\n" +
                  "  <ul>\n");

      if (PubMed) html.append("  <li> <a href=\"../../input/" + config.input_PubMed() + "\">PubMed</a>\n");
      if (PMC)    html.append("  <li> <a href=\"../../input/" + config.input_PMC()    + "\">PubMed Central</a>\n");

      html.append("  </ul>\n" +
                  "</ul>\n");
    }


    // --- the configuration used
    html.append("<BR><HR><BR>\n" +
                "<p>This <strong><a href=\"config.xml\">configuration</a></strong> was used:</p>\n" +
                "<p><strong>Literature database(s)</strong> used:</p>\n" +
                "<ul>\n");

    if (PubMed) html.append("<li><a href=\"http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed\">PubMed</a>\n");
    if (PMC)    html.append("<li><a href=\"http://www.pubmedcentral.nih.gov/\">PubMed Central</a>\n");

    html.append("</ul>\n" +
                "<p><strong>Search</strong> the literature?</p>\n");

    if (search)
    {
      html.append("<ul>\n" +
                  "<li><strong>YES</strong> (find documents automatically)\n" +
                  "<li><span class=\"vague\"><strike><strong>NO</strong> (use the given list of documents)</strike></span>\n" +
                  "</ul>\n");

      html.append("<p>\n" +
                  "The <strong>formula</strong> used to score documents when searching the literature: <BR><BR>\n" +
                  "<img src=\"formula.gif\" alt=\"formula\"/>\n" +
                  "<BR><BR> where:\n" +
                  "  <BR>&minus; <span class=\"value\"><em>E</em></span> is a set of <strong>enzymes</strong> of interest (see <a href=\"#enzymes\">input</a> above),\n" +
                  "  <BR>&minus; <span class=\"value\"><em>e</em></span> is an <strong>enzyme</strong> from <em>E</em>,\n" +
                  "  <BR>&minus; <span class=\"value\"><em>CPD<sub>e</sub></em></span> is a set of <strong>compounds</strong> involved in the reaction catalysed by the enzyme <em>e</em>,\n" +
                  "  <BR>&minus; <span class=\"value\"><em>SCE<sub>e</sub></em></span> is a set of <em>Saccharomyces cerevisiae</em> <strong>genes</strong> encoding the enzyme <em>e</em>,\n" +
                  "  <BR>&minus; <span class=\"value\"><em>GO</em></span> is a set of a <strong>pathway</strong>-related concepts from the <a href=\"http://www.geneontology.org/\">Gene Ontology</a> (see <a href=\"#pathway\">input</a> above),\n" +
                  "  <BR>&minus; <span class=\"value\"><em>SBO</em></span> is a set of a <strong>kinetics</strong>-related concepts from the <a href=\"http://www.ebi.ac.uk/sbo/\">Systems Biology Ontology</a> (see <a href=\"#kinetics\">input</a> above),\n" +
                  "  <BR>&minus; <span class=\"value\">hits(<em>S</em>)</span> is the percentage of concepts in the set <em>S</em> matching the given document,\n" +
                  "  <BR>&minus; the following <strong>weights</strong> are used for: \n" +
                  "enzymes   (<span class=\"value\"><em>w<SUB>EC</SUB></em>   = " + config.weight_EC()   + "</span>), \n" +
                  "compounds (<span class=\"value\"><em>w<SUB>CPD</SUB></em>  = " + config.weight_CPD()  + "</span>), \n" +
                  "genes     (<span class=\"value\"><em>w<SUB>SCE</SUB></em>  = " + config.weight_SCE()  + "</span>), \n" +
                  "reactions (<span class=\"value\"><em>w<SUB>RN</SUB></em>   = " + config.weight_RN()   + "</span>), \n" +
                  "GO terms  (<span class=\"value\"><em>w<SUB>GO</SUB></em>   = " + config.weight_GO()   + "</span>), \n" +
                  "pathway   (<span class=\"value\"><em>w<SUB>PATH</SUB></em> = " + config.weight_PATH() + "</span>), \n" +
                  "kinetics  (<span class=\"value\"><em>w<SUB>SBO</SUB></em>  = " + config.weight_SBO()  + "</span>). \n" +
                  "</p>\n");
    }
    else
    {
      html.append("<ul>\n" +
                  "<li><span class=\"vague\"><strike><strong>YES</strong> (find documents automatically)</strike></span>\n" +
                  "<li><strong>NO</strong> (use the given list of <a href=\"#docs\">documents</a>)\n" +
                  "</ul>\n");
    }


    // --- the export options; disabled ones are struck through
    html.append("<p><strong>Export</strong> options:</p>\n" +
                "<ul>\n");

    html.append(reportExportOption("HTML",                 (config.options_export_HTML()).equals("yes")));
    html.append(reportExportOption("bibtex",               (config.options_export_bibtex()).equals("yes")));
    html.append(reportExportOption("full-text annotation", (config.options_export_fullText()).equals("yes")));

    if (search)
    {
      List<String> export_tags_list = config.options_export_tags_list();
      StringBuilder tags = new StringBuilder();
      int n = export_tags_list.size();
      for (int i = 0; i < n; i++) tags.append(export_tags_list.get(i) + "; ");
      if (n > 0) html.append("<li>tags: " + tags + "\n");
    }

    html.append("</ul>\n" +
                "<BR><HR><BR>\n" +
                "<p><span class=\"vague\">This report was generated on " + this.timeStamp() + ".</span></p>\n" +
                "</TD></TR>\n" +
                "</TABLE>\n" +
                "</div>\n" +
                "</body>\n" +
                "</html>");

    // --- make a copy of the config.xml file
    copyFile(new File("config.xml"), new File(path + "\\output\\html\\config.xml"));

    // --- write the report into an HTML file
    FileWriter file_out = fileOut(path + "\\output\\html\\index.html");
    try     {file_out.write(html.toString());}
    finally {file_out.close();}   // --- close the file even if writing failed
  }
  catch (Exception ex) {System.out.println(ex.toString());}
}

// --- name of the first result page ("<root>_1<extension>") derived from the
//     configured page name; the name is expected to contain an extension
private String reportFirstPage(String page_name)
{
  int dot = page_name.lastIndexOf(".");
  return page_name.substring(0, dot) + "_" + "1" + page_name.substring(dot);
}

// --- read the concept IDs from the given input file (empty entries are
//     skipped) and render each as a link followed by "; "; an ID shorter
//     than <width> is left-padded with zeros, then prefixed with <prefix>
private String reportLinks(String file_name, String base, String prefix, int width) throws Exception
{
  StringBuilder links = new StringBuilder();

  EasyReader file = fileIn(file_name);
  try
  {
    while (!file.eof())
    {
      String ID = file.readString();
      if (ID.length() == 0) continue;   // --- skip empty lines

      // --- IDs already longer than <width> are used as they are
      StringBuilder full_ID = new StringBuilder(prefix);
      for (int i = ID.length(); i < width; i++) full_ID.append('0');
      full_ID.append(ID);

      links.append(this.link(base, full_ID.toString(), "") + "; ");
    }
  }
  finally {file.close();}   // --- close the file even if reading failed

  return links.toString();
}

// --- one list item describing an export option: plain when the option is
//     enabled (YES), struck through when it is disabled (NO)
private String reportExportOption(String label, boolean enabled)
{
  if (enabled) return "<li>" + label + ": <strong>YES</strong>" + "\n";

  return "<li><span class=\"vague\"><strike>" + label + ": <strong>NO</strong></strike></span>" + "\n";
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- export citations from PubMed and PMC to HTML
//     THE FINAL INFORMATION RETRIEVAL RESULTS
// ----------------------------------------------------------
public void export()
{
  // --- Dispatcher: export every literature database selected in the config,
  //     then generate the summary report with the number of documents
  //     exported from each of them (0 for a database that was not selected).
  System.out.println("\nKiPar.export(): export info (citation details and abstract) to HTML");

  try
  {
    Configuration cfg = new Configuration();

    // --- the configured value is matched as a substring of the accepted values
    String selected = cfg.options_database().toLowerCase();

    int exportedPubMed = ("pubmed or both".indexOf(selected) >= 0) ? this.export("PubMed") : 0;
    int exportedPMC    = (   "pmc or both".indexOf(selected) >= 0) ? this.export("PMC")    : 0;

    this.report(exportedPubMed, exportedPMC);
  }
  catch (Exception ex) {System.out.println(ex.toString());}
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- export citations from PubMed or PMC into an
//     HTML document to be presented to the user as 
//     THE FINAL INFORMATION RETRIEVAL RESULT
// ----------------------------------------------------------
public int export(String db)
{
  // --- Purpose: export the documents of the given database into a series of
  //     HTML pages ("<root>_<page><extension>", <items> documents per page,
  //     at most <max> documents) and, if configured, into a BibTex file.
  //     In search mode the documents scoring above the average are exported
  //     (optionally restricted to the configured tags), ordered by score;
  //     otherwise the user-given documents are exported, ordered by ID.
  // --- db:      literature database name used as table prefix ("PubMed" or "PMC")
  // --- Returns: the number of documents exported (also after a failure, in
  //     which case it counts the documents processed so far).
  System.out.println("\nKiPar.export(db): export info from " + db);

  String html[]         = {"", "", "", ""};
  StringBuffer abBoxes  = new StringBuffer();
  StringBuffer htmlBody = new StringBuffer();

  Statement  stmt       = null;
  ResultSet  rs         = null;
  FileWriter bibtex_out = null;
  String     query;

  int doc = 0;    // --- # of documents exported

  try
  {
    Configuration config = new Configuration();

    String path = config.path();

    boolean search        = (config.options_search()).equals("yes");
    boolean export2bibtex = (config.options_export_bibtex()).equals("yes");

    int items = config.output_HTML_items(); // --- items = docs per page
    int max   = config.output_HTML_max();   // --- max number of documents exported

    String ID_column;
    String page_name;
    String bibtex_name;

    if (isPubMed(db))
    {
      ID_column    = "PMID";
      page_name    = config.output_HTML_PubMed();
      bibtex_name  = config.output_bibtex_PubMed();
    }
    else
    {
      ID_column    = "PMCID";
      page_name    = config.output_HTML_PMC();
      bibtex_name  = config.output_bibtex_PMC();
    }

    // --- rs.last() / rs.beforeFirst() below require a scrollable result set;
    //     the JDBC default (TYPE_FORWARD_ONLY) is allowed to reject them
    stmt = con.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);


    // --- obtain the fixed HTML code ---
    query = "SELECT ID, code FROM HTML ORDER BY ID ASC;";
    rs    = stmt.executeQuery(query);
    while (rs.next())
    {
      int    ID   = rs.getInt(1);
      String code = rs.getString(2);
      html[ID] = code;
    }
    rs.close();
    // -----------------------------------

    String footer = html[3];

    // --- name & extension for the chunk HTML files ---
    int dot = page_name.lastIndexOf(".");
    String name_root  = page_name.substring(0, dot);
    String extension  = page_name.substring(dot);
    String chunk_root = path + "\\output\\html\\" + name_root + "_";


    bibtex_out = fileOut(path + "\\output\\bibtex\\" + bibtex_name);


    // --- which documents to export?
    if (search)
    {
      // --- only documents indexed with ALL of the configured tags
      List<String> export_tags_list = config.options_export_tags_list();
      String restriction = "";
      int n = export_tags_list.size();
      for (int i = 0; i < n; i++)
      {
        String tag = export_tags_list.get(i);
        restriction += "  INTERSECT\n  (SELECT " + ID_column + " FROM " + db + "_INDEX WHERE ID = '" + tag + "')";
      }
      // --- substring(11) drops the leading "  INTERSECT" of the first sub-query
      if (n > 0) restriction = "AND    " + ID_column + " IN \n(" + restriction.substring(11) + ")" + "\n";

      query = "SELECT GH." + ID_column + ", score"            + "\n" +
              "FROM   " + db + "_GENERAL_HITS GH,"            + "\n" +
              "       " + db + "_CITATION C"                  + "\n" +
              "WHERE  score > " + threshold(db)               + "\n" +
              "AND    GH." + ID_column + " = C."+ ID_column   + "\n" +
              restriction                                     + "\n" +
              "ORDER BY score DESC;";
    }
    else query = "SELECT " + ID_column + " FROM USER_" + db + " ORDER BY " + ID_column + " ASC;";

    rs = stmt.executeQuery(query);
    rs.last();
    int total = rs.getRow();
    if (total > max) total = max;
    int page = 0;
    if (total % items > 0) page = 1;
    page += total / items;   // --- total number of HTML pages (items = max # of documents per page)
    rs.beforeFirst();
    while (doc < total)
    {
      rs.next();

      String id = rs.getString(1);

      double score = 0;
      if (search) score = Math.round(rs.getDouble(2)*100)/100.0;

      abBoxes.append(", 'element_" + id + "_abbox'");
      htmlBody.append(this.reference(db, id, score));
      htmlBody.append(this.annotateDocument(db, id));
      if (search) htmlBody.append(this.scoreTable(db, id));

      // --- bibtexGlobal was set by the reference() call above
      if (export2bibtex) bibtex_out.write(bibtexGlobal + "\n");

      doc++; // --- the ordinal number of the document currently being exported

      if (doc % items == 0)   // --- write new HTML page
      {
        int p = doc / items;  // --- current HTML page with exported documents

        System.out.println("\nWriting the " + p + ". HTML result page...\n");
        writeExportPage(chunk_root + p + extension,
                        exportHeader(html, abBoxes.substring(2), search, export2bibtex, bibtex_name),
                        htmlBody.toString(),
                        this.navigation(p, page, page_name, items),
                        footer);

        // --- clean up memory & start new page
        htmlBody = new StringBuffer();
        abBoxes  = new StringBuffer();

        Runtime runtime = Runtime.getRuntime();
        System.out.println("Free memory = " + runtime.freeMemory());
        runtime.gc();
      }
    }

    if (doc % items > 0)
    {
      // --- write the last (partially filled) HTML page
      writeExportPage(chunk_root + page + extension,
                      exportHeader(html, abBoxes.substring(2), search, export2bibtex, bibtex_name),
                      htmlBody.toString(),
                      this.navigation(page, page, page_name, items),
                      footer);
    }
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex) {ex.printStackTrace();}
  finally
  {
    // --- release the database and file resources even if the export failed
    try {if (rs         != null) rs.close();}         catch (SQLException ex) {this.explain(ex);}
    try {if (stmt       != null) stmt.close();}       catch (SQLException ex) {this.explain(ex);}
    try {if (bibtex_out != null) bibtex_out.close();} catch (IOException ex)  {ex.printStackTrace();}
  }

  return doc;
}

// --- assemble the header of a result page from the fixed HTML code and the
//     list of abstract boxes (and score boxes in search mode) on that page
private String exportHeader(String[] html, String boxes, boolean search, boolean export2bibtex, String bibtex_name)
{
  String header = html[0];
  header += "var abBoxes = new Array(" + boxes + ");\n\n";
  if (search) header += "var hiBoxes = new Array(" + boxes.replaceAll("abbox", "hibox") + ");\n\n";
  header += html[1];
  if (search)        header += "&emsp;&bull; ( <a href=\"javascript:void(0)\" onclick=\"expandAll(hiBoxes)\">expand</a> | <a href=\"javascript:void(0)\" onclick=\"collapseAll(hiBoxes)\">collapse</a> ) score ";
  if (export2bibtex) header += "&emsp;&bull; from <a href=\"..\\bibtex\\" + bibtex_name + "\">BibTex</a> to <a href=\"http://www.citeulike.org/import_bibtex\">citeUlike</a> ";
  header += html[2];
  return header;
}

// --- write one result page (header, documents, navigation, footer) into
//     the given file; the file is closed even if writing fails
private void writeExportPage(String file_name, String header, String body, String navigation, String footer) throws Exception
{
  FileWriter chunk_file_out = fileOut(file_name);
  try
  {
    chunk_file_out.write(header);
    chunk_file_out.write(body);
    chunk_file_out.write(navigation);
    chunk_file_out.write(footer);
  }
  finally {chunk_file_out.close();}
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- generate HTML code for the citation of the given doc
// ----------------------------------------------------------
private String reference(String db, String id, double score)
{
  // --- Purpose: build the HTML list item citing the given document
  //     (authors, title, reference, abstract/score toggles, PubMed/PMC links).
  // --- db:    literature database name used as table prefix ("PubMed" or "PMC")
  // --- id:    document ID (PMID or PMCID, depending on db)
  // --- score: document score; the score box is only shown when score > 0
  // --- Returns: the HTML code followed by "\n\n"; just "\n\n" when the
  //     citation is not found or an error occurs.
  // --- Side effect: bibtexGlobal is reset and, when BibTex export is enabled
  //     and a PMID is known, set to the BibTex entry of the document.
  System.out.println("\nKiPar.reference(" + db + ", " + id + ", " + score + ")");

  String html = "";

  Statement stmt = null;
  ResultSet rs   = null;
  String    query;

  String base  = "";
  String pmid  = "";
  String pmcid = "";
  String mark  = "";

  bibtexGlobal = "";

  try
  {
    Configuration config = new Configuration();
    boolean export2bibtex = (config.options_export_bibtex()).equals("yes");
    BibTex bibTex = new BibTex();

    stmt = con.createStatement();

    if (isPubMed(db))
    {
      if (export2bibtex) bibtexGlobal = bibTex.pmid2bibtex(id);

      base = BASE_PubMed;
      pmid = "[PubMed: <a href=\"" + base + id + "\">" + id + "</a>] ";
    }
    else
    {
      base = BASE_PMC;

      // --- a PMC document is linked to PubMed only if its PMID is known
      query = "SELECT PMID FROM PMC_CITATION WHERE PMCID = '" + id + "'";
      rs    = stmt.executeQuery(query);
      if (rs.next())
      {
        pmid = rs.getString(1);

        if (pmid != null && pmid.length() > 0)
        {
          if (export2bibtex) bibtexGlobal = bibTex.pmid2bibtex(pmid);
          pmid = "[PubMed: <a href=\"" + BASE_PubMed + pmid + "\">" + pmid + "</a>] ";
        }
        else pmid = "";
      }
      rs.close();

      pmcid = "[PMC: <a href=\"" + BASE_PMC + id + "\">" + id + "</a>] [KiPar: <a href=\".\\papers\\PMC_" + id + ".html\">full text</a>] ";
    }

    if (score > 0) mark = "[score = " + score + " <input type=\"button\" id=\"element_" + id + "_hibox_button\" "        +
                            "class=\"boxControl\" onclick=\"switchState ('element_" + id + "_hibox'); return false;\" "  +
                            "style=\"display: none\" />]\n";

    query = "SELECT authors, title, ref FROM " + db + "_CITATION WHERE " + docID(db) + " = '" + id + "';";
    rs    = stmt.executeQuery(query);
    if (rs.next())
    {
      String authors = "<div class=\"author\">" + rs.getString(1) + "</div>";
      String title   = "<a class=\"title\" href=\"" + base + id + "\">" + rs.getString(2) + "</a>";
      String ref     = rs.getString(3);

      html = "<li>" + authors + "\n" + title                                                                 + "\n" +
             "<div class=\"vague\">" + ref + " <b>"                                                          + "\n" +
             "[abstract <input type=\"button\" id=\"element_" + id + "_abbox_button\" "                      +
               "class=\"boxControl\" onclick=\"switchState('element_" + id + "_abbox'); return false;\" "    +
               "style=\"display: none\" />]"                                                                 + "\n" +
             mark + pmid + pmcid                                                                             + "\n" +
             "</b></div>";
    }
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (java.lang.Exception ex) {ex.printStackTrace();}
  finally
  {
    // --- release the JDBC resources even if building the citation failed
    try {if (rs   != null) rs.close();}   catch (SQLException ex) {this.explain(ex);}
    try {if (stmt != null) stmt.close();} catch (SQLException ex) {this.explain(ex);}
  }

  return html + "\n\n";
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- pick the term-table suffix (TERM_<suffix>)
//     for the given concept class
// ----------------------------------------------------------
private String source(String a_class)
{
  // --- map the concept class onto the suffix that callers append to
  //     "TERM_" to obtain the table holding the class's terms;
  //     the first matching class wins
  if (a_class.equals("enzyme"))   return "ec";
  if (a_class.equals("compound")) return "cpd";
  if (a_class.equals("gene"))     return "sce";
  if (a_class.equals("pathway"))  return "go";
  if (a_class.equals("kinetics")) return "sbo";

  // --- unknown concept class -> empty suffix
  return "";
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- pick a base for a URL-based query / link
// ----------------------------------------------------------
private String base(String a_class)
{
  // --- each known concept class links out to its own external
  //     resource; return the URL prefix an ID gets appended to
  if (a_class.equals("enzyme"))   return BASE_KEGG_EC;
  if (a_class.equals("compound")) return BASE_KEGG_CPD;
  if (a_class.equals("gene"))     return BASE_KEGG_SCE;
  if (a_class.equals("pathway"))  return BASE_GO;
  if (a_class.equals("kinetics")) return BASE_SBO;

  // --- unknown concept class -> no base URL
  return "";
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- an HTML link for the given ID and the base URL
// ----------------------------------------------------------
private String annotation(String term, String base, String id, String a_class)
{
  // --- <a class="a_class" href="base + id">term</a>
  //     (values are inserted as given, without any escaping)
  StringBuilder anchor = new StringBuilder();

  anchor.append("<a class=\"").append(a_class);
  anchor.append("\" href=\"").append(base).append(id);
  anchor.append("\">").append(term).append("</a>");

  return anchor.toString();
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- replace occurrences of from-string with to-string within
//     the target-text; to-string is typically an annotation:
//     <a class="..." href="...">...from...</a>
// --- an occurrence that appears to lie inside an existing link
//     is left untouched (no link inside a link)
public static String replace(String target, String from, String to)
{
  // --- target : text to search
  //     from   : literal (non-regex) string to look for
  //     to     : replacement, typically an <a ...>...</a> annotation
  //     returns the target with every occurrence of 'from' that is not
  //     inside an existing link replaced by 'to'

  // --- an empty search string matches at every position and would
  //     never advance the scan below -> nothing to replace
  if (from.length() == 0) return target;

  int start = target.indexOf(from);
  if (start < 0) return target;

  int           lf       = from.length();
  StringBuilder buffer   = new StringBuilder(target.length());
  int           copyFrom = 0;

  while (start >= 0)
  {
    // --- copy the text between the previous occurrence and this one
    buffer.append(target, copyFrom, start);

    // --- no link inside a link - check here!
    //     if the next "</a>" comes before the next "<a " (or there is
    //     no further "<a " at all), the occurrence is taken to be
    //     inside an existing annotation
    int a_close = target.indexOf("</a>", start);
    int a_open  = target.indexOf("<a ",  start);
    if (a_close > 0)
    {
      if (0 < a_open && a_open < a_close) buffer.append(to);
      else                                buffer.append(from); // --- i.e. do not replace inside an existing annotation
    }
    else buffer.append(to);

    copyFrom = start + lf;
    start = target.indexOf(from, copyFrom);
  }

  // --- copy whatever follows the last occurrence
  buffer.append(target, copyFrom, target.length());

  return buffer.toString();
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- annotate all occurrences of the given concept with
//     an a_class link:
//     <a class="a_class" href="...">...term...</a>
// ----------------------------------------------------------
private String annotateConcept(String text, String concept, String a_class)
{
  // --- text    : text to annotate
  //     concept : concept ID whose terms are looked up in TERM_<source>
  //     a_class : concept class; selects the term table, the link base
  //               and the CSS class of the generated links
  //     returns the text with separator-delimited term occurrences
  //     wrapped in links; SQL errors are reported through explain() and
  //     whatever has been annotated so far is returned

  String annotated = text;

  Statement stmt = null;
  ResultSet rs   = null;

  // --- characters separating term occurrences in text,
  //     so we don't annotate sub-words
  String separators = ".?!:;,-/(){}[] \t\n\f\r";

  int len = separators.length();

  try
  {
    // --- concept class, so we know which table to retrieve terms from
    String source = source(a_class);

    // --- URL base, to link to the concept in an external source
    String base = base(a_class);

    stmt = con.createStatement();

    // --- retrieve concept's terms -- longest first
    String query = "SELECT term FROM TERM_" + source + " WHERE id = '" + concept + "' ORDER BY LENGTH(term) DESC;";
    rs = stmt.executeQuery(query);
    while (rs.next())
    {
      String name = rs.getString(1);      // --- get term

      if (!stopWord(name))                // --- skip stop words
      {
        // --- create an annotated term: <a class="a_class" href="...">...term...</a>
        String annotation = annotation(name, base, concept, a_class);

        // --- look for the term occurrences, but make sure each
        //     occurrence is bound by separators: try every
        //     (left, right) separator pair
        for (int i = 0; i < len; i++)
        {
          char left = separators.charAt(i);

          for (int j = 0; j < len; j++)
          {
            char right = separators.charAt(j);

            // --- replace all 'plural' occurrences of the term
            annotated = replace(annotated, left + name + "s" + right, left + annotation + "s" + right);

            // --- replace all other occurrences of the term
            annotated = replace(annotated, left + name       + right, left + annotation       + right);
          }
        }
      }
    }
  }
  catch (SQLException ex) {this.explain(ex);}
  finally
  {
    // --- release the JDBC resources on every path, including failures
    try { if (rs   != null) rs.close();   } catch (SQLException ex) {this.explain(ex);}
    try { if (stmt != null) stmt.close(); } catch (SQLException ex) {this.explain(ex);}
  }

  return annotated;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- highlight sentences that contain 'kinetic' terms and
//     annotate numerical values in such sentences
// ----------------------------------------------------------
public static String focusOnKinetics(String text)
{
  // --- text : annotated text, split into sentences with indexOfEOS()
  //     returns the sentences re-joined with single spaces; a sentence
  //     that contains a class="kinetics" link gets the numerical values
  //     outside its links tagged by annotateValues() and, if any value
  //     was tagged, is wrapped in <span class="focus">...</span>

  // --- links are used as separators; compile the pattern once
  //     instead of once per sentence
  Pattern linkPattern = Pattern.compile("<a [^>]+>[^<]+</a>");

  // --- builder instead of repeated String concatenation
  StringBuilder annotated = new StringBuilder();

  int start = 0;

  // --- find the end of the 1st sentence
  int eos = indexOfEOS(text, start);

  while (eos >= 0)
  {
    // --- extract sentence
    String sentence = text.substring(start, eos+1);

    // --- does the sentence contain a 'kinetic' term?
    if (sentence.indexOf("class=\"kinetics\"") > 0)
    {
      // --- temporarily separate any 2 neighbouring links, because
      //     we're gonna use <a ...>...</a> as separators; the padding
      //     at both ends keeps the first and last segments non-empty
      sentence = "xxxxxxxx" + sentence + "xxxxxxxx";
      sentence = sentence.replaceAll("</a>\\s*<a ", "</a>yyyyyyyy<a ");
      sentence = sentence.replaceAll("</a>\\-<a ",  "</a>zzzzzzzz<a ");

      String[] nonLink = linkPattern.split(sentence + " ");

      StringBuilder rebuilt = new StringBuilder();

      int from = 0, to; // --- link boundaries

      // --- for each sentence segment that is followed by a link
      for (int i = 0; i < nonLink.length - 1; i++)
      {
        // --- extract link: <a ...>...</a>
        from = sentence.indexOf("<a ",  from);     // --- open  link
        to   = sentence.indexOf("</a>", from) + 4; // --- close link
        String link = sentence.substring(from, to);

        // --- annotate numerical values in the current segment
        rebuilt.append(annotateValues(nonLink[i])).append(link);

        from = to + 1;
      }

      // --- the segment after the last link
      rebuilt.append(annotateValues(nonLink[nonLink.length - 1]));

      // --- remove the artificial link separators
      String annotatedSentence = rebuilt.toString();
      annotatedSentence = annotatedSentence.replaceAll("xxxxxxxx", "");
      annotatedSentence = annotatedSentence.replaceAll("yyyyyyyy", " ");
      annotatedSentence = annotatedSentence.replaceAll("zzzzzzzz", "-");

      // --- highlight the sentence if it contains numerical values
      if (annotatedSentence.contains("<span class=\"value\">")) sentence = " <span class=\"focus\">" + annotatedSentence + "</span>";
      else                                                      sentence = " "                       + annotatedSentence;
    }

    annotated.append(sentence).append(" ");

    // --- find next sentence
    start = eos+2;
    eos = indexOfEOS(text, start);
  }

  return annotated.toString();
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- annotate numerical values in the input text
// ----------------------------------------------------------
public static String annotateValues(String input)
{
  // --- input : plain text (no links)
  //     returns the input with every numeral that is bound by separator
  //     characters wrapped in <span class="value">...</span>; the text
  //     itself is left unchanged, only the tags are inserted
  //
  //     Matches are walked with a Matcher rather than rebuilt from
  //     Pattern.split() output, so a numeral at the very end of the
  //     input is kept, a match position can no longer be mistaken for
  //     an earlier identical piece of text, and the scan never reads
  //     past the end of the input.

  // --- a separator shared by two neighbouring numerals ("1, 2") can
  //     only be consumed by one match; the filler gives the second
  //     numeral a leading white space of its own
  String preprocessed = input;
  preprocessed = preprocessed.replaceAll(  ", ",   ", xxxxxxxx ");
  preprocessed = preprocessed.replaceAll("\\. ", "\\. xxxxxxxx ");

  // ---  a regular expression to describe numerals
  //      between two separator characters
  Pattern p = Pattern.compile("[\\(\\s]\\d+[,\\.]?\\d*[%:;,\\.\\s]");
  Matcher m = p.matcher(preprocessed);

  StringBuilder tagged = new StringBuilder();
  int len    = preprocessed.length();
  int copied = 0;   // --- everything before this index is already in 'tagged'

  while (m.find())
  {
    // --- the match starts with the leading separator
    int from = m.start();

    // --- integer part; the match always contains a closing character
    //     after these digits, so 'to' stays inside the text
    int to = from + 1;
    while (to < len && Character.isDigit(preprocessed.charAt(to))) to++;

    // --- decimal/thousands separator: the value also covers the
    //     separator and any digits that follow it, even where the
    //     regular expression stopped at the separator ("3.5x")
    char sep = preprocessed.charAt(to);
    if (sep == ',' || sep == '.')
    {
      to++;
      while (to < len && Character.isDigit(preprocessed.charAt(to))) to++;
    }

    // --- text up to and including the leading separator, then the value
    tagged.append(preprocessed, copied, from + 1);
    tagged.append("<span class=\"value\">");
    tagged.append(preprocessed, from + 1, to);
    tagged.append("</span>");

    copied = to;
  }

  // --- text after the last numeral
  tagged.append(preprocessed, copied, len);

  // --- drop the filler and move a trailing ',' or '.' out of the span
  String result = tagged.toString();
  result = result.replaceAll("xxxxxxxx ", "");
  result = result.replaceAll(",</span>", "</span>,");
  result = result.replaceAll("\\.</span>", "</span>.");

  return result;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- find End-Of-Sentence starting from 'from'
// ----------------------------------------------------------
public static int indexOfEOS(String text, int from)
{
  // --- text : text to scan
  //     from : index to start from; the search for a period begins at
  //            from+1
  //     returns the index of the period that ends the sentence,
  //             text.length()-2 if no such period is found, or
  //             -1 if 'from' is at or beyond the end of the text

  int period = from;
  int len = text.length();

  if (from >= len) return -1;

  while (period >= 0)
  {
    // --- find the next period character
    period = text.indexOf(".", period+1);

    // --- if no EOS found -> return the 'last' character position
    if (period < 0) return len-2;

    // --- a period that is the very last character ends the sentence;
    //     there is no following character to inspect
    if (period + 1 >= len) return period;

    // --- white space after the period?
    char next = text.charAt(period+1);
    if (" \t\n\r\f".indexOf(next) >= 0)
    {
      // --- check if EOS is inside an HTML element
      //     NB: here, HTML elements do not span multiple sentences

      // --- find the 1st closing HTML tag
      int b = text.indexOf("</", period+1);

      if (b >= 0)  // --- if there is a closing HTML tag
      {
        // --- check if it encloses the current period (EOS)
        int e = text.indexOf(">", b);
        if (e > 0)
        {
          // --- extract tag name: </...tag_name...>
          String tag = text.substring(b+2, e);

          // --- find the 'matching' opening tag: if it opens after the
          //     period, the period is outside the element -> EOS
          b = text.indexOf("<" + tag + ">", period);
          if (0 < b && b < e) return period;

          b = text.indexOf("<" + tag + " ", period);
          if (0 < b && b < e) return period;

          // --- go back and look for the next period
        }
      }
      else return period;
    }
    // --- go back and look for the next period
  }

  // --- no EOS found; return the 'last' character position
  return len-2;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- generate HTML code for the expandable SCORE TABLE
// ----------------------------------------------------------
private String scoreTable(String db, String id)
{
  // --- db : prefix of the <db>_INDEX table and the <db>_block columns
  //     id : document ID (value of the docID(db) column)
  //     returns the HTML of the expandable score table (inside the div
  //     "element_<id>_hibox"): one row per enzyme in CON_EC with its
  //     compounds and genes, plus the document's GO and SBO terms;
  //     on any error only the table header is returned
  //     side effect: if the export-bibtex option is "yes" and keywords
  //     were collected, an empty "keywords = {}" field in bibtexGlobal
  //     is filled in

  System.out.println("\nKiPar.scoreTable(" + db + ", " + id + ")");

  String html = "<table class=\"abstract\" width=\"100%\"><tr><td>"         + "\n" +
                "<div id=\"element_" + id + "_hibox\" class=\"box\">"       + "\n" +
                "<TABLE BORDER width=\"100%\">"                             + "\n" +
                "<TR>"                                                      + "\n" +
                "  <TH class=\"enzyme\"   width=\"10%\">enzyme</TH>"        + "\n" +
                "  <TH class=\"compound\" width=\"30%\">compound</TH>"      + "\n" +
                "  <TH class=\"gene\"     width=\"10%\">gene</TH>"          + "\n" +
                "  <TH class=\"pathway\"  width=\"25%\">pathway</TH>"       + "\n" +
                "  <TH class=\"kinetics\" width=\"25%\">kinetics</TH>"      + "\n" +
                "</TR>"                                                     + "\n";

  // --- table cells, one entry per enzyme (row); the lists grow as
  //     needed, so more than MAX_ECs enzymes cannot overflow them
  List<String> TC_EC  = new ArrayList<String>(MAX_ECs);
  List<String> TC_CPD = new ArrayList<String>(MAX_ECs);
  List<String> TC_SCE = new ArrayList<String>(MAX_ECs);

  String    query;
  Statement stmt  = null;
  Statement stmt1 = null;
  ResultSet rs;
  ResultSet rs1;

  StringBuilder keywords = new StringBuilder("kipar-export");

  String ID_column = docID(db);

  try
  {
    stmt  = con.createStatement();
    stmt1 = con.createStatement();

    // --- SCORE TABLE: one row for each enzyme

    // --- ENZYME ---
    query = "SELECT ID, " + db + "_block"                  + "\n" +
            "FROM   CON_EC"                                + "\n" +
            "ORDER BY ID;";
    rs    = stmt.executeQuery(query);
    while (rs.next())
    {
      String a_class;

      String EC = rs.getString(1);

      if (rs.getBoolean(2)) a_class = "maybe";
      else
      {
        // --- NOTE(review): this query does not filter on the current
        //     enzyme's ID, so every non-blocked enzyme is marked "yes"
        //     as soon as the document has any 'EC' index entry --
        //     confirm whether "AND ID = '<EC>'" was intended
        query = "SELECT ID"                                  + "\n" +
                "FROM   " + db + "_INDEX"                    + "\n" +
                "WHERE  concept = 'EC'"                      + "\n" +
                "AND    " + ID_column + " = '" + id + "';";
        rs1   = stmt1.executeQuery(query);
        if (rs1.next()) a_class = "yes";
        else            a_class = "no";
        rs1.close();
      }

      if (a_class.equals("yes")) keywords.append(", EC").append(EC).append(" ");

      TC_EC.add(this.link(BASE_KEGG_EC, EC, a_class));

      // --- COMPOUNDS and GENES related to the enzyme ---
      TC_CPD.add(scoreTableCell(stmt1, db, ID_column, id, EC, "CPD", BASE_KEGG_CPD, keywords));
      TC_SCE.add(scoreTableCell(stmt1, db, ID_column, id, EC, "SCE", BASE_KEGG_SCE, keywords));
    }

    rs.close();
    stmt1.close();


    // --- GO terms ---
    StringBuilder TC_pathway = new StringBuilder();
    query = "SELECT ID"                                             + "\n" +
            "FROM   " + db + "_INDEX"                               + "\n" +
            "WHERE  concept = 'GO'"                                 + "\n" +
            "AND    " + ID_column + " = '" + id + "'"               + "\n" +
            "ORDER BY ID;";
    rs    = stmt.executeQuery(query);
    while (rs.next())
    {
      String goID = rs.getString(1);
      TC_pathway.append(this.link(BASE_GO, goID, "")).append("<br>");
      keywords.append(", ").append(goID);
    }
    rs.close();


    // --- SBO terms ---
    StringBuilder TC_kinetics = new StringBuilder();
    query = "SELECT ID"                                             + "\n" +
            "FROM   " + db + "_INDEX"                               + "\n" +
            "WHERE  concept = 'SBO'"                                + "\n" +
            "AND    " + ID_column + " = '" + id + "'"               + "\n" +
            "ORDER BY ID;";
    rs    = stmt.executeQuery(query);
    while (rs.next())
    {
      String sboID = rs.getString(1);
      TC_kinetics.append(this.link(BASE_SBO, sboID, "")).append("<br>");
      keywords.append(", ").append(sboID);
    }
    rs.close();


    // --- BibTeX export: fill in the (empty) keywords field ---
    Configuration config = new Configuration();
    boolean export2bibtex = (config.options_export_bibtex()).equals("yes");
    String keywordList = keywords.toString();
    if (export2bibtex && !keywordList.equals("kipar-export"))
    {
      keywordList = "keywords = {" + keywordList.replaceAll(" , ", ", ") + "}";

      // --- the keywords are literal text: quote them so that '$' or '\'
      //     are not interpreted as group references by replaceAll()
      bibtexGlobal = bibtexGlobal.replaceAll("keywords\\s*=\\s*\\{\\s*\\}", Matcher.quoteReplacement(keywordList));
    }


    // --- first row carries the pathway / kinetics cells, which span
    //     all enzyme rows; with no enzymes at all the three leading
    //     cells stay empty instead of showing "null"
    int row = TC_EC.size();
    String firstEC  = (row > 0) ? TC_EC.get(0)  : "";
    String firstCPD = (row > 0) ? TC_CPD.get(0) : "";
    String firstSCE = (row > 0) ? TC_SCE.get(0) : "";
    int    span     = Math.max(row, 1);

    html += "<TR>"                                                  + "\n" +
            "  <TD>" + firstEC  + "</TD>"                           + "\n" +
            "  <TD>" + firstCPD + "</TD>"                           + "\n" +
            "  <TD>" + firstSCE + "</TD>"                           + "\n" +
            "  <TD ROWSPAN=" + span + ">"                           + "\n" +
            "    <p>" + TC_pathway + "</p>"                         + "\n" +
            "  </TD>"                                               + "\n" +
            "  <TD ROWSPAN=" + span + ">"                           + "\n" +
            "    <p>" + TC_kinetics  + "</p>"                       + "\n" +
            "  </TD>"                                               + "\n" +
            "</TR>"                                                 + "\n";

    for (int i = 1; i < row; i++)
    {
      html += "<TR>"                                                + "\n" +
              "  <TD>" + TC_EC.get(i)  + "</TD>"                    + "\n" +
              "  <TD>" + TC_CPD.get(i) + "</TD>"                    + "\n" +
              "  <TD>" + TC_SCE.get(i) + "</TD>"                    + "\n" +
              "</TR>"                                               + "\n";
    }

    html += "</TABLE>"                                              + "\n" +
            "</div>"                                                + "\n" +
            "</td></tr></table>"                                    + "\n";
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release both statements (and with them any open result set)
    //     on every path, including failures
    try { if (stmt1 != null) stmt1.close(); } catch (SQLException ex) {this.explain(ex);}
    try { if (stmt  != null) stmt.close();  } catch (SQLException ex) {this.explain(ex);}
  }

  return html + "\n\n";
}

// --- SCORE TABLE helper: content of one compound ("CPD") or gene ("SCE")
//     cell for the given enzyme -- concepts matched in the document
//     ("yes"), blocked concepts ("maybe"), then the remaining related
//     concepts ("no"); matched IDs are also added to the keywords
private String scoreTableCell(Statement stmt1, String db, String ID_column, String id, String EC,
                              String concept, String base, StringBuilder keywords) throws SQLException
{
  StringBuilder cell   = new StringBuilder();
  StringBuilder listed = new StringBuilder(); // --- ", 'ID'" for each ID already shown
  String        query;
  ResultSet     rs1;

  // --- matched ---
  query = "SELECT ID"                                                                                  + "\n" +
          "FROM   " + db + "_INDEX"                                                                    + "\n" +
          "WHERE  " + ID_column + " = '" + id + "'"                                                    + "\n" +
          "AND    concept = '" + concept + "'"                                                         + "\n" +
          "AND    ID IN (SELECT " + concept + " FROM REL_EC_" + concept + " WHERE EC = '" + EC + "')"  + "\n" +
          "ORDER BY ID;";
  rs1 = stmt1.executeQuery(query);
  while (rs1.next())
  {
    String ID = rs1.getString(1);
    listed.append(", '").append(ID).append("'");
    cell.append(this.link(base, ID, "yes")).append("<br>");
    if (keywords.indexOf(", " + ID + " ") < 0) keywords.append(", ").append(ID).append(" ");
  }
  rs1.close();

  // --- blocked ---
  query = "SELECT C.ID"                                             + "\n" +
          "FROM   REL_EC_" + concept + " R, CON_" + concept + " C"  + "\n" +
          "WHERE  R.EC = '" + EC + "'"                              + "\n" +
          "AND    R." + concept + " = C.ID"                         + "\n" +
          "AND    C." + db + "_block = TRUE"                        + "\n" +
          "ORDER BY C.ID;";
  rs1 = stmt1.executeQuery(query);
  while (rs1.next())
  {
    String ID = rs1.getString(1);
    listed.append(", '").append(ID).append("'");
    cell.append(this.link(base, ID, "maybe")).append("<br>");
  }
  rs1.close();

  // --- other: everything related to the enzyme that is not listed yet ---
  if (listed.length() > 2)
  {
    query = "SELECT " + concept + " FROM REL_EC_" + concept + " WHERE EC = '" + EC + "' AND " + concept + " NOT IN (" + listed.substring(2) + ");";
  }
  else query = "SELECT " + concept + " FROM REL_EC_" + concept + " WHERE EC = '" + EC + "';";

  rs1 = stmt1.executeQuery(query);
  while (rs1.next())
  {
    cell.append(this.link(base, rs1.getString(1), "no")).append("<br>");
  }
  rs1.close();

  return cell.toString();
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- annotate a given document with terms of interest:
//  -  returns HTML code for an annotated abstract of the
//     document
//  -  full paper stored in a separate HTML file
// ----------------------------------------------------------
public String annotateDocument(String db, String id)
{
  System.out.println("\nKiPar.annotateDocument(" + db + ", " + id + ")");

  String html = "";

  String    query;
  Statement stmt;
  ResultSet rs;
  boolean   more;

  String ID_column = docID(db);

  String Abstract    = "";
  String fullPaper   = "";
  String scannedText = "";
  boolean scanned = false;

  boolean pmc = db.toLowerCase().equals("pmc");

  try
  {
    stmt  = con.createStatement();

    // --- get ABSTRACT
    query = "SELECT abstract FROM " + db + "_CITATION WHERE " + ID_column + " = '" + id + "';";
    rs    = stmt.executeQuery(query);
    more  = rs.next();
    if (more) Abstract = rs.getString(1);
    rs.close();

    // --- get full paper if document from PMC
    if (pmc)
    {
      Entrez entrez = new Entrez();
      Abstract  = entrez.cleanPMCAbstractTags(Abstract);
      StringBuffer fullPaperBuffer = entrez.bufferPMC(id);
      if (fullPaperBuffer.length() > 100) fullPaper = entrez.cleanPMC_HTMLtags(fullPaperBuffer);
      else                                fullPaper = fullPaperBuffer.toString();

      if (fullPaper.indexOf("Full text is available as a scanned copy of the original print version") > 0)
      {
        scanned = true;

        // --- save PDF file
        String pdf_file_name = "tmp_pdf.pdf"; 
        entrez.PMC_fetchPDF(id, pdf_file_name);

        // --- extract text from PDF file
        Pdf2text pdf2text = new Pdf2text();
        scannedText = (pdf2text.getTextFromPDF(pdf_file_name)).toString();

        // --- delete PDF file
        File pdf_file = new File(pdf_file_name); pdf_file.delete();
      }
    }

    // --- ENZYMES ---
    query = "SELECT ID FROM CON_EC;";
    rs    = stmt.executeQuery(query);
    more  = rs.next();
    while (more)
    {
      String EC = rs.getString(1);

      Abstract = annotateConcept(Abstract, EC, "enzyme");
      if (pmc)
      {
        fullPaper = annotateConcept(fullPaper, EC, "enzyme");
        if (scanned) scannedText = annotateConcept(scannedText, EC, "enzyme");
      }

      more = rs.next();
    }
    rs.close();


    // --- COMPOUNDS ---
    query = "SELECT ID FROM CON_CPD;";
    rs    = stmt.executeQuery(query);
    more  = rs.next();
    while (more)
    {
      String CPD = rs.getString(1);

      Abstract = annotateConcept(Abstract, CPD, "compound");
      if (pmc)
      {
        fullPaper = annotateConcept(fullPaper, CPD, "compound");
        if (scanned) scannedText = annotateConcept(scannedText, CPD, "compound");
      }

      more = rs.next();
    }
    rs.close();


    // --- GENES ---
    query = "SELECT ID FROM CON_SCE;";
    rs    = stmt.executeQuery(query);
    more  = rs.next();
    while (more)
    {
      String SCE = rs.getString(1);

      Abstract = annotateConcept(Abstract, SCE, "gene");
      if (pmc)
      {
        fullPaper = annotateConcept(fullPaper, SCE, "gene");
        if (scanned) scannedText = annotateConcept(scannedText, SCE, "gene");
      }

      more = rs.next();
    }
    rs.close();


    // --- GO terms ---
    query = "SELECT ID FROM CON_GO;";
    rs    = stmt.executeQuery(query);
    more  = rs.next();
    while (more)
    {
      String goID = rs.getString(1);

      Abstract = annotateConcept(Abstract, goID, "pathway");
      if (pmc)
      {
        fullPaper = annotateConcept(fullPaper, goID, "pathway");
        if (scanned) scannedText = annotateConcept(scannedText, goID, "pathway");
      }

      more = rs.next();
    }
    rs.close();


    // --- SBO terms ---
    query = "SELECT ID FROM CON_SBO;";
    rs    = stmt.executeQuery(query);
    more  = rs.next();
    while (more)
    {
      String sboID = rs.getString(1);

      Abstract = annotateConcept(Abstract, sboID, "kinetics");
      if (pmc)
      {
        fullPaper = annotateConcept(fullPaper, sboID, "kinetics");
        if (scanned) scannedText = annotateConcept(scannedText, sboID, "kinetics");
      }

      more = rs.next();
    }
    rs.close();


         if (Abstract == null)    Abstract = "Abstract not available.";
    else if (Abstract.equals("")) Abstract = "Abstract not available.";
    else
    {
      Abstract = focusOnKinetics(Abstract  + " ");
      if (pmc)
      {
        fullPaper = focusOnKinetics(fullPaper + " ");
        if (scanned) scannedText = focusOnKinetics(scannedText + " ");
      }
    }

    html = "<table class=\"abstract\" width=\"100%\"><tr><td>"      + "\n" +
           "<div id=\"element_" + id + "_abbox\" class=\"box\">"    + "\n" +
           "<blockquote><p>" + Abstract + "</p></blockquote>"       + "\n" +
           "</div>"                                                 + "\n" +
           "</td></tr></table>"                                     + "\n" +
           html;

    if (pmc)
    {
      Configuration config = new Configuration();
      String file_name = config.path() + "\\output\\html\\papers\\PMC_" + id + ".html";
      FileWriter file_out = fileOut(file_name);

      // --- HTML HEADER ---
      String html_code = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">"                                                        + "\n" +
                         "<html>"                                                                                                                  + "\n" +
                         "<head>"                                                                                                                  + "\n" +
                         "<title>KiPar: full text - PMC ID: " + id +"</title>"                                                                     + "\n" +
                         "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />"                                               + "\n" +
                         "<link rel=\"stylesheet\" type=\"text/css\" href=\"../KiPar.css\" />"                                                     + "\n" +
                         "<script language=\"JavaScript\" type=\"text/javascript\">"                                                               + "\n" +
                         "(document.getElementById) ? dom = true : dom = false;"                                                                   + "\n" +
                         "function typeStart()"                                                                                                    + "\n" +
                         "{"                                                                                                                       + "\n" +
                         "  if (dom) {document.write('<div id=\"topMenu\" style=\"position:absolute; left:0; visibility:show; solid #000000\">')}" + "\n" +
                         "}"                                                                                                                       + "\n" +
                         "function typeEnd() {if (dom) {document.write('</div>')}}"                                                                + "\n" +
                         "function placeIt()"                                                                                                      + "\n" +
                         "{"                                                                                                                       + "\n" +
                         "  if (dom && !document.all) {document.getElementById(\"topMenu\").style.top = window.pageYOffset + 0;}"                  + "\n" +
                         "  if (document.all) {document.all.topMenu.style.pixelTop = document.body.scrollTop + 0}"                                 + "\n" +
                         "  window.setTimeout(\"placeIt()\", 10);"                                                                                 + "\n" +
                         "}"                                                                                                                       + "\n" +
                         "function init() {placeIt();}"                                                                                            + "\n" +
                         "</script>"                                                                                                               + "\n" +
                         "</head>"                                                                                                                 + "\n" +
                         "<body onload=\"init();\">"                                                                                               + "\n" +
                         "<script>typeStart()</script>"                                                                                            + "\n" +
                         "<div class=\"vague\" style=\"width:100%; background-color:white; padding-left:1em\";>"                                   + "\n" +
                         "<p>"                                                                                                                     + "\n" +
                         "Legend: "                                                                                                                + "\n" +
                         "  <span class=\"enzyme\">enzyme</span> | "                                                                               + "\n" +
                         "  <span class=\"compound\">compound</span>  | "                                                                          + "\n" +
                         "  <span class=\"gene\">gene</span>  | "                                                                                  + "\n" +
                         "  <span class=\"pathway\">pathway</span>  | "                                                                            + "\n" +
                         "  <span class=\"kinetics\">kinetics</span>"                                                                              + "\n" +
                         "</p>"                                                                                                                    + "\n" +
                         "</div>"                                                                                                                  + "\n" +
                         "<script>typeEnd()</script>"                                                                                              + "\n" +
                         "<div align=\"Center\">"                                                                                                  + "\n" +
                         "<TABLE WIDTH=\"80%\">"                                                                                                   + "\n";
      file_out.write(html_code);


      // --- CITATION DETAILS ---
      query = "SELECT authors, title, ref FROM " + db + "_CITATION WHERE " + docID(db) + " = '" + id + "';";
      rs   = stmt.executeQuery(query);
      more = rs.next();
      if (more)
      {
        String authors = rs.getString(1);
        String title   = rs.getString(2);
        String ref     = rs.getString(3);

        html_code = "<TR><TH class=\"title\">"                                              + "\n" +
                    "<div class=\"author\">" + authors + "</div>"                           + "\n" +
                    "<a class=\"title\" href=\"" + BASE_PMC + id + "\">" + title + "</a>"   + "\n" +
                    "<div class=\"vague\">" + ref + "</div>"                                + "\n" +
                    "</TH></TR>"                                                            + "\n";

        file_out.write(html_code);
      }
      rs.close();


      file_out.write("<TR><TD>\n");

      boolean full = (config.options_export_fullText()).equals("yes");
      int refs;

      // --- PAPER TEXT ---
      if (scanned)
      {
        if (!full) scannedText = this.restrictText(scannedText);

        scannedText = "\nFull text from a scanned copy\n<br></br>\n\n<tt>\n" + scannedText + "</tt>\n<br></br>\n";

        refs = fullPaper.lastIndexOf("Selected References");

        if (refs > 0) fullPaper = fullPaper.substring(0, refs) + scannedText + fullPaper.substring(refs);
        else          fullPaper = fullPaper + scannedText;
      }
      else // --- not scanned
      {
        if (!full)
        {
          refs = fullPaper.lastIndexOf("References");

          if (refs > 0) fullPaper = this.restrictText(fullPaper.substring(0, refs)) + "<br></br>" + fullPaper.substring(refs);
          else          fullPaper = this.restrictText(fullPaper);
        }
      }


      file_out.write("\n<p>\n" + fullPaper + "\n</p>\n");

      // --- HTML FOOTER ---
      html_code = "</TD></TR>"               + "\n" +
                  "</TABLE>"                 + "\n" +
                  "</div>"                   + "\n" +
                  "</body>"                  + "\n" +
                  "</html>"                  + "\n";
      file_out.write(html_code);

      file_out.close();
    }

    stmt.close();
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex) {ex.printStackTrace();}

  return html + "\n\n";
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- an HTML link for the given ID and the base URL
// ----------------------------------------------------------
/**
 * Builds an HTML anchor element pointing at {@code base + id}.
 *
 * The visible text is the ID itself, except for the KEGG compound, SBO and GO
 * base URLs, where it is looked up through compoundName / sboName / goName.
 * A non-empty {@code a_class} is emitted as the anchor's CSS class; the classes
 * "yes", "no" and "maybe" additionally prefix the text with "+", "&minus;" and
 * "&plusmn;" respectively.
 *
 * @param base    base URL (compared against the BASE_* constants)
 * @param id      identifier appended to the base URL
 * @param a_class CSS class name, or "" for none
 * @return the complete {@code <a ...>...</a>} markup
 */
private String link(String base, String id, String a_class)
{
  // --- text shown inside the anchor
  String label;
  if      (base.equals(BASE_KEGG_CPD)) label = this.compoundName(id);
  else if (base.equals(BASE_SBO))      label = this.sboName(id);
  else if (base.equals(BASE_GO))       label = this.goName(id);
  else                                 label = id;

  StringBuilder anchor = new StringBuilder("<a ");

  // --- optional CSS class attribute
  if (!a_class.equals("")) anchor.append("class=\"").append(a_class).append("\" ");

  // --- sign shown in front of the text for the three "verdict" classes
  String sign = "";
  if      (a_class.equals("yes"))   sign = "+";
  else if (a_class.equals("no"))    sign = "&minus;";
  else if (a_class.equals("maybe")) sign = "&plusmn;";

  anchor.append("href=\"").append(base).append(id).append("\">");
  anchor.append(sign).append(label).append("</a>");

  return anchor.toString();
}
// ----------------------------------------------------------

// ************************************************************************************
// ************************************************************************************
// ************************************************************************************




// ************************************************************************************
// ************************* A U X I L I A R Y   M E T H O D S ************************
// ************************************************************************************

// ----------------------------------------------------------
// --- open file for WRITING
// ----------------------------------------------------------
/**
 * Opens the named file for writing and reports progress on standard output.
 *
 * @param file_name path of the file to create or overwrite
 * @return the open writer, or {@code null} if the file could not be opened
 *         (the exception is printed, not thrown)
 */
private FileWriter fileOut(String file_name)
{
  FileWriter file = null;
  System.out.print("Writing to: " + file_name + " ... ");
  try
  {
    file = new FileWriter(file_name);
    // --- report success only when the writer was really created
    System.out.print("File open. ");
  }
  catch (Exception ex) {System.out.println(ex.toString());}
  return file;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- open file for READING
// ----------------------------------------------------------
/**
 * Opens the named file for reading and reports progress on standard output.
 *
 * @param file_name path of the file to read
 * @return the reader, or {@code null} if constructing it threw
 *         (the exception is printed, not thrown)
 */
private EasyReader fileIn(String file_name)
{
  EasyReader file = null;
  System.out.print("Reading from: " + file_name + " ... ");
  try
  {
    file = new EasyReader(file_name);
    // --- report success only when the reader was really created
    System.out.print("File open. ");
  }
  catch (Exception ex) {System.out.println(ex.toString());}
  return file;
}
// ----------------------------------------------------------

// ************************************************************************************
// ************************************************************************************
// ************************************************************************************




// ************************************************************************************
// *************************** O T H E R   M E T H O D S ******************************
// ************************************************************************************

// ----------------------------------------------------------
// --- strip off the comma at the end of a term
// --- (which is commonly found in PubChem)
// ----------------------------------------------------------
/**
 * Trims the term and removes every comma at its end.
 *
 * Surrounding whitespace is removed first, then all trailing ',' characters
 * are dropped. A term consisting only of commas therefore becomes "".
 *
 * @param inputString the raw term (must not be null)
 * @return the trimmed term without trailing commas
 */
private String fixComma(String inputString)
{
  String outputString = inputString.trim();

  // --- index just past the last character to keep; measured on the TRIMMED
  //     string, so leading/trailing blanks in the input cannot push it out of range
  int end = outputString.length();

  while (end > 0 && outputString.charAt(end-1) == ',') end--;

  return outputString.substring(0, end);
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- update each COMPOUND with the # of reactions it is
//     involved in (based on information from KEGG)
// ----------------------------------------------------------
/**
 * Refreshes the CON_CPD table from KEGG.
 *
 * For every compound ID returned by {@code concepts("CPD", "pubmed")} the
 * number of reactions and the compound name are fetched through EnzymeKEGG and
 * written to the {@code reactions} and {@code name} columns of that row.
 * SQL errors are passed to explain(); any other exception is printed.
 */
public void reactions()
{
  System.out.println("\nKiPar.reactions(): query KEGG for the number of reactions each compound is involved in");

  Statement stmt = null;

  try
  {
    stmt = con.createStatement();

    EnzymeKEGG kegg = new EnzymeKEGG();

    // --- COMPOUNDS
    List<String> conceptList = this.concepts("CPD", "pubmed");

    for (String compound : conceptList)
    {
      int reactions = kegg.reactions(compound);
      String query = "UPDATE CON_CPD SET reactions = " + reactions + " WHERE ID = '" + compound + "';";
      stmt.execute(query);

      // --- NOTE(review): fixApostrophe presumably escapes quotes for the SQL literal - confirm
      String name = kegg.compoundName(compound);
      query = "UPDATE CON_CPD SET name = '" + this.fixApostrophe(name) + "' WHERE ID = '" + compound + "';";
      stmt.execute(query);
    }
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release the statement even when a query or a KEGG call failed
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- retrieve compound's name
// ----------------------------------------------------------
/**
 * Looks up the name stored for a compound in table CON_CPD.
 *
 * @param id compound ID (matched against column ID)
 * @return the value of column {@code name} of the first matching row; the ID
 *         itself when no row matches or the query fails. (If the stored value
 *         is SQL NULL, JDBC's getString yields null and that is returned.)
 */
public String compoundName(String id)
{
  String name = id;

  Statement stmt = null;
  ResultSet rs   = null;

  try
  {
    stmt = con.createStatement();

    String query = "SELECT name FROM CON_CPD WHERE ID = '" + id + "';";
    rs = stmt.executeQuery(query);
    if (rs.next()) name = rs.getString(1);
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release JDBC resources on every path, including failures
    if (rs != null)
    {
      try {rs.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }

  return name;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- retrieve SBO (parameter) name
// ----------------------------------------------------------
/**
 * Looks up the term stored for an SBO ID in table TERM_SBO.
 *
 * @param id SBO ID (matched against column ID)
 * @return the value of column {@code term} of the first matching row; the ID
 *         itself when no row matches or the query fails. (If the stored value
 *         is SQL NULL, JDBC's getString yields null and that is returned.)
 */
public String sboName(String id)
{
  String name = id;

  Statement stmt = null;
  ResultSet rs   = null;

  try
  {
    stmt = con.createStatement();

    String query = "SELECT term FROM TERM_SBO WHERE ID = '" + id + "';";
    rs = stmt.executeQuery(query);
    if (rs.next()) name = rs.getString(1);
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release JDBC resources on every path, including failures
    if (rs != null)
    {
      try {rs.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }

  return name;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- retrieve GO name
// ----------------------------------------------------------
/**
 * Looks up the term stored for a GO ID in table TERM_GO.
 *
 * @param id GO ID (matched against column ID)
 * @return the value of column {@code term} of the first matching row; the ID
 *         itself when no row matches or the query fails. (If the stored value
 *         is SQL NULL, JDBC's getString yields null and that is returned.)
 */
public String goName(String id)
{
  String name = id;

  Statement stmt = null;
  ResultSet rs   = null;

  try
  {
    stmt = con.createStatement();

    String query = "SELECT term FROM TERM_GO WHERE ID = '" + id + "';";
    rs = stmt.executeQuery(query);
    if (rs.next()) name = rs.getString(1);
  }
  catch (SQLException ex) {this.explain(ex);}
  catch (Exception ex)    {System.out.println(ex.toString());}
  finally
  {
    // --- release JDBC resources on every path, including failures
    if (rs != null)
    {
      try {rs.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
    if (stmt != null)
    {
      try {stmt.close();}
      catch (SQLException ex) {this.explain(ex);}
    }
  }

  return name;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
/**
 * Copies the full content of one file to another using file channels.
 *
 * @param in  file to read
 * @param out file to create or overwrite
 * @throws Exception if either file cannot be opened or the transfer fails
 */
private static void copyFile(File in, File out) throws Exception
{
  System.out.println("\nKiPar.copyFile(...): copying a file... ");

  FileChannel      sourceChannel = null;
  FileChannel destinationChannel = null;

  try
  {
         sourceChannel = new FileInputStream(in).getChannel();
    destinationChannel = new FileOutputStream(out).getChannel();

    long size     = sourceChannel.size();
    long position = 0;

    // --- transferTo may move fewer bytes than requested, so repeat until done
    while (position < size)
    {
      long moved = sourceChannel.transferTo(position, size - position, destinationChannel);
      if (moved <= 0) break; // --- nothing left to read (e.g. source shrank): avoid spinning forever
      position += moved;
    }
  }
  finally
  {
    // --- close both channels even if opening the destination or the transfer failed
    try     {if (sourceChannel      != null) sourceChannel.close();}
    finally {if (destinationChannel != null) destinationChannel.close();}
  }
}
// ----------------------------------------------------------


// ----------------------------------------------------------
/**
 * Formats the current date and time, e.g. "05 March 2010 at 17:42:08 GMT".
 *
 * The hour is written on the 24-hour clock (HH): the pattern carries no am/pm
 * marker, so a 12-hour value would be ambiguous.
 *
 * @return the formatted current time in the default locale and time zone
 */
public String timeStamp()
{
  Calendar cal = Calendar.getInstance();
  SimpleDateFormat sdf = new SimpleDateFormat("dd MMMMM yyyy 'at' HH:mm:ss z");
  return sdf.format(cal.getTime());
}
// ----------------------------------------------------------


// ----------------------------------------------------------
/**
 * Builds the HTML navigation bar for one page of a multi-page listing.
 *
 * Page files are named {@code <root>_<n><extension>}, where root and extension
 * come from splitting {@code pageName} at its last '.'; a name without a dot is
 * treated as having an empty extension. "First"/"Previous" links appear only
 * after page 1, "Next"/"Last" only before the final page.
 *
 * @param page       current page number
 * @param totalPages number of pages
 * @param pageName   base file name of the listing, e.g. "results.html"
 * @param items      number of items per page (shown in the bar)
 * @return the navigation bar markup
 */
public String navigation(int page, int totalPages, String pageName, int items)
{
  int dot = pageName.lastIndexOf(".");
  if (dot < 0) dot = pageName.length(); // --- no extension: keep the whole name as the root

  String name_root = pageName.substring(0, dot) + "_";
  String extension = pageName.substring(dot);

  String page_no  = Integer.toString(page);
  String total    = Integer.toString(totalPages);
  String current  = "Page " + page_no + " of " + total;

  String first    = "";
  String previous = "";
  String next     = "";
  String last     = "";

  if (page > 1)
  {
    previous = "<a href=\"" + name_root + (Integer.toString(page-1)) + extension + "\">Previous</a> | ";
    first    = "<a href=\"" + name_root + "1"                        + extension + "\">First</a> | ";
  }

  if (page < totalPages)
  {
    next = " | <a href=\"" + name_root + (Integer.toString(page+1)) + extension + "\">Next</a>";
    last = " | <a href=\"" + name_root + total + extension + "\">Last</a>";
  }

  String navigationBar = "\n<BR>\n" +
                         "<div class=\"vague\" align=\"Center\">\n" +
                         first + previous + current + next + last + "\n" +
                         "<BR>\n" +
                         "(" + items + " items per page)\n" +
                         "</div>\n";

  return navigationBar;
}
// ----------------------------------------------------------


// ----------------------------------------------------------
/**
 * Reduces a marked-up text to its focus sentences plus a context window
 * of up to 200 characters on either side of each.
 *
 * Focus sentences are the {@code <span class="focus">...</span>} elements
 * (which may contain nested {@code <span class="value">} elements). Context is
 * cut with leftWindow / rightWindow; omitted stretches are marked with "...".
 * Text without any focus span yields an empty string.
 *
 * @param txt HTML text with focus sentences marked up
 * @return the concatenated windows
 */
public String restrictText(String txt)
{
  System.out.println("\nKiPar.restrictedText(...): display text windows of highlighted sentences only");

  final String FOCUS = "<span class=\"focus\">";
  final String VALUE = "<span class=\"value\">";
  final String CLOSE = "</span>";

  StringBuffer restricted = new StringBuffer();

  int L   = txt.length();
  int len = 200;           // --- size of the context window on each side

  String left;
  String right;

  int b = txt.indexOf(FOCUS); // --- index of the current focus sentence
  int e = 0;                  // --- everything of interest before e has already been printed

  // --- 0 <= b: a focus sentence at the very start of the text counts too
  while (0 <= b && b < L)
  {
    // --- extract LEFT window ---
    if (e >= b-len) left =                           txt.substring(e,     b);
    else            left = "\n<br>... " + leftWindow(txt.substring(b-len, b));

    // --- extract FOCUS sentence ---
    int span  = b;
    int close = txt.indexOf(CLOSE, span);
    while (close >= 0 && (txt.substring(span, close + CLOSE.length())).contains(VALUE))
    {
      // --- skip the nested <span class="value">...</span>
      span  = close + CLOSE.length();
      close = txt.indexOf(CLOSE, span);
    }
    // --- an unterminated span runs to the end of the text
    e = (close < 0) ? L : close + CLOSE.length();
    String focus = txt.substring(b, e);

    // --- the NEXT focus sentence
    b = txt.indexOf(FOCUS, e);

    // --- extract RIGHT window ---
    if (b < 0) b = L;

         if (e+len >= b)     {right =             txt.substring(e,  b);   e  = b;             }
    else if (e+len >= b-len) {right = rightWindow(txt.substring(e), len); e += right.length();}
    else                     {right = rightWindow(txt.substring(e), len); e += right.length();
                              if (e < b-len) right += " ...<br>\n";                           }

    restricted.append(left + focus + right);
  }

  return restricted.toString();
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- extract left text window
// NB: mind the HTML tags
// ----------------------------------------------------------
/**
 * Trims the start of a text window so that it does not begin with broken HTML.
 *
 * Two repairs are applied from the left edge:
 * 1. the tail of a tag cut by the window edge ("...ag>") is dropped;
 * 2. every closing tag whose opening tag is not inside the (remaining) window
 *    is dropped together with the text before it. Scanning stops at the first
 *    closing tag that is opened within the window.
 *
 * @param txt the raw window
 * @return the suffix of {@code txt} that remains after trimming
 */
public String leftWindow(String txt)
{
  System.out.println("\nKiPar.leftWindow(...): extract left text window");

  String window = txt;
  int    cut    = 0;     // --- the result is window.substring(cut)

  // --- check if a tag is possibly broken: <[/]ta|g>
  int right = window.indexOf(">");
  int left  = window.indexOf("<");

  // --- no tag end contained whatsoever -> window OK
  if (right < 0) return window;

  // --- a '>' that is not preceded by '<' belongs to a tag broken by the edge
  if (left < 0 || right < left) cut = right+1;

  // --- check if an element is broken: <tag> ... | ... </tag>
  left = window.indexOf("</", cut);
  while (left >= 0)
  {
    // --- extract tag name: </tag>
    right = window.indexOf(">", left+2);
    if (right < 0) break; // --- unterminated closing tag: nothing more can be decided

    String opener  = "<" + window.substring(left+2, right).trim();
    String segment = window.substring(cut, left);

    // --- look for the opening <tag>; the character after the name must end
    //     the name, so that e.g. <br> is not taken for the opening of </b>
    boolean opened = false;
    for (int at = segment.indexOf(opener); at >= 0 && !opened; at = segment.indexOf(opener, at+1))
    {
      int after = at + opener.length();
      opened = (after >= segment.length()) || !Character.isLetterOrDigit(segment.charAt(after));
    }
    if (opened) break;

    // --- no opening <tag> within the window: cut after the closing </tag>|
    cut  = right+1;
    left = window.indexOf("</", cut);
  }

  return window.substring(cut);
}
// ----------------------------------------------------------


// ----------------------------------------------------------
// --- extract right text window
// NB: mind the HTML tags
// ----------------------------------------------------------
/**
 * Cuts a window of about {@code to} characters from the start of a text
 * without ending in broken HTML.
 *
 * Starting from the first {@code to} characters:
 * 1. a tag cut by the window edge ("<ta...") is dropped from the end;
 * 2. the window is extended over every following closing tag whose opening
 *    tag does not occur between the window end and that closing tag, so the
 *    element is closed. Scanning stops at the first closing tag that is
 *    opened after the window.
 *
 * @param txt the text following the focus sentence
 * @param to  nominal window length
 * @return a prefix of {@code txt}; the whole text when it is not longer than {@code to}
 */
public String rightWindow(String txt, int to)
{
  System.out.println("\nKiPar.rightWindow(...): extract right text window");

  if (to >= txt.length()) return txt;
  String window = txt.substring(0, to);
  int    cut    = to;    // --- the result is txt.substring(0, cut)

  // --- check if a tag is possibly broken: <[/]ta|g>
  int left  = window.lastIndexOf("<");
  int right = window.lastIndexOf(">");

  // --- no tags contained whatsoever -> window OK
  if (left < 0) return window;

  // --- the right-most '<' has no '>' after it: trim the broken tag
  if (left > right) cut = left;

  // --- check if an element is broken: <tag> ... | ... </tag>
  left = txt.indexOf("</", cut);
  while (left >= 0)
  {
    // --- extract tag name: </tag>
    right = txt.indexOf(">", left+2);
    if (right < 0) break; // --- unterminated closing tag: nothing more can be decided

    String opener  = "<" + txt.substring(left+2, right).trim();
    String segment = txt.substring(cut, left);

    // --- is the tag opened after the window? The character after the name must
    //     end the name, so that e.g. <br> is not taken for the opening of </b>
    boolean opened = false;
    for (int at = segment.indexOf(opener); at >= 0 && !opened; at = segment.indexOf(opener, at+1))
    {
      int after = at + opener.length();
      opened = (after >= segment.length()) || !Character.isLetterOrDigit(segment.charAt(after));
    }
    if (opened) break;

    // --- opened before the cut: extend the window to close the element </tag>|
    cut  = right+1;
    left = txt.indexOf("</", cut);
  }

  return txt.substring(0, cut);
}
// ----------------------------------------------------------


// ************************************************************************************
// ************************************************************************************
// ************************************************************************************

}
