import gov.nih.nlm.ncbi.www.soap.eutils.*;
import gov.nih.nlm.ncbi.www.soap.eutils.esearch.*;
import gov.nih.nlm.ncbi.www.soap.eutils.efetch.*;

import java.io.*;
import java.net.*;

/**
 * Thin client around the NCBI E-utilities SOAP stubs (ESearch / EFetch) plus a
 * few helpers that download PubMed Central (PMC) XML over HTTP and post-process
 * it as plain strings.
 *
 * <p>The SOAP-based methods print a short trace of their arguments to
 * {@code System.out} before contacting the service.</p>
 */
public class Entrez
{
  /**
   * EFetch URL prefix used to download a PMC record; the record id is appended.
   * NOTE(review): NCBI's current documentation appears to list an https base
   * URL for E-utilities - confirm whether this http address is still served.
   */
  private static final String PMC_EFETCH_URL = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=";

  /** Character set used to decode downloaded XML (assumes the service sends UTF-8 - TODO confirm). */
  private static final String XML_CHARSET = "UTF-8";

  /** First publication year covered by {@link #search}. */
  private static final int FIRST_SEARCH_YEAR = 1960;

  /** Width, in years, of each date window queried by {@link #search}. */
  private static final int SEARCH_WINDOW_YEARS = 10;

  /**
   * Tag names stripped by {@link #cleanPMCtags}, in the exact order in which
   * they are processed.  The order is significant and must not be changed:
   * every opening-tag pattern is a prefix match (e.g. "p" also removes the
   * opening tags of "pub-date"), so reordering could alter the result.
   */
  private static final String[] PMC_TAGS =
  {
    "abstract", "ack", "addr-line", "aff", "alt-title", "article",
    "article-categories", "article-id", "article-meta", "article-title",
    "author-notes", "back", "body", "bold", "caption", "citation",
    "contract-num", "contract-sponsor", "contrib", "contrib-group",
    "copyright-holder", "copyright-statement", "copyright-year", "corresp",
    "counts", "custom-meta", "custom-meta-wrap", "date", "day", "dc:author",
    "dc:date", "dc:identifier", "dcterms:bibliographicCitation",
    "dcterms:isPartOf", "dc:title", "dc:type", "degrees", "edition", "email",
    "ext-link", "fig", "fig-count", "fn", "fn-group", "fpage", "front",
    "given-names", "graphic", "history", "issn", "issue", "italic",
    "journal-id", "journal-meta", "journal-title", "kwd", "kwd-group", "label",
    "license", "License", "lpage", "meta-name", "meta-value", "month", "name",
    "notes", "p", "page-count", "permissions", "permits", "person-group",
    "pmc-articleset", "pub-date", "pub-id", "publisher", "publisher-loc",
    "publisher-name", "ref", "ref-count", "ref-list", "requires", "role", "sc",
    "sec", "self-uri", "series-title", "source", "sub", "subject",
    "subj-group", "suffix", "sup", "surname", "table", "table-count",
    "table-wrap", "table-wrap-foot", "tbody", "td", "thead", "title",
    "title-group", "tr", "uri", "volume", "Work", "xref", "year"
  };

  /** Precompiled opening-tag patterns, index-aligned with {@link #PMC_TAGS}. */
  private static final java.util.regex.Pattern[] PMC_OPEN_TAG_PATTERNS = compileOpenTagPatterns();

  /** Matches opening inline-graphic tags; no closing tag is removed for this element. */
  private static final java.util.regex.Pattern INLINE_GRAPHIC_PATTERN = java.util.regex.Pattern.compile("<inline-graphic[^>]*>");

  /** Matches hexadecimal character references such as {@code &#x2019;}. */
  private static final java.util.regex.Pattern HEX_ENTITY_PATTERN = java.util.regex.Pattern.compile("&#x[^;]*;");

  // --- per-instance service handles (previously static and overwritten by every constructor call)
  private final EUtilsServiceLocator service;
  private final EUtilsServiceSoap    utils;

  /**
   * Creates the service locator and obtains the SOAP port used by all
   * ESearch / EFetch calls of this instance.
   *
   * @throws Exception if the SOAP port cannot be obtained
   */
  public Entrez() throws Exception
  {
    service = new EUtilsServiceLocator();
    utils   = service.geteUtilsServiceSoap();
  }

  // ----------------------------------------------------------------------

  /**
   * Runs an ESearch query once per ten-year window and writes the ids of all
   * matching documents to a file, one id per line.
   *
   * <p>Windows start at 1960 and continue up to the window containing the
   * current calendar year, so recently published records are included.
   * {@code retMax} is applied to every window separately.</p>
   *
   * @param db        Entrez database name passed to ESearch
   * @param query     search term
   * @param retMax    maximum number of ids requested per window
   * @param file_name path of the output file (created or overwritten)
   * @throws Exception if the service call or writing the file fails
   */
  public void search(String db, String query, String retMax, String file_name) throws Exception
  {
    System.out.println("\nEntrez.search(db, query, retMax, file_name):");
    System.out.println("      db = " + db);
    System.out.println("   query = " + query);
    System.out.println("  retMax = " + retMax);

    ESearchRequest parameters = new ESearchRequest();
    parameters.setDb(db);
    parameters.setTerm(query);
    parameters.setRetMax(retMax);

    System.out.println("  Writing to file: " + file_name + " ...");
    FileWriter file_out = new FileWriter(file_name);

    try
    {
      // ------------------------------------------------------------
      // --- process the database decade by decade to handle the
      //     amount of data in manageable chunks
      // ------------------------------------------------------------
      int lastYear = java.util.Calendar.getInstance().get(java.util.Calendar.YEAR);

      for (int mindate = FIRST_SEARCH_YEAR; mindate <= lastYear; mindate += SEARCH_WINDOW_YEARS)
      {
        int maxdate = mindate + SEARCH_WINDOW_YEARS - 1;

        // --- call NCBI ESearch utility for this window
        parameters.setMindate(String.valueOf(mindate));
        parameters.setMaxdate(String.valueOf(maxdate));
        ESearchResult res = utils.run_eSearch(parameters);

        // --- OUTPUT: IDs of the matching documents (none when the list is absent)
        if (res.getIdList() == null || res.getIdList().getId() == null) continue;

        for (int i = 0; i < res.getIdList().getId().length; i++)
          file_out.write(res.getIdList().getId()[i] + "\n");
      }
    }
    finally
    {
      // --- release the file handle even when a service call or write fails
      file_out.close();
    }
  }

  // ----------------------------------------------------------------------

  /**
   * Runs a single ESearch query and reports how many ids it returned.
   *
   * @param db     Entrez database name passed to ESearch
   * @param query  search term
   * @param retMax maximum number of ids requested
   * @return number of ids in the response, or 0 when the response carries no id list
   * @throws Exception if the service call fails
   */
  public int response(String db, String query, String retMax) throws Exception
  {
    System.out.println("\nEntrez.response(db, query, retMax):");
    System.out.println("      db = " + db);
    System.out.println("   query = " + query);
    System.out.println("  retMax = " + retMax);

    ESearchRequest parameters = new ESearchRequest();
    parameters.setDb(db);
    parameters.setTerm(query);
    parameters.setRetMax(retMax);

    ESearchResult res = utils.run_eSearch(parameters);

    if (res.getIdList() == null || res.getIdList().getId() == null) return 0;

    return res.getIdList().getId().length;
  }

  // ----------------------------------------------------------------------

  /**
   * Maps a PMC id to the PubMed id recorded in the article's metadata.
   *
   * @param PMCID id of the record in the "pmc" database
   * @return the value of the first article id whose type is "pmid", or an
   *         empty string when the record or such an id is not available
   * @throws Exception if the service call fails
   */
  public String PMID(String PMCID) throws Exception
  {
    System.out.println("Entrez.PMID(PMCID): map PMCID to PMID");

    EFetchRequest parameters = new EFetchRequest();
    parameters.setDb("pmc");
    parameters.setReport("xml");
    parameters.setId(PMCID);

    EFetchResult res = utils.run_eFetch(parameters);

    if (!hasPmcArticle(res)) return "";

    ArticleId[] ids = res.getPmcArticleset().getArticle()[0].getFront().getArticleMeta().getArticleId();
    if (ids == null) return "";

    for (int i = 0; i < ids.length; i++)
    {
      // --- constant-first equals: an id without a type is simply skipped
      if (!"pmid".equals(ids[i].getPubIdType())) continue;

      if (ids[i].get_any() == null || ids[i].get_any().length == 0) return "";

      return ids[i].get_any()[0].getValue();
    }

    return "";
  }

  // ----------------------------------------------------------------------

  /**
   * Downloads the PMC record for {@code pmc_id} and stores it in a file,
   * line by line, each line terminated by "\n".
   *
   * <p>On a malformed URL or any I/O error a message and stack trace are
   * printed and the JVM is terminated with exit status 1.</p>
   *
   * @param pmc_id    id of the record in the "pmc" database
   * @param file_name path of the output file (created or overwritten)
   */
  public void PMC_fetchFullPaper(String pmc_id, String file_name)
  {
    InputStream    in  = null;
    java.io.Writer out = null;

    try
    {
      in = new java.net.URL(PMC_EFETCH_URL + pmc_id).openStream();  // --- throws an IOException
      BufferedReader reader = new BufferedReader(new InputStreamReader(in, XML_CHARSET));

      // --- the file is only created once the download has started;
      //     same charset on both sides so no character is lost in transit
      out = new java.io.OutputStreamWriter(new java.io.FileOutputStream(file_name), XML_CHARSET);

      String line;
      while ( (line = reader.readLine()) != null ) out.write(line + "\n");

      // --- close here (not only in finally) so a failed flush is reported
      out.close();
    }
    catch (MalformedURLException mue)
    {
      System.out.println("Ouch - a MalformedURLException happened.");
      mue.printStackTrace();
      System.exit(1);
    }
    catch (IOException ioe)
    {
      System.out.println("Oops - an IOException happened.");
      ioe.printStackTrace();
      System.exit(1);
    }
    finally
    {
      // --- best-effort cleanup; either handle may still be null here
      closeQuietly(out);
      closeQuietly(in);
    }
  }

  // ----------------------------------------------------------------------

  /**
   * Fetches a record through EFetch and condenses it into a {@code Citation}.
   *
   * <p>When {@code db} is "pubmed" (case-insensitive) the PubMed part of the
   * response is read; for any other value the PMC part is read and the
   * abstract is cut out of the separately downloaded PMC XML.  Fields that
   * are not available are passed to the citation as empty strings.</p>
   *
   * @param db Entrez database name
   * @param id record id
   * @return citation built from id, authors, title, reference
   *         ("journal. year; volume:issue") and abstract
   * @throws Exception if the service call or the PMC download fails
   */
  public Citation getCitation(String db, String id) throws Exception
  {
    System.out.println("Entrez.getCitation(" + db + ", " + id + "): fetch citation details");

    EFetchRequest parameters = new EFetchRequest();
    parameters.setDb(db);
    parameters.setReport("xml");
    parameters.setId(id);
    EFetchResult res = utils.run_eFetch(parameters);

    if ("pubmed".equalsIgnoreCase(db)) return pubmedCitation(res, id);
    else                               return pmcCitation(res, id);
  }

  /** Builds a citation from the PubMed part of an EFetch response. */
  private Citation pubmedCitation(EFetchResult res, String id)
  {
    ArticleType article = firstPubmedArticle(res);
    if (article == null) return new Citation(id, "", "", "", "");

    String journal  = "";
    String date     = "";
    String issue    = "";
    String title    = "";
    String Abstract = ""; // --- NB: abstract is a java *reserved word*

    JournalType j = article.getJournal();
    if (j != null)
    {
      // --- journal title ---
      if (j.getTitle() != null) journal = j.getTitle();

      // --- volume & issue ---
      JournalIssueType ji = j.getJournalIssue();
      if (ji != null)
      {
        if (ji.getVolume() != null)
        {
          issue = ji.getVolume();
          if (ji.getIssue() != null) issue += ":" + ji.getIssue();
        }

        // --- a pub date without a year must not turn into the text "null"
        if (ji.getPubDate() != null && ji.getPubDate().getYear() != null)
        {
          date = ji.getPubDate().getYear();
        }
      }
    }

    String ref = journal + ". " + date + "; " + issue;

    // --- authors ---
    StringBuilder authors = new StringBuilder();
    if (article.getAuthorList() != null)
    {
      AuthorType_nlmc[] authorList = article.getAuthorList().getAuthor();

      for (int i = 0; authorList != null && i < authorList.length; i++)
        appendAuthor(authors, authorList[i].getLastName(), authorList[i].getInitials());
    }

    // --- title ---
    if (article.getArticleTitle() != null) title = article.getArticleTitle();

    // --- abstract ---
    if (article.get_abstract() != null && article.get_abstract().getAbstractText() != null)
      Abstract = article.get_abstract().getAbstractText();

    return new Citation(id, authors.toString(), title, ref, Abstract);
  }

  /** Builds a citation from the PMC part of an EFetch response plus the raw PMC XML. */
  private Citation pmcCitation(EFetchResult res, String id) throws IOException
  {
    if (!hasPmcArticle(res)) return new Citation(id, "", "", "", "");

    ArticleMeta ameta = res.getPmcArticleset().getArticle()[0].getFront().getArticleMeta();
    JournalMeta jmeta = res.getPmcArticleset().getArticle()[0].getFront().getJournalMeta();

    // --- journal title ---
    String journal = "";
    JournalTitle[] journalTitles = (jmeta == null) ? null : jmeta.getJournalTitle();
    if (journalTitles != null && journalTitles.length > 0) journal = concat(journalTitles[0].get_any());

    // --- publication date: the print year wins over the electronic one ---
    String epubDate = "";
    String ppubDate = "";
    PubDate[] pubDates = ameta.getPubDate();

    for (int i = 0; pubDates != null && i < pubDates.length; i++)
    {
      String pubType = pubDates[i].getPubType();
      String year    = pubDates[i].getYear();

      if (year == null) continue;

      if      ("epub".equals(pubType)) epubDate = year;
      else if ("ppub".equals(pubType)) ppubDate = year;
    }
    String date = (ppubDate.length() > 0) ? ppubDate : epubDate;

    // --- volume & issue ---
    String issue = "";
    Volume volume = ameta.getVolume();
    if (volume != null) issue += concat(volume.get_any());

    Issue issueElement = ameta.getIssue();
    if (issueElement != null)
    {
      Object[] issueParts = issueElement.get_any();
      if (issueParts != null && issueParts.length > 0) issue += ":" + concat(issueParts);
    }

    String ref = journal + ". " + date + "; " + issue;

    // --- title ---
    String title = "";
    TitleGroup titleGroup = ameta.getTitleGroup();
    if (titleGroup != null && titleGroup.getArticleTitle() != null)
      title = concat(titleGroup.getArticleTitle().get_any());

    // --- authors: contributors without a name element are left out ---
    StringBuilder authors = new StringBuilder();
    ContribGroup cg = ameta.getContribGroup();
    if (cg != null)
    {
      Contrib[] contributor = cg.getContrib();
      for (int i = 0; contributor != null && i < contributor.length; i++)
      {
        Name name = contributor[i].getName();
        if (name == null) continue;

        Surname    surname = name.getSurname();
        GivenNames given   = name.getGivenNames();

        appendAuthor(authors,
                     (surname == null) ? "" : concat(surname.get_any()),
                     (given   == null) ? "" : concat(given.get_any()));
      }
    }

    // --- abstract: cut verbatim (tags included) out of the raw PMC XML ---
    String Abstract = ""; // --- NB: abstract is a java *reserved word*
    String doc = fetchPmcXml(id);

    int b = doc.indexOf("<abstract>");
    if (b >= 0)
    {
      // --- look for the closing tag only behind the opening one
      int e = doc.indexOf("</abstract>", b);
      if (e >= 0) Abstract = doc.substring(b, e + "</abstract>".length());
    }

    return new Citation(id, authors.toString(), title, ref, Abstract);
  }

  // ----------------------------------------------------------------------

  /**
   * Fetches the abstract text of a PubMed record.
   *
   * @param id PubMed id
   * @return the abstract text, or an empty string when the response contains
   *         no article or the article has no abstract
   * @throws Exception if the service call fails
   */
  public String getAbstract(String id) throws Exception
  {
    System.out.println("Entrez.getAbstract(" + id + "): fetch abstract from PubMed");

    EFetchRequest parameters = new EFetchRequest();
    parameters.setDb("pubmed");
    parameters.setReport("xml");
    parameters.setId(id);
    EFetchResult res = utils.run_eFetch(parameters);

    ArticleType article = firstPubmedArticle(res);

    if (article == null || article.get_abstract() == null) return "";
    if (article.get_abstract().getAbstractText() == null)  return "";

    return article.get_abstract().getAbstractText();
  }

  // ----------------------------------------------------------------------

  /**
   * Downloads the PMC XML of a record as one string.
   *
   * <p>The lines of the response are concatenated without any separator, so
   * the result contains no line breaks.</p>
   *
   * @param id id of the record in the "pmc" database
   * @return the downloaded document, or {@code null} when it is empty
   * @throws Exception if the download fails
   */
  public String getPaper(String id) throws Exception
  {
    System.out.println("Entrez.getPaper(" + id + "): fetches a full paper from PubMed Central");

    String doc = fetchPmcXml(id);

    return (doc.length() > 0) ? doc : null;
  }

  // ----------------------------------------------------------------------

  /**
   * Extracts the "materials and methods" sections from PMC XML.
   *
   * <p>A section qualifies when it starts with the literal text
   * {@code <sec sec-type} and the first quoted value that follows is one of
   * "methods", "materials", "materials|methods" or "methods|materials".  Each
   * qualifying section is copied verbatim, from its opening tag through the
   * matching {@code </sec>} (nested sections included), and all of them are
   * concatenated in document order.  Sections whose closing tag is missing
   * are skipped instead of raising an exception.</p>
   *
   * @param paper PMC XML as returned by {@link #getPaper}; may be {@code null}
   * @return the concatenated sections, or an empty string when there are none
   * @throws Exception declared for backward compatibility only
   */
  public String MandM(String paper) throws Exception
  {
    StringBuilder sections = new StringBuilder();
    if (paper == null) return sections.toString();

    int from = 0;
    int start;

    while ((start = paper.indexOf("<sec sec-type", from)) >= 0)
    {
      // --- by default resume just behind this tag so nested sections are inspected too
      from = start + 1;

      int quoteOpen  = paper.indexOf('"', start);
      int quoteClose = (quoteOpen < 0) ? -1 : paper.indexOf('"', quoteOpen + 1);

      // --- no complete quoted value behind this tag: none can follow later either
      if (quoteClose < 0) break;

      if (!isMethodsType(paper.substring(quoteOpen + 1, quoteClose))) continue;

      int end = findSectionEnd(paper, start + 1);
      if (end < 0) continue;  // --- unbalanced markup: skip this section

      sections.append(paper, start, end);
      from = end;
    }

    return sections.toString();
  }

  /** Tells whether a sec-type value denotes a materials and/or methods section. */
  private static boolean isMethodsType(String secType)
  {
    return secType.equals("methods")
        || secType.equals("materials")
        || secType.equals("materials|methods")
        || secType.equals("methods|materials");
  }

  /**
   * Returns the index just behind the {@code </sec>} that closes a section
   * whose opening tag lies before {@code from}, or -1 when the tags are unbalanced.
   */
  private static int findSectionEnd(String text, int from)
  {
    int depth = 1;
    int pos   = from;

    while (depth > 0)
    {
      int open  = nextSecOpen(text, pos);
      int close = text.indexOf("</sec>", pos);

      if (close < 0) return -1;

      if (open >= 0 && open < close) { depth++; pos = open + 1; }
      else                           { depth--; pos = close + "</sec>".length(); }
    }

    return pos;
  }

  /**
   * Finds the next opening sec tag at or behind {@code from}.  Only "<sec"
   * followed by white space or '>' counts, so differently named elements that
   * merely start with the same letters (e.g. "sec-meta") are not mistaken for
   * nested sections.
   */
  private static int nextSecOpen(String text, int from)
  {
    int at = text.indexOf("<sec", from);

    while (at >= 0)
    {
      int after = at + "<sec".length();
      if (after >= text.length()) return -1;

      char c = text.charAt(after);
      if (c == '>' || Character.isWhitespace(c)) return at;

      at = text.indexOf("<sec", after);
    }

    return -1;
  }

  // ----------------------------------------------------------------------

  /**
   * Strips a fixed list of PMC XML tags and all hexadecimal character
   * references from a text, keeping the text between the tags.
   *
   * <p>For every listed tag name the exact closing tag is removed first
   * ({@code </title>} is replaced by ". " instead), then every opening tag
   * that <em>starts with</em> that name, attributes included.  Finally opening
   * inline-graphic tags and references of the form {@code &#x...;} are
   * removed.  Tags that are not covered by the list are left untouched.</p>
   *
   * @param text text containing PMC markup; may be {@code null}
   * @return the text without the listed markup, or {@code null} for {@code null} input
   * @throws Exception declared for backward compatibility only
   */
  public String cleanPMCtags(String text) throws Exception
  {
    if (text == null) return null;

    String clean = text;

    for (int i = 0; i < PMC_TAGS.length; i++)
    {
      // --- a closing title tag becomes a sentence break so headings do not run into the text
      String replacement = PMC_TAGS[i].equals("title") ? ". " : "";

      clean = clean.replace("</" + PMC_TAGS[i] + ">", replacement);
      clean = PMC_OPEN_TAG_PATTERNS[i].matcher(clean).replaceAll("");
    }

    clean = INLINE_GRAPHIC_PATTERN.matcher(clean).replaceAll("");
    clean = HEX_ENTITY_PATTERN.matcher(clean).replaceAll("");

    return clean;
  }

  /** Compiles one opening-tag pattern per entry of {@link #PMC_TAGS}. */
  private static java.util.regex.Pattern[] compileOpenTagPatterns()
  {
    java.util.regex.Pattern[] patterns = new java.util.regex.Pattern[PMC_TAGS.length];

    for (int i = 0; i < PMC_TAGS.length; i++)
      patterns[i] = java.util.regex.Pattern.compile("<" + PMC_TAGS[i] + "[^>]*>");

    return patterns;
  }

  // ----------------------------------------------------------------------

  /**
   * Builds the address of an article page on PubMed Central.
   *
   * @param PMCID PMC id, appended as given
   * @return the article URL
   */
  public String urlPMC(String PMCID)
  {
    return "http://www.pubmedcentral.nih.gov/articlerender.fcgi?tool=pmcentrez&artid=" + PMCID;
  }

  // ----------------------------------------------------------------------

  /**
   * Builds the address of the abstract page of a PubMed record.
   *
   * @param PMID PubMed id, appended as given
   * @return the abstract URL
   */
  public String urlPubMed(String PMID)
  {
    return "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=" + PMID;
  }

  // ----------------------------------------------------------------------
  // --- private helpers
  // ----------------------------------------------------------------------

  /** Tells whether the response carries at least one PMC article. */
  private static boolean hasPmcArticle(EFetchResult res)
  {
    return res.getPmcArticleset() != null
        && res.getPmcArticleset().getArticle() != null
        && res.getPmcArticleset().getArticle().length > 0;
  }

  /** Returns the article of the first PubMed record in the response, or null when there is none. */
  private static ArticleType firstPubmedArticle(EFetchResult res)
  {
    if (res.getPubmedArticleSet() == null)                          return null;
    if (res.getPubmedArticleSet().getPubmedArticle() == null)       return null;
    if (res.getPubmedArticleSet().getPubmedArticle().length == 0)   return null;

    return res.getPubmedArticleSet().getPubmedArticle()[0].getMedlineCitation().getArticle();
  }

  /** Concatenates the string forms of all parts; a null or empty array yields an empty string. */
  private static String concat(Object[] parts)
  {
    StringBuilder text = new StringBuilder();

    for (int i = 0; parts != null && i < parts.length; i++) text.append(parts[i].toString());

    return text.toString();
  }

  /**
   * Appends "family given" to a ", "-separated author list.  Missing (null)
   * parts count as empty, and an author with no text at all is left out.
   */
  private static void appendAuthor(StringBuilder authors, Object familyName, Object givenPart)
  {
    String family = (familyName == null) ? "" : familyName.toString();
    String given  = (givenPart  == null) ? "" : givenPart.toString();
    String entry  = (family + " " + given).trim();

    if (entry.length() == 0) return;

    if (authors.length() > 0) authors.append(", ");
    authors.append(entry);
  }

  /**
   * Downloads the PMC XML of a record; lines are joined without a separator.
   * The connection is closed whether or not reading succeeds.
   */
  private static String fetchPmcXml(String id) throws IOException
  {
    InputStream in = new java.net.URL(PMC_EFETCH_URL + id).openStream();  // --- throws an IOException

    try
    {
      BufferedReader reader = new BufferedReader(new InputStreamReader(in, XML_CHARSET));
      StringBuilder  doc    = new StringBuilder();
      String         line;

      while ( (line = reader.readLine()) != null ) doc.append(line);

      return doc.toString();
    }
    finally
    {
      closeQuietly(in);
    }
  }

  /** Closes a resource if it is open; a failure to close is deliberately ignored. */
  private static void closeQuietly(java.io.Closeable resource)
  {
    if (resource == null) return;

    try
    {
      resource.close();
    }
    catch (IOException ignored)
    {
      // --- best-effort cleanup: nothing useful can be done at this point
    }
  }

  // ----------------------------------------------------------------------

}
