001package org.biojava.ontology.io;
002
003import java.io.BufferedReader;
004import java.io.IOException;
005import java.util.StringTokenizer;
006
007import org.biojava.bio.BioError;
008import org.biojava.ontology.AlreadyExistsException;
009import org.biojava.ontology.Ontology;
010import org.biojava.ontology.OntologyException;
011import org.biojava.ontology.OntologyFactory;
012import org.biojava.ontology.Term;
013import org.biojava.ontology.Triple;
014import org.biojava.utils.ChangeVetoException;
015
016/**
017 * Parse tab-delimited ontology files into Ontology objects.
018 *
019 * <p>
020 * The tab-delimited ontology files have three types of lines. Lines that are
021 * pure white space can be discarded. Comment lines begin with a hash (#) and
022 * can be discarded. The payload lines contain three fields separated by tabs.
023 * These are <code>subject</code>, <code>predicate</code> and
024 * <code>object</code>.
025 * By convention, the content of each field contains no spaces.
026 * </p>
027 *
028 * <p>
029 * By convention, if there are comment lines beginning with <code>name:</code>
030 * or <code>description:</code> and these appear before any predicate
031 * declarations then they become the name and description of the ontology.
032 * Otherwise, the name and description will be the empty string.
033 * </p>
034 *
035 * <p>
036 * Term names normally will be just a term name like <code>predicate</code> or
037 * <code>person</code>. There are also terms that represent collections of
038 * triples. For example, here is the declaration for the 'triple' type in
039 * the core ontology.
040 * </p>
041 *
042 * <code><pre>
043 * ...
044 * triple       is-a    any
045 * triple       has-a   source
046 * triple       has-a   target
047 * triple       has-a   predicate
048 * (triple,has-a,any)   size    3
049 * ...
050 * </pre></code>
051 *
052 * <p>
053 * The first four lines just associate triple with some type with a predicate
054 * (e.g. is-a or has-a). The fifth line says that something must have a size of
055 * three. The 'something' is <code>(triple,has-a,any)</code> and is
056 * short-hand for a collection of triples that state that the source must be
057 * <code>triple</code>, the target must be <code>any</code> and the predicate
058 * must be <code>has-a</code>. This whole expression states that a triple
059 * has exactly three has-a relationships; that is, exactly three properties.
060 * </p>
061 *
062 * @author Matthew Pocock
063 */
064public class TabDelimParser {
065  /**
066   * Parse an ontology from a reader.
067   * The reader will be emptied of text. It is the caller's responsibility to
068   * close the reader.
069   *
070   * @param in  the BufferedReader to read from
071   * @param of  an OntologyFactory used to create the Ontology instance
072   * @return  a new Ontology
073   * @throws IOException if there is some problem with the buffered reader
074   * @throws OntologyException if it was not possible to instantiate a new
075   *         ontology
076   */
077  public Ontology parse(BufferedReader in, OntologyFactory of)
078  throws IOException, OntologyException {
079    String name = "";
080    String description = "";
081    Ontology onto = null;
082
083    for(
084      String line = in.readLine();
085      line != null;
086      line = in.readLine()
087    ) {
088      line = line.trim();
089      if(line.length() > 0) {
090        if(line.startsWith("#")) {
091          // comment line - let's try to pull out name or description
092
093          if(line.startsWith("#name:")) {
094            name = line.substring("#name:".length()).trim();
095          } else if(line.startsWith("#description:")) {
096            description = line.substring("#description:".length()).trim();
097          }
098        } else {
099          try {
100            // make sure we have an ontology
101            if(onto == null) {
102              onto = of.createOntology(name, description);
103            }
104
105            // build a tripple
106
107            /*
108
109            int t1 = line.indexOf("\t");
110            int t2 = line.indexOf("\t", t1 + 1);
111
112            String subject  = line.substring(0, t1);
113            String predicate = line.substring(t1 + 1, t2);
114            String object   = line.substring(t2 + 1);
115
116            */
117
118            StringTokenizer toke = new StringTokenizer(line);
119            String subject = toke.nextToken();
120            String predicate = toke.nextToken();
121            String object = toke.nextToken();
122
123            Term subT = resolveTerm(subject, onto);
124            Term objT = resolveTerm(object, onto);
125            Term relT = resolveTerm(predicate, onto);
126
127            Triple trip = resolveTriple(subT, objT, relT, onto);
128            trip = trip==null?null:trip; // prevent unused field error
129          } catch (StringIndexOutOfBoundsException e) {
130            throw new IOException("Could not parse line: " + line);
131          }
132        }
133      }
134    }
135
136    return onto;
137  }
138
139  private Term resolveTerm(String termName, Ontology onto) {
140    boolean isTrippleTerm = termName.startsWith("(") && termName.endsWith(")");
141
142    if(onto.containsTerm(termName)) {
143      return onto.getTerm(termName);
144    } else {
145      try {
146        if(isTrippleTerm) {
147          int c1 = termName.indexOf(",");
148          int c2 = termName.indexOf(",", c1 + 1);
149
150          String source = termName.substring(1, c1);
151          String target = termName.substring(c2 + 1, termName.length() - 1);
152          String predicate = termName.substring(c1 + 1, c2);
153
154          Term st = resolveTerm(source, onto);
155          Term tt = resolveTerm(target, onto);
156          Term rt = resolveTerm(predicate, onto);
157
158          return onto.createTriple(st, tt, rt, null, null);
159        } else {
160          return onto.createTerm(termName, "");
161        }
162      } catch (AlreadyExistsException aee) {
163        throw new BioError("Assertion Failure: Could not create term", aee);
164      } catch (ChangeVetoException cve) {
165        throw new BioError("Assertion Failure: Could not create term", cve);
166      }
167    }
168  }
169
170  private Triple resolveTriple(Term sub, Term obj, Term rel, Ontology onto) {
171    if(onto.containsTriple(sub, obj, rel)) {
172      return (Triple) onto.getTriples(sub, obj, rel).iterator().next();
173    } else {
174      try {
175        return onto.createTriple(sub, obj, rel, null, null);
176      } catch (AlreadyExistsException aee) {
177        throw new BioError("Assertion Failure: Could not create triple",aee);
178      } catch (ChangeVetoException cve) {
179        throw new BioError("Assertion Failure: Could not create triple", cve);
180      }
181    }
182  }
183}