001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.ontology.io;
022
023import org.biojava.nbio.ontology.*;
024
025import java.io.BufferedReader;
026import java.io.IOException;
027import java.util.StringTokenizer;
028
029
030
031/**
032 * Parse tab-delimited ontology files into Ontology objects.
033 *
034 * <p>
035 * The tab-delimited ontology files have three types of lines. Lines that are
036 * pure white space can be discarded. Comment lines begin with a hash (#) and
037 * can be discarded. The payload lines contain three fields seperated by tabs.
038 * These are <code>subject</code>, <code>predicate</code> and
039 * <code>object</code>.
040 * By convention, the content of each field contains no spaces.
041 * </p>
042 *
043 * <p>
044 * By convention, if there are comment lines beginning with <code>name:</code>
045 * or <code>description:</code> and these appear before any predicate
046 * declarations then they become the name and description of the ontology.
047 * Otherwise, the name and description will be the empty string.
048 * </p>
049 *
050 * <p>
051 * Term names normally will be just a term name like <code>predicate</code> or
052 * <code>person</code>. There are also terms that represent collections of
053 * triples. For example, here is the declaration for the 'triple' type in
054 * the core ontology.
055 * </p>
056 *
057 * <code><pre>
058 * ...
059 * triple       is-a    any
060 * triple       has-a   source
061 * triple       has-a   target
062 * triple       has-a   predicate
063 * (triple,has-a,any)   size    3
064 * ...
065 * </pre></code>
066 *
067 * <p>
068 * The first four lines just associate triple with some type with a predicate
069 * (e.g. is-a or has-a). The fifth line says that something must have a size of
070 * three. The 'something' is <code>(triple,has-a,any)   size    3</code> and is
071 * short-hand for a collection of triples that state that the source must be
072 * <code>triple</code>, the target must be <code>any</code> and the predicate
073 * must be <code>has-a</code>. This whole expression states that a triple
074 * has exactly three has-a relationships; that is, exactly three properties.
075 * </p>
076 *
077 * @author Matthew Pocock
078 */
079public class TabDelimParser {
080        /**
081         * Parse an ontology from a reader.
082         * The reader will be emptied of text. It is the caller's responsibility to
083         * close the reader.
084         *
085         * @param in  the BufferedReader to read from
086         * @param of  an OntologyFactory used to create the Ontology instance
087         * @return  a new Ontology
088         * @throws IOException if there is some problem with the buffered reader
089         * @throws OntologyException if it was not possible to instantiate a new
090         *         ontology
091         */
092        public Ontology parse(BufferedReader in, OntologyFactory of)
093        throws IOException, OntologyException {
094                String name = "";
095                String description = "";
096                Ontology onto = null;
097
098                for(
099                        String line = in.readLine();
100                        line != null;
101                        line = in.readLine()
102                ) {
103                        line = line.trim();
104                        if(line.length() > 0) {
105                                if(line.startsWith("#")) {
106                                        // comment line - let's try to pull out name or description
107
108                                        if(line.startsWith("#name:")) {
109                                                name = line.substring("#name:".length()).trim();
110                                        } else if(line.startsWith("#description:")) {
111                                                description = line.substring("#description:".length()).trim();
112                                        }
113                                } else {
114                                        try {
115                                                // make sure we have an ontology
116                                                if(onto == null) {
117                                                        onto = of.createOntology(name, description);
118                                                }
119
120                                                // build a tripple
121
122                                                /*
123
124                                                int t1 = line.indexOf("\t");
125                                                int t2 = line.indexOf("\t", t1 + 1);
126
127                                                String subject  = line.substring(0, t1);
128                                                String predicate = line.substring(t1 + 1, t2);
129                                                String object   = line.substring(t2 + 1);
130
131                                                */
132
133                                                StringTokenizer toke = new StringTokenizer(line);
134                                                String subject = toke.nextToken();
135                                                String predicate = toke.nextToken();
136                                                String object = toke.nextToken();
137
138                                                Term subT = resolveTerm(subject, onto);
139                                                Term objT = resolveTerm(object, onto);
140                                                Term relT = resolveTerm(predicate, onto);
141
142                                                Triple trip = resolveTriple(subT, objT, relT, onto);
143                                                trip = trip==null?null:trip; // prevent unused field error
144                                        } catch (StringIndexOutOfBoundsException e) {
145                                                throw new IOException("Could not parse line: " + line);
146                                        }
147                                }
148                        }
149                }
150
151                return onto;
152        }
153
154        private Term resolveTerm(String termName, Ontology onto) {
155                boolean isTrippleTerm = termName.startsWith("(") && termName.endsWith(")");
156
157                if(onto.containsTerm(termName)) {
158                        return onto.getTerm(termName);
159                } else {
160                        try {
161                                if(isTrippleTerm) {
162                                        int c1 = termName.indexOf(",");
163                                        int c2 = termName.indexOf(",", c1 + 1);
164
165                                        String source = termName.substring(1, c1);
166                                        String target = termName.substring(c2 + 1, termName.length() - 1);
167                                        String predicate = termName.substring(c1 + 1, c2);
168
169                                        Term st = resolveTerm(source, onto);
170                                        Term tt = resolveTerm(target, onto);
171                                        Term rt = resolveTerm(predicate, onto);
172
173                                        return onto.createTriple(st, tt, rt, null, null);
174                                } else {
175                                        return onto.createTerm(termName, "");
176                                }
177                        } catch (AlreadyExistsException aee) {
178                                throw new RuntimeException("Assertion Failure: Could not create term", aee);
179                        }
180                }
181        }
182
183        private Triple resolveTriple(Term sub, Term obj, Term rel, Ontology onto) {
184                if(onto.containsTriple(sub, obj, rel)) {
185                        return onto.getTriples(sub, obj, rel).iterator().next();
186                } else {
187                        try {
188                                return onto.createTriple(sub, obj, rel, null, null);
189                        } catch (AlreadyExistsException aee) {
190                                throw new RuntimeException("Assertion Failure: Could not create triple",aee);
191                        }
192                }
193        }
194}