/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
021
022package org.biojava.ontology.io;
023
024import java.io.BufferedReader;
025import java.io.IOException;
026import java.util.ArrayList;
027import java.util.List;
028import java.util.StringTokenizer;
029
030import org.biojava.bio.BioError;
031import org.biojava.bio.seq.io.ParseException;
032import org.biojava.ontology.AlreadyExistsException;
033import org.biojava.ontology.OntoTools;
034import org.biojava.ontology.Ontology;
035import org.biojava.ontology.OntologyException;
036import org.biojava.ontology.OntologyFactory;
037import org.biojava.ontology.Term;
038import org.biojava.utils.ChangeVetoException;
039
040/**
041 * Simple parser for the Gene Ontology (GO) flatfile format.
042 *
043 * @author Thomas Down
044 * @since 1.4
045 */
046
047public class GOParser {
048    public Ontology parseGO(BufferedReader goFile,
049                            String ontoName,
050                            String ontoDescription,
051                            OntologyFactory factory)
052        throws ParseException, IOException
053    {
054        try {
055            Ontology onto = factory.createOntology(ontoName, ontoDescription);
056            Term isa = onto.importTerm(OntoTools.IS_A, null);
057            Term partof = null; // fixme: onto.importTerm(OntoTools.PART_OF, null);
058            List termStack = new ArrayList();
059            String line;
060            while ((line = goFile.readLine()) != null) {
061                int leadSpaces = 0;
062                while (line.charAt(leadSpaces) == ' ') {
063                    ++leadSpaces;
064                }
065                line = line.trim();
066                if (line.startsWith("!")) {
067                    continue;
068                }
069
070                StringTokenizer toke = new StringTokenizer(line, "%<$", true);
071                String parentRel = toke.nextToken();
072                Term term = parseTerm(onto, toke.nextToken());
073                if (parentRel.equals("%")) {
074                    safeAddTriple(onto, term, (Term) termStack.get(leadSpaces - 1), isa);
075                } else if (parentRel.equals("<")) {
076                    safeAddTriple(onto, term, (Term) termStack.get(leadSpaces - 1), partof);
077                }
078                while (toke.hasMoreTokens()) {
079                    String altRel = toke.nextToken();
080                    Term altTerm = parseTerm(onto, toke.nextToken());
081                    if (altRel.equals("%")) {
082                        safeAddTriple(onto, term, altTerm, isa);
083                    } else if (altRel.equals("<")) {
084                        safeAddTriple(onto, term, altTerm, partof);
085                    }
086                }
087
088                if (termStack.size() == leadSpaces) {
089                    termStack.add(term);
090                } else {
091                    termStack.set(leadSpaces, term);
092                }
093            }
094            return onto;
095        } catch (AlreadyExistsException ex) {
096            throw new ParseException(ex, "Duplication in ontology");
097        } catch (OntologyException ex) {
098            throw new ParseException(ex);
099        } catch (ChangeVetoException ex) {
100            throw new BioError("Error accessing newly created ontology",ex);
101        }
102    }
103
104    private void safeAddTriple(Ontology onto, Term s, Term o, Term p)
105        throws AlreadyExistsException, ChangeVetoException
106    {
107        if (!onto.containsTriple(s, o, p)) {
108            onto.createTriple(s, o, p, null, null);
109        }
110    }
111
112    private Term parseTerm(Ontology onto, String s)
113        throws ParseException, AlreadyExistsException, ChangeVetoException
114    {
115        int semi = s.indexOf(';');
116        int semi2 = s.indexOf(';', semi + 1);
117        if (semi < 0) {
118            throw new ParseException("No semicolon in " + s);
119        }
120        String termDesc = s.substring(0, semi).trim();
121        String termName;
122        if (semi2 < 0) {
123            termName = s.substring(semi + 1).trim();
124        } else {
125            termName = s.substring(semi + 1, semi2).trim();
126        }
127        StringTokenizer toke = new StringTokenizer(termName, ", ");
128        termName = toke.nextToken();
129        if (onto.containsTerm(termName)) {
130            return onto.getTerm(termName);
131        } else {
132            Term t = onto.createTerm(termName, termDesc);
133            if (toke.hasMoreTokens()) {
134                List secondaries = new ArrayList();
135                while (toke.hasMoreTokens()) {
136                    secondaries.add(toke.nextToken());
137                }
138                t.getAnnotation().setProperty("go.secondary_ids", secondaries);
139            }
140            return t;
141        }
142    }
143}
144
145