001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.ontology.io; 023 024import java.io.BufferedReader; 025import java.io.IOException; 026import java.util.ArrayList; 027import java.util.List; 028import java.util.StringTokenizer; 029 030import org.biojava.bio.BioError; 031import org.biojava.bio.seq.io.ParseException; 032import org.biojava.ontology.AlreadyExistsException; 033import org.biojava.ontology.OntoTools; 034import org.biojava.ontology.Ontology; 035import org.biojava.ontology.OntologyException; 036import org.biojava.ontology.OntologyFactory; 037import org.biojava.ontology.Term; 038import org.biojava.utils.ChangeVetoException; 039 040/** 041 * Simple parser for the Gene Ontology (GO) flatfile format. 042 * 043 * @author Thomas Down 044 * @since 1.4 045 */ 046 047public class GOParser { 048 public Ontology parseGO(BufferedReader goFile, 049 String ontoName, 050 String ontoDescription, 051 OntologyFactory factory) 052 throws ParseException, IOException 053 { 054 try { 055 Ontology onto = factory.createOntology(ontoName, ontoDescription); 056 Term isa = onto.importTerm(OntoTools.IS_A, null); 057 Term partof = null; // fixme: onto.importTerm(OntoTools.PART_OF, null); 058 List termStack = new ArrayList(); 059 String line; 060 while ((line = goFile.readLine()) != null) { 061 int leadSpaces = 0; 062 while (line.charAt(leadSpaces) == ' ') { 063 ++leadSpaces; 064 } 065 line = line.trim(); 066 if (line.startsWith("!")) { 067 continue; 068 } 069 070 StringTokenizer toke = new StringTokenizer(line, "%<$", true); 071 String parentRel = toke.nextToken(); 072 Term term = parseTerm(onto, toke.nextToken()); 073 if (parentRel.equals("%")) { 074 safeAddTriple(onto, term, (Term) termStack.get(leadSpaces - 1), isa); 075 } else if (parentRel.equals("<")) { 076 safeAddTriple(onto, term, (Term) termStack.get(leadSpaces - 1), partof); 077 } 078 while (toke.hasMoreTokens()) { 079 String altRel = toke.nextToken(); 080 Term altTerm = parseTerm(onto, toke.nextToken()); 081 if (altRel.equals("%")) { 082 safeAddTriple(onto, term, altTerm, isa); 083 } else if (altRel.equals("<")) { 084 safeAddTriple(onto, term, altTerm, partof); 085 } 086 } 087 088 if (termStack.size() == leadSpaces) { 089 termStack.add(term); 090 } else { 091 termStack.set(leadSpaces, term); 092 } 093 } 094 return onto; 095 } catch (AlreadyExistsException ex) { 096 throw new ParseException(ex, "Duplication in ontology"); 097 } catch (OntologyException ex) { 098 throw new ParseException(ex); 099 } catch (ChangeVetoException ex) { 100 throw new BioError("Error accessing newly created ontology",ex); 101 } 102 } 103 104 private void safeAddTriple(Ontology onto, Term s, Term o, Term p) 105 throws AlreadyExistsException, ChangeVetoException 106 { 107 if (!onto.containsTriple(s, o, p)) { 108 onto.createTriple(s, o, p, null, null); 109 } 110 } 111 112 private Term parseTerm(Ontology onto, String s) 113 throws ParseException, AlreadyExistsException, ChangeVetoException 114 { 115 int semi = s.indexOf(';'); 116 int semi2 = s.indexOf(';', semi + 1); 117 if (semi < 0) { 118 throw new ParseException("No semicolon in " + s); 119 } 120 String termDesc = s.substring(0, semi).trim(); 121 String termName; 122 if (semi2 < 0) { 123 termName = s.substring(semi + 1).trim(); 124 } else { 125 termName = s.substring(semi + 1, semi2).trim(); 126 } 127 StringTokenizer toke = new StringTokenizer(termName, ", "); 128 termName = toke.nextToken(); 129 if (onto.containsTerm(termName)) { 130 return onto.getTerm(termName); 131 } else { 132 Term t = onto.createTerm(termName, termDesc); 133 if (toke.hasMoreTokens()) { 134 List secondaries = new ArrayList(); 135 while (toke.hasMoreTokens()) { 136 secondaries.add(toke.nextToken()); 137 } 138 t.getAnnotation().setProperty("go.secondary_ids", secondaries); 139 } 140 return t; 141 } 142 } 143} 144 145