001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.ontology.io; 022 023import org.biojava.nbio.ontology.*; 024 025import java.io.BufferedReader; 026import java.io.IOException; 027import java.util.StringTokenizer; 028 029 030 031/** 032 * Parse tab-delimited ontology files into Ontology objects. 033 * 034 * <p> 035 * The tab-delimited ontology files have three types of lines. Lines that are 036 * pure white space can be discarded. Comment lines begin with a hash (#) and 037 * can be discarded. The payload lines contain three fields seperated by tabs. 038 * These are <code>subject</code>, <code>predicate</code> and 039 * <code>object</code>. 040 * By convention, the content of each field contains no spaces. 041 * </p> 042 * 043 * <p> 044 * By convention, if there are comment lines beginning with <code>name:</code> 045 * or <code>description:</code> and these appear before any predicate 046 * declarations then they become the name and description of the ontology. 047 * Otherwise, the name and description will be the empty string. 048 * </p> 049 * 050 * <p> 051 * Term names normally will be just a term name like <code>predicate</code> or 052 * <code>person</code>. There are also terms that represent collections of 053 * triples. For example, here is the declaration for the 'triple' type in 054 * the core ontology. 055 * </p> 056 * 057 * <pre> 058 * ... 059 * triple is-a any 060 * triple has-a source 061 * triple has-a target 062 * triple has-a predicate 063 * (triple,has-a,any) size 3 064 * ... 065 * </pre> 066 * 067 * <p> 068 * The first four lines just associate triple with some type with a predicate 069 * (e.g. is-a or has-a). The fifth line says that something must have a size of 070 * three. The 'something' is <code>(triple,has-a,any) size 3</code> and is 071 * short-hand for a collection of triples that state that the source must be 072 * <code>triple</code>, the target must be <code>any</code> and the predicate 073 * must be <code>has-a</code>. This whole expression states that a triple 074 * has exactly three has-a relationships; that is, exactly three properties. 075 * </p> 076 * 077 * @author Matthew Pocock 078 */ 079public class TabDelimParser { 080 /** 081 * Parse an ontology from a reader. 082 * The reader will be emptied of text. It is the caller's responsibility to 083 * close the reader. 084 * 085 * @param in the BufferedReader to read from 086 * @param of an OntologyFactory used to create the Ontology instance 087 * @return a new Ontology 088 * @throws IOException if there is some problem with the buffered reader 089 * @throws OntologyException if it was not possible to instantiate a new 090 * ontology 091 */ 092 public Ontology parse(BufferedReader in, OntologyFactory of) 093 throws IOException, OntologyException { 094 String name = ""; 095 String description = ""; 096 Ontology onto = null; 097 098 for( 099 String line = in.readLine(); 100 line != null; 101 line = in.readLine() 102 ) { 103 line = line.trim(); 104 if(line.length() > 0) { 105 if(line.startsWith("#")) { 106 // comment line - let's try to pull out name or description 107 108 if(line.startsWith("#name:")) { 109 name = line.substring("#name:".length()).trim(); 110 } else if(line.startsWith("#description:")) { 111 description = line.substring("#description:".length()).trim(); 112 } 113 } else { 114 try { 115 // make sure we have an ontology 116 if(onto == null) { 117 onto = of.createOntology(name, description); 118 } 119 120 // build a tripple 121 122 /* 123 124 int t1 = line.indexOf("\t"); 125 int t2 = line.indexOf("\t", t1 + 1); 126 127 String subject = line.substring(0, t1); 128 String predicate = line.substring(t1 + 1, t2); 129 String object = line.substring(t2 + 1); 130 131 */ 132 133 StringTokenizer toke = new StringTokenizer(line); 134 String subject = toke.nextToken(); 135 String predicate = toke.nextToken(); 136 String object = toke.nextToken(); 137 138 Term subT = resolveTerm(subject, onto); 139 Term objT = resolveTerm(object, onto); 140 Term relT = resolveTerm(predicate, onto); 141 142 Triple trip = resolveTriple(subT, objT, relT, onto); 143 trip = trip==null?null:trip; // prevent unused field error 144 } catch (StringIndexOutOfBoundsException e) { 145 throw new IOException("Could not parse line: " + line); 146 } 147 } 148 } 149 } 150 151 return onto; 152 } 153 154 private Term resolveTerm(String termName, Ontology onto) { 155 boolean isTrippleTerm = termName.startsWith("(") && termName.endsWith(")"); 156 157 if(onto.containsTerm(termName)) { 158 return onto.getTerm(termName); 159 } else { 160 try { 161 if(isTrippleTerm) { 162 int c1 = termName.indexOf(","); 163 int c2 = termName.indexOf(",", c1 + 1); 164 165 String source = termName.substring(1, c1); 166 String target = termName.substring(c2 + 1, termName.length() - 1); 167 String predicate = termName.substring(c1 + 1, c2); 168 169 Term st = resolveTerm(source, onto); 170 Term tt = resolveTerm(target, onto); 171 Term rt = resolveTerm(predicate, onto); 172 173 return onto.createTriple(st, tt, rt, null, null); 174 } else { 175 return onto.createTerm(termName, ""); 176 } 177 } catch (AlreadyExistsException aee) { 178 throw new RuntimeException("Assertion Failure: Could not create term", aee); 179 } 180 } 181 } 182 183 private Triple resolveTriple(Term sub, Term obj, Term rel, Ontology onto) { 184 if(onto.containsTriple(sub, obj, rel)) { 185 return onto.getTriples(sub, obj, rel).iterator().next(); 186 } else { 187 try { 188 return onto.createTriple(sub, obj, rel, null, null); 189 } catch (AlreadyExistsException aee) { 190 throw new RuntimeException("Assertion Failure: Could not create triple",aee); 191 } 192 } 193 } 194}