/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojavax.bio.seq.io;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import org.biojava.bio.BioError;
import org.biojava.bio.BioException;
import org.biojava.bio.seq.Feature;
import org.biojava.bio.seq.FeatureHolder;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SimpleFeatureHolder;
import org.biojava.bio.seq.io.ChunkedSymbolListFactory;
import org.biojava.bio.seq.io.ParseException;
import org.biojava.bio.symbol.Alphabet;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.Location;
import org.biojava.bio.symbol.SimpleSymbolListFactory;
import org.biojava.bio.symbol.Symbol;
import org.biojava.bio.symbol.SymbolList;
import org.biojava.bio.symbol.SymbolListFactory;
import org.biojava.ontology.InvalidTermException;
import org.biojava.utils.ChangeVetoException;
import org.biojavax.Comment;
import org.biojavax.Namespace;
import org.biojavax.Note;
import org.biojavax.RankedCrossRef;
import org.biojavax.RankedDocRef;
import org.biojavax.RichAnnotation;
import org.biojavax.RichObjectFactory;
import org.biojavax.SimpleComment;
import org.biojavax.SimpleNote;
import org.biojavax.SimpleRichAnnotation;
import org.biojavax.bio.BioEntryRelationship;
import org.biojavax.bio.seq.EmptyRichLocation;
import org.biojavax.bio.seq.RichFeature;
import org.biojavax.bio.seq.RichLocation;
import org.biojavax.bio.seq.RichSequence;
import org.biojavax.bio.seq.SimpleRichFeature;
import org.biojavax.bio.seq.SimpleRichFeatureRelationship;
import org.biojavax.bio.seq.SimpleRichSequence;
import org.biojavax.bio.taxa.NCBITaxon;
import org.biojavax.ontology.ComparableTerm;


/**
 * Constructs BioEntry objects by listening to events.
 * <p>
 * The builder accumulates everything it is told between
 * {@link #startSequence()} and {@link #makeSequence()}. Calling
 * {@link #startSequence()} discards all accumulated state, so one
 * instance can be reused to build several sequences in turn.
 *
 * @author Richard Holland
 * @author George Waldon
 * @since 1.5
 */
public class SimpleRichSequenceBuilder extends RichSeqIOAdapter implements RichSequenceBuilder {

    private RichAnnotation notes = new SimpleRichAnnotation();

    /**
     * Creates a new instance of SimpleRichSequenceBuilder using a SimpleSymbolListFactory
     * with a threshold of zero.
     */
    public SimpleRichSequenceBuilder() {
        this(new SimpleSymbolListFactory(), 0);
    }

    /**
     * Creates a new instance of SimpleRichSequenceBuilder with the
     * desired symbollistfactory and threshold of zero.
     * @param factory the symbollistfactory to use from the start.
     */
    public SimpleRichSequenceBuilder(SymbolListFactory factory) {
        this(factory, 0);
    }

    /**
     * Creates a new instance of SimpleRichSequenceBuilder with the
     * desired symbollistfactory and threshold.
     * @param factory the symbollistfactory to use.
     * @param threshold the threshold at which the specified symbollistfactory
     * should come into use. If &lt;=0, it will be used from the start.
     */
    public SimpleRichSequenceBuilder(SymbolListFactory factory, int threshold) {
        this.reset();
        this.factory = factory;
        this.threshold = threshold;
    }

    /**
     * Sets the sequence info back to default values, ie. in order to start
     * constructing a new sequence from scratch. Every per-sequence field,
     * rank counter and collection is restored so that nothing leaks from one
     * sequence into the next when the builder is reused.
     */
    private void reset() {
        try {
            this.version = 0;
            this.versionSeen = false;
            this.seqVersion = 0.0;
            this.seqVersionSeen = false;
            this.accession = null;
            this.description = null;
            this.division = null;
            this.identifier = null;
            this.name = null;
            this.symbols = null;
            this.namespace = null;
            this.taxon = null;
            this.circular = false;   // do not inherit circularity from the previous sequence
            this.seqPropCount = 1;   // annotation rank
            this.referenceCount = 1; // doc reference rank
            this.crossRefsRank = 1;  // cross reference rank
            this.commentRank = 1;    // comment rank
            this.featureRank = 1;    // feature rank
            this.featPropCount = 1;  // feature annotation rank
            this.crossRefs.clear();
            this.comments.clear();
            this.relations.clear();
            this.references.clear();
            this.rootFeatures.clear();
            this.featureStack.clear();
            this.allFeatures.clear();
            this.notes.clear();
        } catch (ChangeVetoException ex) {
            throw new BioError("A ChangeListener should not have been applied", ex);
        }
    }

    /**
     * {@inheritDoc}
     * May only be called once per sequence.
     * @throws ParseException if a version has already been set for the
     * current sequence.
     */
    public void setVersion(int version) throws ParseException {
        if (this.versionSeen) throw new ParseException("Current BioEntry already has a version");
        this.version = version;
        this.versionSeen = true;
    }
    private int version;
    private boolean versionSeen;

    /**
     * {@inheritDoc}
     * NOT IMPLEMENTED
     * @throws ParseException always.
     */
    public void setURI(String uri) throws ParseException {
        throw new ParseException("We don't understand URIs");
    }

    /**
     * {@inheritDoc}
     * A null value stores a sequence version of 0.0 without marking the
     * version as seen, so a later non-null call is still accepted.
     * @throws ParseException if a sequence version has already been set, or
     * if the value cannot be parsed as a double.
     */
    public void setSeqVersion(String seqVersion) throws ParseException {
        if (this.seqVersionSeen) throw new ParseException("Current BioEntry already has a sequence version");
        if (seqVersion == null) {
            this.seqVersion = 0.0;
            return;
        }
        try {
            this.seqVersion = Double.parseDouble(seqVersion);
            this.seqVersionSeen = true;
        } catch (NumberFormatException e) {
            throw new ParseException("Could not parse sequence version as a double");
        }
    }
    private double seqVersion = 0.0;
    private boolean seqVersionSeen;

    /**
     * {@inheritDoc}
     * The last accession passed to this routine will always be the one used.
     * @throws ParseException if the accession is null.
     */
    public void setAccession(String accession) throws ParseException {
        if (accession == null) throw new ParseException("Accession cannot be null");
        this.accession = accession;
    }
    private String accession;

    /**
     * {@inheritDoc}
     * @throws ParseException if a non-null description has already been set.
     */
    public void setDescription(String description) throws ParseException {
        if (this.description != null) throw new ParseException("Current BioEntry already has a description");
        this.description = description;
    }
    private String description;

    /**
     * {@inheritDoc}
     * @throws ParseException if the division is null or one has already been set.
     */
    public void setDivision(String division) throws ParseException {
        if (division == null) throw new ParseException("Division cannot be null");
        if (this.division != null) throw new ParseException("Current BioEntry already has a division");
        this.division = division;
    }
    private String division;

    /**
     * {@inheritDoc}
     * @throws ParseException if the identifier is null or one has already been set.
     */
    public void setIdentifier(String identifier) throws ParseException {
        if (identifier == null) throw new ParseException("Identifier cannot be null");
        if (this.identifier != null) throw new ParseException("Current BioEntry already has a identifier");
        this.identifier = identifier;
    }
    private String identifier;

    /**
     * {@inheritDoc}
     * @throws ParseException if the name is null or one has already been set.
     */
    public void setName(String name) throws ParseException {
        if (name == null) throw new ParseException("Name cannot be null");
        if (this.name != null) throw new ParseException("Current BioEntry already has a name");
        this.name = name;
    }
    private String name;

    /**
     * {@inheritDoc}
     * The reference's rank is overwritten with the next cross-reference rank
     * of the current sequence (starting at 1).
     * @throws ParseException if the reference is null.
     */
    public void setRankedCrossRef(RankedCrossRef ref) throws ParseException {
        if (ref == null) throw new ParseException("Reference cannot be null");
        ref.setRank(crossRefsRank++);
        this.crossRefs.add(ref);
    }
    private Set<RankedCrossRef> crossRefs = new TreeSet<RankedCrossRef>();
    private int crossRefsRank = 1;

    /**
     * {@inheritDoc}
     * The chunked symbol store is created lazily on the first call, using the
     * configured factory and, when positive, the configured threshold.
     */
    public void addSymbols(Alphabet alpha, Symbol[] syms, int start, int length) throws IllegalAlphabetException {
        if (this.symbols == null) {
            this.symbols = (threshold <= 0)
                    ? new ChunkedSymbolListFactory(this.factory)
                    : new ChunkedSymbolListFactory(this.factory, threshold);
        }
        this.symbols.addSymbols(alpha, syms, start, length);
    }
    private SymbolListFactory factory;
    private int threshold;
    private ChunkedSymbolListFactory symbols;

    /**
     * {@inheritDoc}
     * Comments are ranked in the order they are received, starting at 1.
     * @throws ParseException if the comment is null.
     */
    public void setComment(String comment) throws ParseException {
        if (comment == null) throw new ParseException("Comment cannot be null");
        this.comments.add(new SimpleComment(comment, commentRank++));
    }
    private Set<Comment> comments = new TreeSet<Comment>();
    private int commentRank = 1;

    /**
     * {@inheritDoc}
     * @throws ParseException if the namespace is null or one has already been set.
     */
    public void setNamespace(Namespace namespace) throws ParseException {
        if (namespace == null) throw new ParseException("Namespace cannot be null");
        if (this.namespace != null) throw new ParseException("Current BioEntry already has a namespace");
        this.namespace = namespace;
    }
    private Namespace namespace;

    /**
     * {@inheritDoc}
     * A feature started while another is still open is attached to the
     * innermost open feature through a "contains" relationship; otherwise it
     * is recorded as a root feature. Starting a feature restarts the feature
     * annotation rank at 1.
     */
    public void startFeature(Feature.Template templ) throws ParseException {
        try {
            RichFeature f = new SimpleRichFeature(featureHolder, templ);
            f.setRank(this.featureRank++);
            this.allFeatures.add(f);
            if (this.featureStack.isEmpty()) {
                this.rootFeatures.add(f);
            } else {
                RichFeature parent = this.featureStack.get(this.featureStack.size() - 1);
                parent.addFeatureRelationship(
                        new SimpleRichFeatureRelationship(parent, f, SimpleRichFeatureRelationship.getContainsTerm(), 0)
                );
            }
            this.featPropCount = 1; // reset feature annotation rank
            this.featureStack.add(f);
        } catch (ChangeVetoException e) {
            throw new ParseException(e);
        } catch (InvalidTermException e) {
            throw new ParseException(e);
        }
    }
    private FeatureHolder featureHolder = new SimpleFeatureHolder();
    private Set<Feature> rootFeatures = new TreeSet<Feature>();
    private List<RichFeature> allFeatures = new ArrayList<RichFeature>();
    private List<RichFeature> featureStack = new ArrayList<RichFeature>(); // used as a stack; last element is innermost
    private int featureRank = 1;

    /**
     * {@inheritDoc}
     * @return the innermost feature that has been started but not yet ended.
     * @throws ParseException if no feature is currently open.
     */
    public RichFeature getCurrentFeature() throws ParseException {
        if (this.featureStack.isEmpty()) throw new ParseException("Not currently within a feature");
        return this.featureStack.get(this.featureStack.size() - 1);
    }

    /**
     * {@inheritDoc}
     * The first taxon supplied for a sequence is the one kept. A later call
     * with a different taxon is ignored and a warning is written to
     * standard error; a later call with an equal taxon is a no-op.
     * @throws ParseException if the taxon is null.
     */
    public void setTaxon(NCBITaxon taxon) throws ParseException {
        if (taxon == null) throw new ParseException("Taxon cannot be null");
        if (this.taxon == null) {
            this.taxon = taxon;
            return;
        }
        if (!this.taxon.equals(taxon)) {
            System.err.println(
                    "Warning: attempted to set taxon twice with different values. Keeping first value. " +
                    "old value (retained): " + this.taxon + " new value: " + taxon + ", accession: <" + accession + ">, version:" + version);
        }
    }
    private NCBITaxon taxon;

    /**
     * {@inheritDoc}
     * @throws ParseException if the relationship is null.
     */
    public void setRelationship(BioEntryRelationship relationship) throws ParseException {
        if (relationship == null) throw new ParseException("Relationship cannot be null");
        this.relations.add(relationship);
    }
    private Set<BioEntryRelationship> relations = new TreeSet<BioEntryRelationship>();

    /**
     * {@inheritDoc}
     * The reference's rank is overwritten with the next document-reference
     * rank of the current sequence (starting at 1).
     * @throws ParseException if the reference is null.
     */
    public void setRankedDocRef(RankedDocRef ref) throws ParseException {
        if (ref == null) throw new ParseException("Reference cannot be null");
        ref.setRank(referenceCount++);
        this.references.add(ref);
    }
    private Set<RankedDocRef> references = new TreeSet<RankedDocRef>();
    private int referenceCount = 1;

    /**
     * {@inheritDoc}
     * Discards everything accumulated for any previous sequence.
     */
    public void startSequence() throws ParseException {
        this.reset();
    }

    /**
     * Converts a property key into an ontology term. Keys that are already
     * ComparableTerms are used directly; anything else is looked up (or
     * created) in the default ontology by its string form.
     * @param key the property key.
     * @return the term to annotate with.
     * @throws ParseException if the key is null.
     */
    private static ComparableTerm toTerm(Object key) throws ParseException {
        if (key == null) throw new ParseException("Property key cannot be null");
        if (key instanceof ComparableTerm) return (ComparableTerm) key;
        return (ComparableTerm) RichObjectFactory.getDefaultOntology().getOrCreateTerm(key.toString());
    }

    /**
     * {@inheritDoc}
     * The property is added as a note on the innermost open feature. Non-string
     * values are stored using their string form; null values are stored as null.
     * @throws ParseException if no feature is open, the key is null, or the
     * note is vetoed.
     */
    public void addFeatureProperty(Object key, Object value) throws ParseException {
        if (this.featureStack.isEmpty()) throw new ParseException("Assertion failed: Not within a feature");
        ComparableTerm term = toTerm(key);
        String text = (value == null) ? null : value.toString();
        RichFeature f = this.getCurrentFeature();
        try {
            f.getRichAnnotation().addNote(new SimpleNote(term, text, this.featPropCount++));
        } catch (ChangeVetoException e) {
            throw new ParseException(e);
        }
    }
    int featPropCount = 1;

    /**
     * {@inheritDoc}
     * The property is added as a sequence-level note. Non-string values are
     * stored using their string form; null values are stored as null.
     * @throws ParseException if the key is null or the note is vetoed.
     */
    public void addSequenceProperty(Object key, Object value) throws ParseException {
        ComparableTerm term = toTerm(key);
        String text = (value == null) ? null : value.toString();
        try {
            this.notes.addNote(new SimpleNote(term, text, this.seqPropCount++));
        } catch (ChangeVetoException e) {
            throw new ParseException(e);
        }
    }
    int seqPropCount = 1;

    /**
     * {@inheritDoc}
     * Closes the innermost open feature.
     * @throws ParseException if no feature is open.
     */
    public void endFeature() throws ParseException {
        if (this.featureStack.isEmpty()) throw new ParseException("Assertion failed: Not within a feature");
        this.featureStack.remove(this.featureStack.size() - 1);
    }

    /**
     * {@inheritDoc}
     * Verifies that the mandatory name, namespace and accession were supplied.
     * @throws ParseException if any of them is missing.
     */
    public void endSequence() throws ParseException {
        if (this.name == null) throw new ParseException("Name has not been supplied");
        if (this.namespace == null) throw new ParseException("Namespace has not been supplied");
        if (this.accession == null) throw new ParseException("No accessions have been supplied");
    }

    /**
     * {@inheritDoc}
     */
    public void setCircular(boolean circular) throws ParseException { this.circular = circular; }
    private boolean circular = false;

    /**
     * {@inheritDoc}
     * Validates the accumulated input via {@link #endSequence()}, then
     * assembles a SimpleRichSequence from it. Features without a name are
     * given one of the form <code>accession#rank</code>. For circular
     * sequences with symbols, the sequence length is pushed into the
     * locations of root features and document references.
     * @return the assembled sequence.
     * @throws BioException (as a ParseException) if mandatory input is missing
     * or any part of the assembly fails.
     */
    public Sequence makeSequence() throws BioException {
        this.endSequence(); // Check our input.
        // make our basic object
        SymbolList syms = (this.symbols == null) ? SymbolList.EMPTY_LIST : this.symbols.makeSymbolList();
        RichSequence rs = new SimpleRichSequence(this.namespace, this.name, this.accession, this.version, syms, Double.valueOf(this.seqVersion));
        // circular lengths are only propagated when symbols were actually supplied
        boolean propagateCircularLength = this.circular && this.symbols != null;
        int circularLength = propagateCircularLength ? syms.length() : 0;
        // set misc stuff
        try {
            // set features
            for (RichFeature f : this.allFeatures) {
                f.setParent(rs);
                if (f.getName() == null || f.getName().length() == 0) {
                    f.setName(rs.getAccession() + "#" + f.getRank()); // dummy feature name for use in GBrowse
                }
            }
            rs.setDescription(this.description);
            rs.setDivision(this.division);
            rs.setIdentifier(this.identifier);
            rs.setTaxon(this.taxon);
            rs.setCircular(this.circular);
            if (propagateCircularLength) {
                for (Feature rf : this.rootFeatures) {
                    RichLocation rlc = RichLocation.Tools.enrich(rf.getLocation());
                    rlc.setCircularLength(circularLength);
                }
            }
            rs.setFeatureSet(this.rootFeatures);
            for (RankedCrossRef crossRef : this.crossRefs) rs.addRankedCrossRef(crossRef);
            for (BioEntryRelationship relation : this.relations) rs.addRelationship(relation);
            if (propagateCircularLength) {
                for (RankedDocRef rdf : this.references) {
                    RichLocation rlc = RichLocation.Tools.enrich(rdf.getLocation());
                    if (!(rlc instanceof EmptyRichLocation)) { // Can be empty
                        rlc.setCircularLength(circularLength);
                    }
                }
            }
            for (RankedDocRef docRef : this.references) rs.addRankedDocRef(docRef);
            for (Comment comment : this.comments) rs.addComment(comment);
            // set annotations
            rs.setNoteSet(this.notes.getNoteSet());
        } catch (Exception e) {
            throw new ParseException(e); // Convert them all to parse exceptions.
        }
        // return the object
        return rs;
    }

    /**
     * {@inheritDoc}
     * @return the result of {@link #makeSequence()} as a RichSequence.
     */
    public RichSequence makeRichSequence() throws BioException { return (RichSequence) this.makeSequence(); }
}