001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojavax.bio.seq.io;
023
024import java.util.ArrayList;
025import java.util.Iterator;
026import java.util.List;
027import java.util.Set;
028import java.util.TreeSet;
029
030import org.biojava.bio.BioError;
031import org.biojava.bio.BioException;
032import org.biojava.bio.seq.Feature;
033import org.biojava.bio.seq.FeatureHolder;
034import org.biojava.bio.seq.Sequence;
035import org.biojava.bio.seq.SimpleFeatureHolder;
036import org.biojava.bio.seq.io.ChunkedSymbolListFactory;
037import org.biojava.bio.seq.io.ParseException;
038import org.biojava.bio.symbol.Alphabet;
039import org.biojava.bio.symbol.IllegalAlphabetException;
040import org.biojava.bio.symbol.Location;
041import org.biojava.bio.symbol.SimpleSymbolListFactory;
042import org.biojava.bio.symbol.Symbol;
043import org.biojava.bio.symbol.SymbolList;
044import org.biojava.bio.symbol.SymbolListFactory;
045import org.biojava.ontology.InvalidTermException;
046import org.biojava.utils.ChangeVetoException;
047import org.biojavax.Comment;
048import org.biojavax.Namespace;
049import org.biojavax.Note;
050import org.biojavax.RankedCrossRef;
051import org.biojavax.RankedDocRef;
052import org.biojavax.RichAnnotation;
053import org.biojavax.RichObjectFactory;
054import org.biojavax.SimpleComment;
055import org.biojavax.SimpleNote;
056import org.biojavax.SimpleRichAnnotation;
057import org.biojavax.bio.BioEntryRelationship;
058import org.biojavax.bio.seq.EmptyRichLocation;
059import org.biojavax.bio.seq.RichFeature;
060import org.biojavax.bio.seq.RichLocation;
061import org.biojavax.bio.seq.RichSequence;
062import org.biojavax.bio.seq.SimpleRichFeature;
063import org.biojavax.bio.seq.SimpleRichFeatureRelationship;
064import org.biojavax.bio.seq.SimpleRichSequence;
065import org.biojavax.bio.taxa.NCBITaxon;
066import org.biojavax.ontology.ComparableTerm;
067
068
069/**
070 * Constructs BioEntry objects by listening to events.
071 * @author Richard Holland
072 * @author George Waldon
073 * @since 1.5
074 */
075public class SimpleRichSequenceBuilder extends RichSeqIOAdapter implements RichSequenceBuilder {
076    
    // Sequence-level annotation: addSequenceProperty() adds notes to it,
    // makeSequence() hands its note set to the sequence being built, and
    // reset() clears it.
    private RichAnnotation notes = new SimpleRichAnnotation();
078    
079    /**
080     * Creates a new instance of SimpleRichSequenceBuilder using a SimpleSymbolListFactory
081     * with a threshold of zero.
082     */
083    public SimpleRichSequenceBuilder() {
084        this(new SimpleSymbolListFactory(),0);
085    }
086        
087    /**
088     * Creates a new instance of SimpleRichSequenceBuilder with the
089     * desired symbollistfactory and threshold of zero.
090     * @param factory the symbollistfactory to use from the start.
091     */
092    public SimpleRichSequenceBuilder(SymbolListFactory factory) {
093        this(factory,0);
094    }
095    
096    /**
097     * Creates a new instance of SimpleRichSequenceBuilder with the
098     * desired symbollistfactory and threshold.
099     * @param factory the symbollistfactory to use.
100     * @param threshold the threshold at which the specified symbollistfactory
101     * should come into use. If <=0, it will be used from the start.
102     */
103    public SimpleRichSequenceBuilder(SymbolListFactory factory, int threshold) {
104        this.reset();
105        this.factory = factory;
106        this.threshold = threshold;
107    }
108    
109    /**
110     * Sets the sequence info back to default values, ie. in order to start
111     * constructing a new sequence from scratch.
112     */
113    private void reset() {
114        try{
115            this.version = 0;
116            this.versionSeen = false;
117            this.seqVersion = 0.0;
118            this.seqVersionSeen = false;
119            this.accession = null;
120            this.description = null;
121            this.division = null;
122            this.identifier = null;
123            this.name = null;
124            this.crossRefs.clear();
125            this.symbols = null;
126            this.namespace = null;
127            this.taxon = null;
128            this.seqPropCount = 1;   //annotation rank
129            this.referenceCount = 1; //doc reference rank
130            this.commentRank = 1;    //comment rank
131            this.featureRank = 1;    //feature rank
132            this.featPropCount = 1;  //feature annotation rank
133            this.comments.clear();
134            this.relations.clear();
135            this.references.clear();
136            this.rootFeatures.clear();
137            this.featureStack.clear();
138            this.allFeatures.clear();
139            this.notes.clear();
140        }catch(ChangeVetoException ex){
141            throw new BioError("A ChangeListener should not have been applied", ex);
142        }
143    }
144    
145    /**
146     * {@inheritDoc}
147     */
148    public void setVersion(int version) throws ParseException {
149        if (this.versionSeen) throw new ParseException("Current BioEntry already has a version");
150        else {
151            try {
152                this.version = version;
153                this.versionSeen = true;
154            } catch (NumberFormatException e) {
155                throw new ParseException("Could not parse version as an integer");
156            }
157        }
158    }
159    private int version;
160    private boolean versionSeen;
161    
162    /**
163     * {@inheritDoc}
164     * NOT IMPLEMENTED
165     */
166    public void setURI(String uri) throws ParseException {
167        throw new ParseException("We don't understand URIs");
168    }
169    
170    /**
171     * {@inheritDoc}
172     */
173    public void setSeqVersion(String seqVersion) throws ParseException {
174        if (this.seqVersionSeen) throw new ParseException("Current BioEntry already has a sequence version");
175        if (seqVersion==null) this.seqVersion=0.0;
176        else {
177            try {
178                this.seqVersion = Double.parseDouble(seqVersion);
179                this.seqVersionSeen = true;
180            } catch (NumberFormatException e) {
181                throw new ParseException("Could not parse sequence version as a double");
182            }
183        }
184    }
185    private double seqVersion = 0.0;
186    private boolean seqVersionSeen;
187    
188    /**
189     * {@inheritDoc}
190     * The last accession passed to this routine will always be the one used.
191     */
192    public void setAccession(String accession) throws ParseException {
193        if (accession==null) throw new ParseException("Accession cannot be null");
194        this.accession = accession;
195    }
196    private String accession;
197    
198    /**
199     * {@inheritDoc}
200     */
201    public void setDescription(String description) throws ParseException {
202        if (this.description!=null) throw new ParseException("Current BioEntry already has a description");
203        this.description = description;
204    }
205    private String description;
206    
207    /**
208     * {@inheritDoc}
209     */
210    public void setDivision(String division) throws ParseException {
211        if (division==null) throw new ParseException("Division cannot be null");
212        if (this.division!=null) throw new ParseException("Current BioEntry already has a division");
213        this.division = division;
214    }
215    private String division;
216    
217    /**
218     * {@inheritDoc}
219     */
220    public void setIdentifier(String identifier) throws ParseException {
221        if (identifier==null) throw new ParseException("Identifier cannot be null");
222        if (this.identifier!=null) throw new ParseException("Current BioEntry already has a identifier");
223        this.identifier = identifier;
224    }
225    private String identifier;
226    
227    /**
228     * {@inheritDoc}
229     */
230    public void setName(String name) throws ParseException {
231        if (name==null) throw new ParseException("Name cannot be null");
232        if (this.name!=null) throw new ParseException("Current BioEntry already has a name");
233        this.name = name;
234    }
235    private String name;
236    
237    /**
238     * {@inheritDoc}
239     */
240    public void setRankedCrossRef(RankedCrossRef ref) throws ParseException {
241        if (ref==null) throw new ParseException("Reference cannot be null");
242        ref.setRank(crossRefsRank++);
243        this.crossRefs.add(ref);
244    }
245    private Set<RankedCrossRef> crossRefs = new TreeSet<RankedCrossRef>();
246    private int crossRefsRank = 1;
247    
248    /**
249     * {@inheritDoc}
250     */
251    public void addSymbols(Alphabet alpha, Symbol[] syms, int start, int length) throws IllegalAlphabetException {
252        if (this.symbols==null) {
253            if (threshold<=0) {
254                this.symbols = new ChunkedSymbolListFactory(this.factory);
255            } else {
256                this.symbols = new ChunkedSymbolListFactory(this.factory,threshold);
257            }
258        }
259        this.symbols.addSymbols(alpha, syms, start, length);
260    }
261    private SymbolListFactory factory;
262    private int threshold;
263    private ChunkedSymbolListFactory symbols;
264    
265    /**
266     * {@inheritDoc}
267     */
268    public void setComment(String comment) throws ParseException {
269        if (comment==null) throw new ParseException("Comment cannot be null");
270        this.comments.add(new SimpleComment(comment,commentRank++));
271    }
272    private Set<Comment> comments = new TreeSet<Comment>();
273    private int commentRank = 1;
274    
275    /**
276     * {@inheritDoc}
277     */
278    public void setNamespace(Namespace namespace) throws ParseException {
279        if (namespace==null) throw new ParseException("Namespace cannot be null");
280        if (this.namespace!=null) throw new ParseException("Current BioEntry already has a namespace");
281        this.namespace = namespace;
282    }
283    private Namespace namespace;
284    
285    /**
286     * {@inheritDoc}
287     */
288    public void startFeature(Feature.Template templ) throws ParseException {
289        try {
290            RichFeature f = new SimpleRichFeature(featureHolder,templ);
291            f.setRank(this.featureRank++);
292            this.allFeatures.add(f);
293            if (this.featureStack.size() == 0) this.rootFeatures.add(f);
294            else {
295                RichFeature parent = (RichFeature)this.featureStack.get(this.featureStack.size() - 1);
296                parent.addFeatureRelationship(
297                        new SimpleRichFeatureRelationship(parent, f, SimpleRichFeatureRelationship.getContainsTerm(), 0)
298                        );
299            }
300            this.featPropCount = 1; //reset feature anotation rank
301            this.featureStack.add(f);
302        } catch (ChangeVetoException e) {
303            throw new ParseException(e);
304        } catch (InvalidTermException e) {
305            throw new ParseException(e);
306        }
307    }
308    private FeatureHolder featureHolder = new SimpleFeatureHolder();
309    private Set rootFeatures = new TreeSet();
310    private List allFeatures = new ArrayList();
311    private List featureStack = new ArrayList();
312    private int featureRank = 1;
313    
314    /**
315     * {@inheritDoc}
316     */ 
317    public RichFeature getCurrentFeature() throws ParseException {
318        if (this.featureStack.size()==0) throw new ParseException("Not currently within a feature");
319        else return (RichFeature)this.featureStack.get(this.featureStack.size()-1);
320    }
321    
322    /**
323     * {@inheritDoc}
324     */
325    public void setTaxon(NCBITaxon taxon) throws ParseException {
326        if (taxon==null) throw new ParseException("Taxon cannot be null");
327        if (this.taxon!=null){
328            if(! this.taxon.equals(taxon)){
329                System.err.println(
330                        "Warning: attempted to set taxon twice with different values. Keeping first value. "+
331                        "old value (retained): "+this.taxon+" new value: "+taxon+", accession: <"+accession+">, version:"+version);
332            }
333        }
334        this.taxon = taxon;
335    }
336    private NCBITaxon taxon;
337    
338    /**
339     * {@inheritDoc}
340     */
341    public void setRelationship(BioEntryRelationship relationship) throws ParseException {
342        if (relationship==null) throw new ParseException("Relationship cannot be null");
343        this.relations.add(relationship);
344    }
345    private Set<BioEntryRelationship> relations = new TreeSet<BioEntryRelationship>();
346    
347    /**
348     * {@inheritDoc}
349     */
350    public void setRankedDocRef(RankedDocRef ref) throws ParseException {
351        if (ref==null) throw new ParseException("Reference cannot be null");
352        ref.setRank(referenceCount++);
353        this.references.add(ref);
354    }
355    private Set<RankedDocRef> references = new TreeSet<RankedDocRef>();
356    private int referenceCount = 1;
357    
358    /**
359     * {@inheritDoc}
360     */
361    public void startSequence() throws ParseException {
362        this.reset();
363    }
364    
365    /**
366     * {@inheritDoc}
367     */
368    public void addFeatureProperty(Object key, Object value) throws ParseException {
369        if (this.featureStack.size() == 0) throw new ParseException("Assertion failed: Not within a feature");
370        if (!(key instanceof ComparableTerm)) key = RichObjectFactory.getDefaultOntology().getOrCreateTerm(key.toString());
371        if ((value != null) && !(value instanceof String)) value = value.toString();
372        RichFeature f = this.getCurrentFeature();
373        try {
374            Note n = new SimpleNote((ComparableTerm)key,(String)value,this.featPropCount++);
375            f.getRichAnnotation().addNote(n);
376        } catch (ChangeVetoException e) {
377            throw new ParseException(e);
378        }
379    }
380    int featPropCount = 1;
381    
382    /**
383     * {@inheritDoc}
384     */
385    public void addSequenceProperty(Object key, Object value) throws ParseException {
386        if (!(key instanceof ComparableTerm)) key = RichObjectFactory.getDefaultOntology().getOrCreateTerm(key.toString());
387        if (value!=null && !(value instanceof String)) value = value.toString();
388        try {
389            Note n;
390            if (value==null) n = new SimpleNote((ComparableTerm)key,null,this.seqPropCount++);
391            else n = new SimpleNote((ComparableTerm)key,(String)value,this.seqPropCount++);
392            this.notes.addNote(n);
393        } catch (ChangeVetoException e) {
394            throw new ParseException(e);
395        }
396    }
397    int seqPropCount = 1;
398    
399    /**
400     * {@inheritDoc}
401     */
402    public void endFeature() throws ParseException {
403        if (this.featureStack.size() == 0) throw new ParseException("Assertion failed: Not within a feature");
404        this.featureStack.remove(this.featureStack.size() - 1);
405    }
406    
407    /**
408     * {@inheritDoc}
409     */
410    public void endSequence() throws ParseException {
411        if (this.name==null) throw new ParseException("Name has not been supplied");
412        if (this.namespace==null) throw new ParseException("Namespace has not been supplied");
413        if (this.accession==null) throw new ParseException("No accessions have been supplied");
414    }
415        
416    /**
417     * {@inheritDoc}
418     */
419    public void setCircular(boolean circular) throws ParseException { this.circular = circular; }
420    private boolean circular = false;
421    
422    /**
423     * {@inheritDoc}
424     */
425    public Sequence makeSequence() throws BioException {
426        this.endSequence(); // Check our input.
427        // make our basic object
428        SymbolList syms = this.symbols==null?SymbolList.EMPTY_LIST:this.symbols.makeSymbolList();
429        RichSequence rs = new SimpleRichSequence(this.namespace,this.name,this.accession,this.version,syms,new Double(this.seqVersion));
430        // set misc stuff
431        try {
432            // set features
433            for (Iterator i = this.allFeatures.iterator(); i.hasNext(); ){
434                RichFeature f = (RichFeature)i.next();
435                f.setParent(rs);
436                if (f.getName()==null || f.getName().length()==0) f.setName(rs.getAccession()+"#"+f.getRank()); // dummy feature name for use in GBrowse
437            }
438            rs.setDescription(this.description);
439            rs.setDivision(this.division);
440            rs.setIdentifier(this.identifier);
441            rs.setTaxon(this.taxon);
442            rs.setCircular(this.circular);
443            if(this.circular && this.symbols!=null) {
444                int circularlength = syms.length();
445                for(Object obj:rootFeatures) {
446                    Feature rf = (Feature)obj;
447                    RichLocation rlc = RichLocation.Tools.enrich(rf.getLocation());
448                    rlc.setCircularLength(circularlength);
449                }
450            }
451            rs.setFeatureSet(this.rootFeatures);
452            for (Iterator<RankedCrossRef> i = this.crossRefs.iterator(); i.hasNext(); ) rs.addRankedCrossRef(i.next());
453            for (Iterator<BioEntryRelationship> i = this.relations.iterator(); i.hasNext(); ) rs.addRelationship(i.next());
454            if(this.circular && this.symbols!=null) {
455                int circularlength = syms.length();
456                for(RankedDocRef rdf:references) {
457                    RichLocation rlc = RichLocation.Tools.enrich(rdf.getLocation());
458                    if(!(rlc instanceof EmptyRichLocation)) // Can be empty
459                        rlc.setCircularLength(circularlength);
460                }
461            }
462            for (Iterator<RankedDocRef> i = this.references.iterator(); i.hasNext(); ) rs.addRankedDocRef(i.next());
463            for (Iterator<Comment> i = this.comments.iterator(); i.hasNext(); ) rs.addComment(i.next());
464            // set annotations
465            rs.setNoteSet(this.notes.getNoteSet());
466        } catch (Exception e) {
467            throw new ParseException(e); // Convert them all to parse exceptions.
468        }
469        // return the object
470        return rs;
471    }
472    
473    /**
474     * {@inheritDoc}
475     */
476    public RichSequence makeRichSequence() throws BioException { return (RichSequence)this.makeSequence(); }
477}