001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * @author Karl Nicholas <github:karlnicholas>
015 *
016 * For more information on the BioJava project and its aims,
017 * or to join the biojava-l mailing list, visit the home page
018 * at:
019 *
020 *      http://www.biojava.org/
021 *
022 * Created on 01-21-2010
023 */
024package org.biojava.nbio.core.sequence.io;
025
026import org.biojava.nbio.core.exceptions.ParserException;
027import org.biojava.nbio.core.sequence.AccessionID;
028import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface;
029import org.biojava.nbio.core.sequence.reference.AbstractReference;
030import org.biojava.nbio.core.sequence.template.AbstractSequence;
031import org.biojava.nbio.core.sequence.template.Compound;
032
033import java.util.ArrayList;
034import java.util.List;
035
036import org.biojava.nbio.core.sequence.DataSource;
037
038public class GenericGenbankHeaderParser<S extends AbstractSequence<C>, C extends Compound> implements SequenceHeaderParserInterface<S,C> {
039
040        @SuppressWarnings("unused")
041
042        /**
043         * Brief description of sequence; includes information such as source
044         * organism,gene name/protein name, or some description of the sequence's
045         * function (if the sequence is non-coding). If the sequence has a coding region
046         * (CDS), description may be followed by a completeness qualifier, such as
047         * "complete CDS".
048         */
049        private String description;
050        
051        /** 
052         * The unique identifier for a sequence record
053         */
054        private String accession = null;
055
056        private String identifier = null;
057
058        private String name = null;
059
060        /**
061         * A nucleotide sequence identification number that represents a single,
062         * specific sequence in the GenBank database. This identification number uses
063         * the accession.version format implemented by GenBank/EMBL/DDBJ in February
064         * 1999.
065         */
066        private int version;
067
068        private boolean versionSeen;
069
070        private ArrayList<String> comments = new ArrayList<>();
071
072        /**
073         * Publications by the authors of the sequence that discuss the data reported in
074         * the record. References are automatically sorted within the record based on
075         * date of publication, showing the oldest references first.
076         */
077        private List<AbstractReference> references = new ArrayList<>();
078
079        /**
080         * Word or phrase describing the sequence. If no keywords are included in the
081         * entry, the field contains only a period.
082         */
083        private List<String> keywords = new ArrayList<>();
084
085        /**
086         * Free-format information including an abbreviated form of the organism name,
087         * sometimes followed by a molecule type. (See section 3.4.10 of the GenBank
088         * release notes for more info.)
089         */     
090        private String source = null;
091
092        /**
093         * The formal scientific name for the source organism (genus and species, where
094         * appropriate) and its lineage, based on the phylogenetic classification scheme
095         * used in the NCBI Taxonomy Database. If the complete lineage of an organism is
096         * very long, an abbreviated lineage will be shown in the GenBank record and the
097         * complete lineage will be available in the Taxonomy Database. (See also the
098         * /db_xref=taxon:nnnn Feature qualifer, below.)
099         */
100        private List<String> organism = new ArrayList<>();
101        
102        /**
103         * GI sequence identifier
104         */
105        private String gi = null;
106
107        /**
108         * Parse the header and set the values in the sequence
109         * @param header
110         * @param sequence
111         */
112        @Override
113        public void parseHeader(String header, S sequence) {
114                sequence.setOriginalHeader(header);
115                sequence.setAccession(new AccessionID(accession, DataSource.GENBANK, version, identifier));
116                sequence.setDescription(description);
117                sequence.setComments(comments);
118                sequence.setReferences(references);
119        }
120
121        public String getAccession() {
122                return accession;
123        }
124
125        public String getIdentifier() {
126                return identifier;
127        }
128
129        public String getName() {
130                return name;
131        }
132
133        public int getVersion() {
134                return version;
135        }
136
137        public ArrayList<String> getComments() {
138                return comments;
139        }
140
141        public List<AbstractReference> getReferences() {
142                return references;
143        }
144
145        public String getDescription() {
146                return description;
147        }
148
149        /**
150         * Sets the sequence info back to default values, ie. in order to start
151         * constructing a new sequence from scratch.
152         */
153        @SuppressWarnings("unused")
154        private void reset() {
155                this.version = 0;
156                this.versionSeen = false;
157                this.accession = null;
158                this.description = null;
159                this.identifier = null;
160                this.name = null;
161                this.comments.clear();
162        }
163
164        /**
165         * {@inheritDoc}
166         * The last accession passed to this routine will always be the one used.
167         */
168        public void setVersion(int version) throws ParserException {
169                if (this.versionSeen) throw new ParserException("Current BioEntry already has a version");
170                else {
171                        try {
172                                this.version = version;
173                                this.versionSeen = true;
174                        } catch (NumberFormatException e) {
175                                throw new ParserException("Could not parse version as an integer");
176                        }
177                }
178        }
179
180
181        /**
182         * {@inheritDoc}
183         * The last accession passed to this routine will always be the one used.
184         */
185        public void setAccession(String accession) throws ParserException {
186                if (accession==null) throw new ParserException("Accession cannot be null");
187                this.accession = accession;
188        }
189
190        /**
191         * {@inheritDoc}
192         */
193        public void setDescription(String description) throws ParserException {
194                if (this.description!=null) throw new ParserException("Current BioEntry already has a description");
195                this.description = description;
196        }
197
198        /**
199         * {@inheritDoc}
200         */
201        public void setIdentifier(String identifier) throws ParserException {
202                if (identifier==null) throw new ParserException("Identifier cannot be null");
203                if (this.identifier!=null) throw new ParserException("Current BioEntry already has a identifier");
204                this.identifier = identifier;
205        }
206
207        /**
208         * {@inheritDoc}
209         */
210        public void setName(String name) throws ParserException {
211                if (name==null) throw new ParserException("Name cannot be null");
212                if (this.name!=null) throw new ParserException("Current BioEntry already has a name");
213                this.name = name;
214        }
215
216        /**
217         * {@inheritDoc}
218         */
219        public void setComment(String comment) throws ParserException {
220                if (comment==null) throw new ParserException("Comment cannot be null");
221                this.comments.add(comment);
222        }
223
224        public void addReference(AbstractReference abstractReference){
225                this.references.add(abstractReference);
226        }
227}