/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * @author Karl Nicholas <github:karlnicholas>
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 01-21-2010
 */
package org.biojava.nbio.core.sequence.io;

import org.biojava.nbio.core.exceptions.ParserException;
import org.biojava.nbio.core.sequence.AccessionID;
import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface;
import org.biojava.nbio.core.sequence.reference.AbstractReference;
import org.biojava.nbio.core.sequence.template.AbstractSequence;
import org.biojava.nbio.core.sequence.template.Compound;

import java.util.ArrayList;
import java.util.List;

import org.biojava.nbio.core.sequence.DataSource;

/**
 * Holds the header fields of a GenBank record (accession, version, identifier,
 * description, comments, references, ...) and copies them onto a sequence in
 * {@link #parseHeader(String, AbstractSequence)}.
 * <p>
 * The fields are filled through the setters of this class; presumably a GenBank
 * record parser calls them while reading a record (the caller is not part of
 * this file).
 *
 * @param <S> the sequence type that receives the header values
 * @param <C> the compound type of the sequence
 */
public class GenericGenbankHeaderParser<S extends AbstractSequence<C>, C extends Compound> implements SequenceHeaderParserInterface<S,C> {

    /**
     * Brief description of sequence; includes information such as source
     * organism, gene name/protein name, or some description of the sequence's
     * function (if the sequence is non-coding). If the sequence has a coding region
     * (CDS), description may be followed by a completeness qualifier, such as
     * "complete CDS".
     */
    private String description;

    /**
     * The unique identifier for a sequence record
     */
    private String accession = null;

    private String identifier = null;

    private String name = null;

    /**
     * A nucleotide sequence identification number that represents a single,
     * specific sequence in the GenBank database. This identification number uses
     * the accession.version format implemented by GenBank/EMBL/DDBJ in February
     * 1999.
     */
    private int version;

    /** Whether {@link #setVersion(int)} has been called since construction or the last reset. */
    private boolean versionSeen;

    private ArrayList<String> comments = new ArrayList<>();

    /**
     * Publications by the authors of the sequence that discuss the data reported in
     * the record. References are automatically sorted within the record based on
     * date of publication, showing the oldest references first.
     */
    private List<AbstractReference> references = new ArrayList<>();

    /**
     * Word or phrase describing the sequence. If no keywords are included in the
     * entry, the field contains only a period.
     */
    private List<String> keywords = new ArrayList<>();

    /**
     * Free-format information including an abbreviated form of the organism name,
     * sometimes followed by a molecule type. (See section 3.4.10 of the GenBank
     * release notes for more info.)
     */
    private String source = null;

    /**
     * The formal scientific name for the source organism (genus and species, where
     * appropriate) and its lineage, based on the phylogenetic classification scheme
     * used in the NCBI Taxonomy Database. If the complete lineage of an organism is
     * very long, an abbreviated lineage will be shown in the GenBank record and the
     * complete lineage will be available in the Taxonomy Database. (See also the
     * /db_xref=taxon:nnnn Feature qualifier, below.)
     */
    private List<String> organism = new ArrayList<>();

    /**
     * GI sequence identifier
     */
    private String gi = null;

    /**
     * Stores the raw header on the sequence and copies the values currently held
     * by this parser (accession, version, identifier, description, comments and
     * references) onto it. The header text itself is not inspected here.
     *
     * @param header the original header text, passed to {@code setOriginalHeader}
     * @param sequence the sequence that receives the header values
     */
    @Override
    public void parseHeader(String header, S sequence) {
        sequence.setOriginalHeader(header);
        sequence.setAccession(new AccessionID(accession, DataSource.GENBANK, version, identifier));
        sequence.setDescription(description);
        // The comment and reference lists are handed over as-is, not copied.
        sequence.setComments(comments);
        sequence.setReferences(references);
    }

    /**
     * @return the accession, or {@code null} if none has been set
     */
    public String getAccession() {
        return accession;
    }

    /**
     * @return the identifier, or {@code null} if none has been set
     */
    public String getIdentifier() {
        return identifier;
    }

    /**
     * @return the name, or {@code null} if none has been set
     */
    public String getName() {
        return name;
    }

    /**
     * @return the version; {@code 0} until {@link #setVersion(int)} is called
     */
    public int getVersion() {
        return version;
    }

    /**
     * @return the internal (live) list of comments added so far
     */
    public ArrayList<String> getComments() {
        return comments;
    }

    /**
     * @return the internal (live) list of references added so far
     */
    public List<AbstractReference> getReferences() {
        return references;
    }

    /**
     * @return the description, or {@code null} if none has been set
     */
    public String getDescription() {
        return description;
    }

    /**
     * Sets the sequence info back to default values, ie. in order to start
     * constructing a new sequence from scratch.
     * <p>
     * The collections are cleared in place. NOTE(review): parseHeader hands the
     * comment and reference lists to the sequence without copying, so if the
     * sequence keeps those instances it will see them emptied - confirm before
     * putting this method to use.
     */
    @SuppressWarnings("unused")
    private void reset() {
        this.version = 0;
        this.versionSeen = false;
        this.accession = null;
        this.description = null;
        this.identifier = null;
        this.name = null;
        this.source = null;
        this.gi = null;
        this.comments.clear();
        this.references.clear();
        this.keywords.clear();
        this.organism.clear();
    }

    /**
     * Sets the version. It may be set only once (until the state is reset).
     *
     * @param version the version number
     * @throws ParserException if a version has already been set
     */
    public void setVersion(int version) throws ParserException {
        if (this.versionSeen) {
            throw new ParserException("Current BioEntry already has a version");
        }
        // The argument is already an int, so nothing here can fail to parse.
        this.version = version;
        this.versionSeen = true;
    }


    /**
     * Sets the accession.
     * The last accession passed to this routine will always be the one used.
     *
     * @param accession the accession; must not be {@code null}
     * @throws ParserException if {@code accession} is {@code null}
     */
    public void setAccession(String accession) throws ParserException {
        if (accession == null) {
            throw new ParserException("Accession cannot be null");
        }
        this.accession = accession;
    }

    /**
     * Sets the description. A {@code null} argument is accepted and leaves the
     * description unset.
     *
     * @param description the description text
     * @throws ParserException if a non-null description has already been set
     */
    public void setDescription(String description) throws ParserException {
        if (this.description != null) {
            throw new ParserException("Current BioEntry already has a description");
        }
        this.description = description;
    }

    /**
     * Sets the identifier. It may be set only once.
     *
     * @param identifier the identifier; must not be {@code null}
     * @throws ParserException if {@code identifier} is {@code null} or an
     *         identifier has already been set
     */
    public void setIdentifier(String identifier) throws ParserException {
        if (identifier == null) {
            throw new ParserException("Identifier cannot be null");
        }
        if (this.identifier != null) {
            throw new ParserException("Current BioEntry already has a identifier");
        }
        this.identifier = identifier;
    }

    /**
     * Sets the name. It may be set only once.
     *
     * @param name the name; must not be {@code null}
     * @throws ParserException if {@code name} is {@code null} or a name has
     *         already been set
     */
    public void setName(String name) throws ParserException {
        if (name == null) {
            throw new ParserException("Name cannot be null");
        }
        if (this.name != null) {
            throw new ParserException("Current BioEntry already has a name");
        }
        this.name = name;
    }

    /**
     * Appends a comment to the list of comments.
     *
     * @param comment the comment to add; must not be {@code null}
     * @throws ParserException if {@code comment} is {@code null}
     */
    public void setComment(String comment) throws ParserException {
        if (comment == null) {
            throw new ParserException("Comment cannot be null");
        }
        this.comments.add(comment);
    }

    /**
     * Appends a reference to the list of references. No null check is performed.
     *
     * @param abstractReference the reference to add
     */
    public void addReference(AbstractReference abstractReference) {
        this.references.add(abstractReference);
    }
}