Source code

001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.sequence.io.embl;
022
023import java.util.LinkedList;
024import java.util.List;
025
026
027/**
028 * this class contains the parsed data of embl file
029 *
030 * @author Noor Aldeen Al Mbaidin
031 * @since 5.0.0
032 */
033
034public class EmblRecord {
035
036    private EmblId emblId;
037    private List<EmblReference> emblReference;
038    private List<String> accessionNumber = new LinkedList<>();
039    private String projectIdentifier;
040    private String orGanelle;
041    private String createdDate;
042    private String featureHeader;
043    private String featureTable;
044    private String lastUpdatedDate;
045    private String sequenceDescription;
046    private List<String> keyword = new LinkedList<>();
047    private String organismSpecies;
048    private String organismClassification;
049    private String databaseCrossReference;
050    private String assemblyHeader;
051    private String assemblyInformation;
052    private String constructedSequence;
053    private String sequenceHeader;
054    private String sequence;
055
056    /**
057     * The ID (IDentification) line
058     * The tokens represent:
059     * 1. Primary accession number
060     * 2. Sequence version number
061     * 3. Topology: 'circular' or 'linear'
062     * 4. Molecule type
063     * 5. Data class
064     * 6. Taxonomic division
065     * 7. Sequence length
066     *
067     * @return EmblId
068     */
069    public EmblId getEmblId() {
070        return emblId;
071    }
072
073    public void setEmblId(EmblId emblId) {
074        this.emblId = emblId;
075    }
076
077    /**
078     * The Reference (RN, RC, RP, RX, RG, RA, RT, RL) Lines
079     * These lines comprise the literature citations within the database.
080     * The citations provide access to the papers from which the data has been
081     * abstracted.
082     *
083     * @return EmblReference
084     */
085    public List<EmblReference> getEmblReference() {
086        return emblReference;
087    }
088
089    public void setEmblReference(List<EmblReference> emblReference) {
090        this.emblReference = emblReference;
091    }
092
093    /**
094     * The AC (Accession number) line lists the accession numbers associated with
095     * the entry.
096     *
097     * @return List<String>
098     */
099    public List<String> getAccessionNumber() {
100        return accessionNumber;
101    }
102
103    public void setAccessionNumber(List<String> accessionNumber) {
104        this.accessionNumber = accessionNumber;
105    }
106
107    /**
108     * @return String
109     */
110    public String getProjectIdentifier() {
111        return projectIdentifier;
112    }
113
114    public void setProjectIdentifier(String projectIdentifier) {
115        this.projectIdentifier = projectIdentifier;
116    }
117
118    /**
119     * The OG (OrGanelle) linetype indicates the sub-cellular location of non-nuclear
120     * sequences.
121     *
122     * @return String
123     */
124    public String getOrGanelle() {
125        return orGanelle;
126    }
127
128    public void setOrGanelle(String orGanelle) {
129        this.orGanelle = orGanelle;
130    }
131
132    /**
133     * The DT  line shows when an entry first appeared in the database
134     *
135     * @return String
136     */
137    public String getCreatedDate() {
138        return createdDate;
139    }
140
141    public void setCreatedDate(String createdDate) {
142        this.createdDate = createdDate;
143    }
144
145    /**
146     * The FH (Feature Header) lines are present only to improve readability of
147     * an entry when it is printed or displayed on a terminal screen.
148     *
149     * @return String
150     */
151    public String getFeatureHeader() {
152        return featureHeader;
153    }
154
155    public void setFeatureHeader(String featureHeader) {
156        this.featureHeader = featureHeader;
157    }
158
159    /**
160     * The FT (Feature Table) lines provide a mechanism for the annotation of the
161     * sequence data. Regions or sites in the sequence which are of interest are
162     * listed in the table.
163     *
164     * @return String
165     */
166    public String getFeatureTable() {
167        return featureTable;
168    }
169
170    public void setFeatureTable(String featureTable) {
171        this.featureTable = featureTable;
172    }
173
174    /**
175     * The DT (DaTe) line shows when an entry was last updated in the database.
176     *
177     * @return String
178     */
179    public String getLastUpdatedDate() {
180        return lastUpdatedDate;
181    }
182
183    public void setLastUpdatedDate(String lastUpdatedDate) {
184        this.lastUpdatedDate = lastUpdatedDate;
185    }
186
187    /**
188     * The DE (Description) lines contain general descriptive information about the
189     * sequence stored. This may include the designations of genes for which the
190     * sequence codes, the region of the genome from which it is derived, or other
191     * information which helps to identify the sequence.
192     *
193     * @return String
194     */
195    public String getSequenceDescription() {
196        return sequenceDescription;
197    }
198
199    public void setSequenceDescription(String sequenceDescription) {
200        this.sequenceDescription = sequenceDescription;
201    }
202
203    /**
204     * The KW (KeyWord) lines provide information which can be used to generate
205     * cross-reference indexes of the sequence entries based on functional,
206     * structural, or other categories deemed important.
207     *
208     * @return List<String>
209     */
210    public List<String> getKeyword() {
211        return keyword;
212    }
213
214    public void setKeyword(List<String> keyword) {
215        this.keyword = keyword;
216    }
217
218    /**
219     * The OS (Organism Species) line specifies the preferred scientific name of
220     * the organism which was the source of the stored sequence. In most
221     * cases this is done by giving the Latin genus and species designations,
222     * followed (in parentheses) by the preferred common name in English where known.
223     *
224     * @return String
225     */
226    public String getOrganismSpecies() {
227        return organismSpecies;
228    }
229
230    public void setOrganismSpecies(String organismSpecies) {
231        this.organismSpecies = organismSpecies;
232    }
233
234    /**
235     * The OC (Organism Classification) lines contain the taxonomic classification
236     * Of the source organism
237     *
238     * @return String
239     */
240    public String getOrganismClassification() {
241        return organismClassification;
242    }
243
244    public void setOrganismClassification(String organismClassification) {
245        this.organismClassification = organismClassification;
246    }
247
248    /**
249     * The DR (Database Cross-reference) line cross-references other databases which
250     * contain information related to the entry in which the DR line appears.
251     *
252     * @return String
253     */
254    public String getDatabaseCrossReference() {
255        return databaseCrossReference;
256    }
257
258    public void setDatabaseCrossReference(String databaseCrossReference) {
259        this.databaseCrossReference = databaseCrossReference;
260    }
261
262    /**
263     * The AH (Assembly Header) line provides column headings for the assembly information.
264     *
265     * @return String
266     */
267    public String getAssemblyHeader() {
268        return assemblyHeader;
269    }
270
271    public void setAssemblyHeader(String assemblyHeader) {
272        this.assemblyHeader = assemblyHeader;
273    }
274
275    /**
276     * The AS (Assembly Information) lines provide information on the composition of
277     * a TPA or TSA sequence.
278     *
279     * @return String
280     */
281    public String getAssemblyInformation() {
282        return assemblyInformation;
283    }
284
285    public void setAssemblyInformation(String assemblyInformation) {
286        this.assemblyInformation = assemblyInformation;
287    }
288
289    /**
290     * Con(structed) sequences in the CON data classes represent complete
291     * chromosomes, genomes and other long sequences constructed from segment entries.
292     *
293     * @return String
294     */
295    public String getConstructedSequence() {
296        return constructedSequence;
297    }
298
299    public void setConstructedSequence(String constructedSequence) {
300        this.constructedSequence = constructedSequence;
301    }
302
303    /**
304     * The SQ (SeQuence header) line marks the beginning of the sequence data and
305     * Gives a summary of its content.
306     *
307     * @return String
308     */
309    public String getSequenceHeader() {
310        return sequenceHeader;
311    }
312
313    public void setSequenceHeader(String sequenceHeader) {
314        this.sequenceHeader = sequenceHeader;
315    }
316
317    /**
318     * The Sequence Data Line
319     *
320     * @return String
321     */
322    public String getSequence() {
323        return sequence;
324    }
325
326    public void setSequence(String sequence) {
327        this.sequence = sequence;
328    }
329
330}