001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.sequence.io.embl; 022 023import java.util.LinkedList; 024import java.util.List; 025 026 027/** 028 * this class contains the parsed data of embl file 029 * 030 * @author Noor Aldeen Al Mbaidin 031 * @since 5.0.0 032 */ 033 034public class EmblRecord { 035 036 private EmblId emblId; 037 private List<EmblReference> emblReference; 038 private List<String> accessionNumber = new LinkedList<>(); 039 private String projectIdentifier; 040 private String orGanelle; 041 private String createdDate; 042 private String featureHeader; 043 private String featureTable; 044 private String lastUpdatedDate; 045 private String sequenceDescription; 046 private List<String> keyword = new LinkedList<>(); 047 private String organismSpecies; 048 private String organismClassification; 049 private String databaseCrossReference; 050 private String assemblyHeader; 051 private String assemblyInformation; 052 private String constructedSequence; 053 private String sequenceHeader; 054 private String sequence; 055 056 /** 057 * The ID (IDentification) line 058 * The tokens represent: 059 * 1. Primary accession number 060 * 2. Sequence version number 061 * 3. Topology: 'circular' or 'linear' 062 * 4. Molecule type 063 * 5. Data class 064 * 6. Taxonomic division 065 * 7. Sequence length 066 * 067 * @return EmblId 068 */ 069 public EmblId getEmblId() { 070 return emblId; 071 } 072 073 public void setEmblId(EmblId emblId) { 074 this.emblId = emblId; 075 } 076 077 /** 078 * The Reference (RN, RC, RP, RX, RG, RA, RT, RL) Lines 079 * These lines comprise the literature citations within the database. 080 * The citations provide access to the papers from which the data has been 081 * abstracted. 082 * 083 * @return EmblReference 084 */ 085 public List<EmblReference> getEmblReference() { 086 return emblReference; 087 } 088 089 public void setEmblReference(List<EmblReference> emblReference) { 090 this.emblReference = emblReference; 091 } 092 093 /** 094 * The AC (Accession number) line lists the accession numbers associated with 095 * the entry. 096 * 097 * @return List<String> 098 */ 099 public List<String> getAccessionNumber() { 100 return accessionNumber; 101 } 102 103 public void setAccessionNumber(List<String> accessionNumber) { 104 this.accessionNumber = accessionNumber; 105 } 106 107 /** 108 * @return String 109 */ 110 public String getProjectIdentifier() { 111 return projectIdentifier; 112 } 113 114 public void setProjectIdentifier(String projectIdentifier) { 115 this.projectIdentifier = projectIdentifier; 116 } 117 118 /** 119 * The OG (OrGanelle) linetype indicates the sub-cellular location of non-nuclear 120 * sequences. 121 * 122 * @return String 123 */ 124 public String getOrGanelle() { 125 return orGanelle; 126 } 127 128 public void setOrGanelle(String orGanelle) { 129 this.orGanelle = orGanelle; 130 } 131 132 /** 133 * The DT line shows when an entry first appeared in the database 134 * 135 * @return String 136 */ 137 public String getCreatedDate() { 138 return createdDate; 139 } 140 141 public void setCreatedDate(String createdDate) { 142 this.createdDate = createdDate; 143 } 144 145 /** 146 * The FH (Feature Header) lines are present only to improve readability of 147 * an entry when it is printed or displayed on a terminal screen. 148 * 149 * @return String 150 */ 151 public String getFeatureHeader() { 152 return featureHeader; 153 } 154 155 public void setFeatureHeader(String featureHeader) { 156 this.featureHeader = featureHeader; 157 } 158 159 /** 160 * The FT (Feature Table) lines provide a mechanism for the annotation of the 161 * sequence data. Regions or sites in the sequence which are of interest are 162 * listed in the table. 163 * 164 * @return String 165 */ 166 public String getFeatureTable() { 167 return featureTable; 168 } 169 170 public void setFeatureTable(String featureTable) { 171 this.featureTable = featureTable; 172 } 173 174 /** 175 * The DT (DaTe) line shows when an entry was last updated in the database. 176 * 177 * @return String 178 */ 179 public String getLastUpdatedDate() { 180 return lastUpdatedDate; 181 } 182 183 public void setLastUpdatedDate(String lastUpdatedDate) { 184 this.lastUpdatedDate = lastUpdatedDate; 185 } 186 187 /** 188 * The DE (Description) lines contain general descriptive information about the 189 * sequence stored. This may include the designations of genes for which the 190 * sequence codes, the region of the genome from which it is derived, or other 191 * information which helps to identify the sequence. 192 * 193 * @return String 194 */ 195 public String getSequenceDescription() { 196 return sequenceDescription; 197 } 198 199 public void setSequenceDescription(String sequenceDescription) { 200 this.sequenceDescription = sequenceDescription; 201 } 202 203 /** 204 * The KW (KeyWord) lines provide information which can be used to generate 205 * cross-reference indexes of the sequence entries based on functional, 206 * structural, or other categories deemed important. 207 * 208 * @return List<String> 209 */ 210 public List<String> getKeyword() { 211 return keyword; 212 } 213 214 public void setKeyword(List<String> keyword) { 215 this.keyword = keyword; 216 } 217 218 /** 219 * The OS (Organism Species) line specifies the preferred scientific name of 220 * the organism which was the source of the stored sequence. In most 221 * cases this is done by giving the Latin genus and species designations, 222 * followed (in parentheses) by the preferred common name in English where known. 223 * 224 * @return String 225 */ 226 public String getOrganismSpecies() { 227 return organismSpecies; 228 } 229 230 public void setOrganismSpecies(String organismSpecies) { 231 this.organismSpecies = organismSpecies; 232 } 233 234 /** 235 * The OC (Organism Classification) lines contain the taxonomic classification 236 * Of the source organism 237 * 238 * @return String 239 */ 240 public String getOrganismClassification() { 241 return organismClassification; 242 } 243 244 public void setOrganismClassification(String organismClassification) { 245 this.organismClassification = organismClassification; 246 } 247 248 /** 249 * The DR (Database Cross-reference) line cross-references other databases which 250 * contain information related to the entry in which the DR line appears. 251 * 252 * @return String 253 */ 254 public String getDatabaseCrossReference() { 255 return databaseCrossReference; 256 } 257 258 public void setDatabaseCrossReference(String databaseCrossReference) { 259 this.databaseCrossReference = databaseCrossReference; 260 } 261 262 /** 263 * The AH (Assembly Header) line provides column headings for the assembly information. 264 * 265 * @return String 266 */ 267 public String getAssemblyHeader() { 268 return assemblyHeader; 269 } 270 271 public void setAssemblyHeader(String assemblyHeader) { 272 this.assemblyHeader = assemblyHeader; 273 } 274 275 /** 276 * The AS (Assembly Information) lines provide information on the composition of 277 * a TPA or TSA sequence. 278 * 279 * @return String 280 */ 281 public String getAssemblyInformation() { 282 return assemblyInformation; 283 } 284 285 public void setAssemblyInformation(String assemblyInformation) { 286 this.assemblyInformation = assemblyInformation; 287 } 288 289 /** 290 * Con(structed) sequences in the CON data classes represent complete 291 * chromosomes, genomes and other long sequences constructed from segment entries. 292 * 293 * @return String 294 */ 295 public String getConstructedSequence() { 296 return constructedSequence; 297 } 298 299 public void setConstructedSequence(String constructedSequence) { 300 this.constructedSequence = constructedSequence; 301 } 302 303 /** 304 * The SQ (SeQuence header) line marks the beginning of the sequence data and 305 * Gives a summary of its content. 306 * 307 * @return String 308 */ 309 public String getSequenceHeader() { 310 return sequenceHeader; 311 } 312 313 public void setSequenceHeader(String sequenceHeader) { 314 this.sequenceHeader = sequenceHeader; 315 } 316 317 /** 318 * The Sequence Data Line 319 * 320 * @return String 321 */ 322 public String getSequence() { 323 return sequence; 324 } 325 326 public void setSequence(String sequence) { 327 this.sequence = sequence; 328 } 329 330}