001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.core.sequence.io.embl;
022
023
024import java.io.*;
025import java.util.Arrays;
026import java.util.LinkedList;
027
028
029/**
030 * This class should process the data of embl file
031 *
032 * @author Noor Aldeen Al Mbaidin
033 * @since 5.0.0
034 */
035public class EmblReader {
036
037        /**
038         * The parsing is done in this method.<br>
039         * This method tries to process all the Embl records
040         * in the File , closes the underlying resource,
041         * and return the results in object of EmblRecord.<br>
042         *
043         * @return EmblRecord containing all the parsed Embl records
044         * @throws IOException
045         */
046        public static EmblRecord process(File file) throws IOException {
047
048                EmblRecord emblRecord = new EmblRecord();
049                StringBuilder sequence = new StringBuilder("");
050                LinkedList<EmblReference> emblReferences = new LinkedList<>();
051                EmblReference emblReference = new EmblReference();
052                LinkedList<String> accessionNumber = new LinkedList<>();
053                LinkedList<String> keyword = new LinkedList<>();
054
055                if (file == null)
056                        throw new NullPointerException("file can't be null");
057
058                if (file.isDirectory())
059                        throw new IllegalArgumentException("the file can't be a directory");
060
061                try (FileReader fileReader = new FileReader(file)) {
062                        String line = "";
063                        String lineIdentifier;
064                        String lineInfo;
065                        try (BufferedReader bufferedReader = new BufferedReader(fileReader)) {
066                                while ((line = bufferedReader.readLine()) != null) {
067                                        if (line.length() > 1) {
068                                                lineInfo = line.substring(2, line.length()).trim();
069                                                lineIdentifier = line.substring(0, 2);
070                                                if (lineIdentifier.equals("ID"))
071                                                        emblRecord.setEmblId(populateID(lineInfo));
072                                                else if (lineIdentifier.equals("AC"))
073                                                        populateAccessionNumber(line, accessionNumber);
074                                                else if (lineIdentifier.equals("DT") && line.contains("Created"))
075                                                        emblRecord.setCreatedDate(lineInfo);
076                                                else if (lineIdentifier.equals("DT") && line.contains("updated"))
077                                                        emblRecord.setLastUpdatedDate(lineInfo);
078                                                else if (lineIdentifier.equals("DE"))
079                                                        emblRecord.setSequenceDescription(lineInfo);
080                                                else if (lineIdentifier.equals("KW"))
081                                                        keyword.add(lineInfo);
082                                                else if (lineIdentifier.equals("OS"))
083                                                        emblRecord.setOrganismSpecies(lineInfo);
084                                                else if (lineIdentifier.equals("OC"))
085                                                        emblRecord.setOrganismClassification(lineInfo);
086                                                else if (lineIdentifier.equals("OG"))
087                                                        emblRecord.setOrGanelle(lineInfo);
088                                                else if (lineIdentifier.equals("RN") || lineIdentifier.equals("RP")
089                                                                || lineIdentifier.equals("RX") || lineIdentifier.equals("RG")
090                                                                || lineIdentifier.equals("RA") || lineIdentifier.equals("RT")
091                                                                || lineIdentifier.equals("RL"))
092                                                        populateEmblReferences(lineIdentifier, lineInfo, emblReference, emblReferences);
093                                                else if (lineIdentifier.equals("DR"))
094                                                        emblRecord.setDatabaseCrossReference(lineInfo);
095                                                else if (lineIdentifier.equals("AH"))
096                                                        emblRecord.setAssemblyHeader(lineInfo);
097                                                else if (lineIdentifier.equals("AS"))
098                                                        emblRecord.setAssemblyInformation(lineInfo);
099                                                else if (lineIdentifier.equals("CO"))
100                                                        emblRecord.setConstructedSequence(lineInfo);
101                                                else if (lineIdentifier.equals("FH"))
102                                                        emblRecord.setFeatureHeader(lineInfo);
103                                                else if (lineIdentifier.equals("FT"))
104                                                        emblRecord.setFeatureTable(lineInfo);
105                                                else if (lineIdentifier.equals("SQ"))
106                                                        emblRecord.setSequenceHeader(lineInfo);
107                                                else if (lineIdentifier.equals("  ") && !lineIdentifier.equals("//"))
108                                                        populateSequence(line, sequence);
109                                                else if (lineIdentifier.equals("//")) {
110                                                        emblRecord.setKeyword(keyword);
111                                                        emblRecord.setEmblReference(emblReferences);
112                                                        emblRecord.setAccessionNumber(accessionNumber);
113                                                        emblRecord.setSequence(sequence.toString());
114                                                }
115
116                                        }
117                                }
118                        }
119                }
120
121                return emblRecord;
122        }
123
124        private static void populateSequence(String line, StringBuilder sequence) {
125                String sequenceLine = line.replace(" ", "").
126                                replaceAll("[0-9]", "");
127                sequence.append(sequenceLine);
128        }
129
130        private static void populateEmblReferences(String lineIdentifier, String lineInfo, EmblReference emblReference
131                        , LinkedList<EmblReference> emblReferences) {
132                if (lineIdentifier.equals("RN"))
133                        emblReference.setReferenceNumber(lineInfo);
134                else if (lineIdentifier.equals("RP"))
135                        emblReference.setReferencePosition(lineInfo);
136                else if (lineIdentifier.equals("RX"))
137                        emblReference.setReferenceCrossReference(lineInfo);
138                else if (lineIdentifier.equals("RG"))
139                        emblReference.setReferenceGroup(lineInfo);
140                else if (lineIdentifier.equals("RA"))
141                        emblReference.setReferenceAuthor(lineInfo);
142                else if (lineIdentifier.equals("RT"))
143                        emblReference.setReferenceTitle(lineInfo);
144                else if (lineIdentifier.equals("RL")) {
145                        emblReference.setReferenceLocation(lineInfo);
146                        emblReferences.add(emblReference.copyEmblReference(emblReference));
147                }
148        }
149
150        private static void populateAccessionNumber(String line, LinkedList<String> accessionNumber) {
151                accessionNumber.add(line);
152        }
153
154        private static EmblId populateID(String line) {
155                String[] strings = line.split(";");
156                Arrays.stream(strings).map(String::trim).toArray(unused -> strings);
157                EmblId emblId = new EmblId(strings[0], strings[1], strings[2]
158                                , strings[3], strings[4], strings[5], strings[6]);
159                return emblId;
160        }
161
162
163}