001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022package org.biojava.nbio.core.sequence.io;
023
024import org.biojava.nbio.core.sequence.DNASequence;
025import org.biojava.nbio.core.sequence.ProteinSequence;
026import org.biojava.nbio.core.sequence.RNASequence;
027import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
028import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
029import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
030import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
031import org.biojava.nbio.core.sequence.compound.RNACompoundSet;
032import org.biojava.nbio.core.sequence.template.AbstractSequence;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035
036import java.io.File;
037import java.io.FileInputStream;
038import java.io.InputStream;
039import java.util.LinkedHashMap;
040import java.util.Map;
041
042/**
043 *
044 * @author Scooter Willis 
045 */
046public class GenbankReaderHelper {
047
048        private final static Logger logger = LoggerFactory.getLogger(GenbankReaderHelper.class);
049
050        /**
051         * Selecting lazySequenceLoad=true will parse the Genbank file and figure out the accessionid and offsets and return sequence objects
052         * that can in the future read the sequence from the disk. This allows the loading of large Genbank files where you are only interested
053         * in one sequence based on accession id.
054         * @param file
055         * @param lazySequenceLoad
056         * @return
057         * @throws Exception
058         */
059        public static Map<String, DNASequence> readGenbankDNASequence(File file, boolean lazySequenceLoad) throws Exception {
060                if (!lazySequenceLoad) {
061                        return readGenbankDNASequence(file);
062                }
063
064                GenbankReader<DNASequence, NucleotideCompound> GenbankProxyReader =
065                                new GenbankReader<>(
066                                                file,
067                                                new GenericGenbankHeaderParser<DNASequence, NucleotideCompound>(),
068                                                new FileProxyDNASequenceCreator(
069                                                                file,
070                                                                DNACompoundSet.getDNACompoundSet(),
071                                                                new GenbankSequenceParser<AbstractSequence<NucleotideCompound>, NucleotideCompound>()
072                                                        )
073                                        );
074                return GenbankProxyReader.process();
075
076        }
077
078        /**
079         * Selecting lazySequenceLoad=true will parse the Genbank file and figure out the accessionid and offsets and return sequence objects
080         * that can in the future read the sequence from the disk. This allows the loading of large Genbank files where you are only interested
081         * in one sequence based on accession id.
082         * @param file
083         * @param lazySequenceLoad
084         * @return
085         * @throws Exception
086         */
087        public static Map<String, ProteinSequence> readGenbankProteinSequence(File file, boolean lazySequenceLoad) throws Exception {
088                if (!lazySequenceLoad) {
089                        return readGenbankProteinSequence(file);
090                }
091
092                GenbankReader<ProteinSequence, AminoAcidCompound> GenbankProxyReader =
093                                new GenbankReader<>(
094                                                file,
095                                                new GenericGenbankHeaderParser<ProteinSequence, AminoAcidCompound>(),
096                                                new FileProxyProteinSequenceCreator(
097                                                                file,
098                                                                AminoAcidCompoundSet.getAminoAcidCompoundSet(),
099                                                                new GenbankSequenceParser<AbstractSequence<AminoAcidCompound>, AminoAcidCompound>()
100                                                        )
101                                        );
102                return GenbankProxyReader.process();
103
104        }
105
106        /**
107         * Selecting lazySequenceLoad=true will parse the Genbank file and figure out the accessionid and offsets and return sequence objects
108         * that can in the future read the sequence from the disk. This allows the loading of large Genbank files where you are only interested
109         * in one sequence based on accession id.
110         * @param file
111         * @param lazySequenceLoad
112         * @return
113         * @throws Exception
114         */
115        public static Map<String, RNASequence> readGenbankRNASequence(File file, boolean lazySequenceLoad) throws Exception {
116                if (!lazySequenceLoad) {
117                        return readGenbankRNASequence(file);
118                }
119
120                GenbankReader<RNASequence, NucleotideCompound> GenbankProxyReader =
121                                new GenbankReader<>(
122                                                file,
123                                                new GenericGenbankHeaderParser<RNASequence, NucleotideCompound>(),
124                                                new FileProxyRNASequenceCreator(
125                                                                file,
126                                                                RNACompoundSet.getRNACompoundSet(),
127                                                                new GenbankSequenceParser<AbstractSequence<NucleotideCompound>, NucleotideCompound>()
128                                                        )
129                                        );
130                return GenbankProxyReader.process();
131
132        }
133
134        /**
135         * Read a Genbank file containing amino acids with setup that would handle most
136         * cases.
137         *
138         * @param file
139         * @return
140         * @throws Exception
141         */
142        public static Map<String, ProteinSequence> readGenbankProteinSequence(
143                        File file) throws Exception {
144                FileInputStream inStream = new FileInputStream(file);
145                Map<String, ProteinSequence> proteinSequences = readGenbankProteinSequence(inStream);
146                inStream.close();
147                return proteinSequences;
148        }
149
150        /**
151         * Read a Genbank file containing amino acids with setup that would handle most
152         * cases. User is responsible for closing InputStream because you opened it
153         *
154         * @param inStream
155         * @return
156         * @throws Exception
157         */
158        public static Map<String, ProteinSequence> readGenbankProteinSequence(
159                        InputStream inStream) throws Exception {
160                GenbankReader<ProteinSequence, AminoAcidCompound> GenbankReader = new GenbankReader<>(
161                                inStream,
162                                new GenericGenbankHeaderParser<ProteinSequence, AminoAcidCompound>(),
163                                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
164                return GenbankReader.process();
165        }
166
167        /**
168         * Read a Genbank DNA sequence
169         * @param inStream
170         * @return
171         * @throws Exception
172         */
173        public static Map<String, DNASequence> readGenbankDNASequence(
174                        InputStream inStream) throws Exception {
175                GenbankReader<DNASequence, NucleotideCompound> GenbankReader = new GenbankReader<>(
176                                inStream,
177                                new GenericGenbankHeaderParser<DNASequence, NucleotideCompound>(),
178                                new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
179                return GenbankReader.process();
180        }
181
182        /**
183         *
184         * @param file
185         * @return
186         * @throws Exception
187         */
188        public static Map<String, DNASequence> readGenbankDNASequence(
189                        File file) throws Exception {
190                FileInputStream inStream = new FileInputStream(file);
191                Map<String, DNASequence> dnaSequences = readGenbankDNASequence(inStream);
192                inStream.close();
193                return dnaSequences;
194        }
195        /**
196         * Read a Genbank RNA sequence
197         * @param inStream
198         * @return
199         * @throws Exception
200         */
201        public static Map<String, RNASequence> readGenbankRNASequence(
202                        InputStream inStream) throws Exception {
203                GenbankReader<RNASequence, NucleotideCompound> GenbankReader = new GenbankReader<>(
204                                inStream,
205                                new GenericGenbankHeaderParser<RNASequence, NucleotideCompound>(),
206                                new RNASequenceCreator(RNACompoundSet.getRNACompoundSet()));
207                return GenbankReader.process();
208        }
209
210        /**
211         *
212         * @param file
213         * @return
214         * @throws Exception
215         */
216        public static Map<String, RNASequence> readGenbankRNASequence(
217                        File file) throws Exception {
218                FileInputStream inStream = new FileInputStream(file);
219                Map<String, RNASequence> rnaSequences = readGenbankRNASequence(inStream);
220                inStream.close();
221                return rnaSequences;
222        }
223
224}