001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 */
022package org.biojava.nbio.core.sequence.io;
023
024import org.biojava.nbio.core.sequence.DNASequence;
025import org.biojava.nbio.core.sequence.ProteinSequence;
026import org.biojava.nbio.core.sequence.RNASequence;
027import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
028import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
029import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
030import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
031import org.biojava.nbio.core.sequence.compound.RNACompoundSet;
032
033import java.io.File;
034import java.io.FileInputStream;
035import java.io.IOException;
036import java.io.InputStream;
037import java.util.LinkedHashMap;
038import java.util.Map;
039
040/**
041 *
042 * @author Scooter Willis 
043 */
044public class FastaReaderHelper {
045
046        /**
047         * Selecting lazySequenceLoad=true will parse the FASTA file and figure out the accessionid and offsets and return sequence objects
048         * that can in the future read the sequence from the disk. This allows the loading of large fasta files where you are only interested
049         * in one sequence based on accession id.
050         * @param file
051         * @param lazySequenceLoad
052         * @return
053         * @throws IOException
054         */
055        public static Map<String, DNASequence> readFastaDNASequence(File file, boolean lazySequenceLoad) throws IOException {
056                if (!lazySequenceLoad) {
057                        return readFastaDNASequence(file);
058                }
059
060                FastaReader<DNASequence, NucleotideCompound> fastaProxyReader =
061                                new FastaReader<>(
062                                                file,
063                                                new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
064                                                new FileProxyDNASequenceCreator(
065                                                                file,
066                                                                DNACompoundSet.getDNACompoundSet(),
067                                                                new FastaSequenceParser()
068                                                        )
069                                        );
070                return fastaProxyReader.process();
071
072        }
073
074        /**
075         * Selecting lazySequenceLoad=true will parse the FASTA file and figure out the accessionid and offsets and return sequence objects
076         * that can in the future read the sequence from the disk. This allows the loading of large fasta files where you are only interested
077         * in one sequence based on accession id.
078         * @param file
079         * @param lazySequenceLoad
080         * @return
081         * @throws IOException
082         */
083        public static Map<String, RNASequence> readFastaRNASequence(File file, boolean lazySequenceLoad) throws IOException {
084                if (!lazySequenceLoad) {
085                        return readFastaRNASequence(file);
086                }
087
088                FastaReader<RNASequence, NucleotideCompound> fastaProxyReader =
089                                new FastaReader<>(
090                                                file,
091                                                new GenericFastaHeaderParser<RNASequence, NucleotideCompound>(),
092                                                new FileProxyRNASequenceCreator(
093                                                                file,
094                                                                RNACompoundSet.getRNACompoundSet(),
095                                                                new FastaSequenceParser()
096                                                        )
097                                        );
098                return fastaProxyReader.process();
099
100        }
101
102        /**
103         * Read a fasta file containing amino acids with setup that would handle most
104         * cases.
105         *
106         * @param file
107         * @return
108         * @throws IOException
109         */
110        public static Map<String, ProteinSequence> readFastaProteinSequence(
111                        File file) throws IOException {
112                FileInputStream inStream = new FileInputStream(file);
113                Map<String, ProteinSequence> proteinSequences = readFastaProteinSequence(inStream);
114                inStream.close();
115                return proteinSequences;
116        }
117
118        /**
119         * Read a fasta file containing amino acids with setup that would handle most
120         * cases. User is responsible for closing InputStream because you opened it
121         *
122         * @param inStream
123         * @return
124         * @throws IOException
125         */
126        public static Map<String, ProteinSequence> readFastaProteinSequence(
127                        InputStream inStream) throws IOException {
128                FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<>(
129                                inStream,
130                                new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
131                                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
132                return fastaReader.process();
133        }
134
135        /**
136         * Read a fasta DNA sequence
137         * @param inStream
138         * @return
139         * @throws IOException
140         */
141        public static Map<String, DNASequence> readFastaDNASequence(
142                        InputStream inStream) throws IOException {
143                FastaReader<DNASequence, NucleotideCompound> fastaReader = new FastaReader<>(
144                                inStream,
145                                new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
146                                new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
147                return fastaReader.process();
148        }
149
150        /**
151         *
152         * @param file
153         * @return
154         * @throws IOException
155         */
156        public static Map<String, DNASequence> readFastaDNASequence(
157                        File file) throws IOException {
158                FileInputStream inStream = new FileInputStream(file);
159                Map<String, DNASequence> dnaSequences = readFastaDNASequence(inStream);
160                inStream.close();
161                return dnaSequences;
162        }
163
164        /**
165         * Read a fasta RNA sequence
166         * @param inStream
167         * @return
168         * @throws IOException
169         */
170        public static Map<String, RNASequence> readFastaRNASequence(
171                        InputStream inStream) throws IOException {
172                FastaReader<RNASequence, NucleotideCompound> fastaReader = new FastaReader<>(
173                                inStream,
174                                new GenericFastaHeaderParser<RNASequence, NucleotideCompound>(),
175                                new RNASequenceCreator(RNACompoundSet.getRNACompoundSet()));
176                return fastaReader.process();
177        }
178
179        /**
180         *
181         * @param file
182         * @return
183         * @throws IOException
184         */
185        public static Map<String, RNASequence> readFastaRNASequence(
186                        File file) throws IOException {
187                FileInputStream inStream = new FileInputStream(file);
188                Map<String, RNASequence> rnaSequences = readFastaRNASequence(inStream);
189                inStream.close();
190                return rnaSequences;
191        }
192
193}