001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 01-21-2010 021 */ 022package org.biojava.nbio.core.sequence.io; 023 024import org.biojava.nbio.core.sequence.DNASequence; 025import org.biojava.nbio.core.sequence.ProteinSequence; 026import org.biojava.nbio.core.sequence.RNASequence; 027import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 028import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 029import org.biojava.nbio.core.sequence.compound.DNACompoundSet; 030import org.biojava.nbio.core.sequence.compound.NucleotideCompound; 031import org.biojava.nbio.core.sequence.compound.RNACompoundSet; 032 033import java.io.File; 034import java.io.FileInputStream; 035import java.io.IOException; 036import java.io.InputStream; 037import java.util.LinkedHashMap; 038import java.util.Map; 039 040/** 041 * 042 * @author Scooter Willis 043 */ 044public class FastaReaderHelper { 045 046 /** 047 * Selecting lazySequenceLoad=true will parse the FASTA file and figure out the accessionid and offsets and return sequence objects 048 * that can in the future read the sequence from the disk. This allows the loading of large fasta files where you are only interested 049 * in one sequence based on accession id. 050 * @param file 051 * @param lazySequenceLoad 052 * @return 053 * @throws IOException 054 */ 055 public static Map<String, DNASequence> readFastaDNASequence(File file, boolean lazySequenceLoad) throws IOException { 056 if (!lazySequenceLoad) { 057 return readFastaDNASequence(file); 058 } 059 060 FastaReader<DNASequence, NucleotideCompound> fastaProxyReader = 061 new FastaReader<>( 062 file, 063 new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(), 064 new FileProxyDNASequenceCreator( 065 file, 066 DNACompoundSet.getDNACompoundSet(), 067 new FastaSequenceParser() 068 ) 069 ); 070 return fastaProxyReader.process(); 071 072 } 073 074 /** 075 * Selecting lazySequenceLoad=true will parse the FASTA file and figure out the accessionid and offsets and return sequence objects 076 * that can in the future read the sequence from the disk. This allows the loading of large fasta files where you are only interested 077 * in one sequence based on accession id. 078 * @param file 079 * @param lazySequenceLoad 080 * @return 081 * @throws IOException 082 */ 083 public static Map<String, RNASequence> readFastaRNASequence(File file, boolean lazySequenceLoad) throws IOException { 084 if (!lazySequenceLoad) { 085 return readFastaRNASequence(file); 086 } 087 088 FastaReader<RNASequence, NucleotideCompound> fastaProxyReader = 089 new FastaReader<>( 090 file, 091 new GenericFastaHeaderParser<RNASequence, NucleotideCompound>(), 092 new FileProxyRNASequenceCreator( 093 file, 094 RNACompoundSet.getRNACompoundSet(), 095 new FastaSequenceParser() 096 ) 097 ); 098 return fastaProxyReader.process(); 099 100 } 101 102 /** 103 * Read a fasta file containing amino acids with setup that would handle most 104 * cases. 105 * 106 * @param file 107 * @return 108 * @throws IOException 109 */ 110 public static Map<String, ProteinSequence> readFastaProteinSequence( 111 File file) throws IOException { 112 FileInputStream inStream = new FileInputStream(file); 113 Map<String, ProteinSequence> proteinSequences = readFastaProteinSequence(inStream); 114 inStream.close(); 115 return proteinSequences; 116 } 117 118 /** 119 * Read a fasta file containing amino acids with setup that would handle most 120 * cases. User is responsible for closing InputStream because you opened it 121 * 122 * @param inStream 123 * @return 124 * @throws IOException 125 */ 126 public static Map<String, ProteinSequence> readFastaProteinSequence( 127 InputStream inStream) throws IOException { 128 FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<>( 129 inStream, 130 new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(), 131 new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet())); 132 return fastaReader.process(); 133 } 134 135 /** 136 * Read a fasta DNA sequence 137 * @param inStream 138 * @return 139 * @throws IOException 140 */ 141 public static Map<String, DNASequence> readFastaDNASequence( 142 InputStream inStream) throws IOException { 143 FastaReader<DNASequence, NucleotideCompound> fastaReader = new FastaReader<>( 144 inStream, 145 new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(), 146 new DNASequenceCreator(DNACompoundSet.getDNACompoundSet())); 147 return fastaReader.process(); 148 } 149 150 /** 151 * 152 * @param file 153 * @return 154 * @throws IOException 155 */ 156 public static Map<String, DNASequence> readFastaDNASequence( 157 File file) throws IOException { 158 FileInputStream inStream = new FileInputStream(file); 159 Map<String, DNASequence> dnaSequences = readFastaDNASequence(inStream); 160 inStream.close(); 161 return dnaSequences; 162 } 163 164 /** 165 * Read a fasta RNA sequence 166 * @param inStream 167 * @return 168 * @throws IOException 169 */ 170 public static Map<String, RNASequence> readFastaRNASequence( 171 InputStream inStream) throws IOException { 172 FastaReader<RNASequence, NucleotideCompound> fastaReader = new FastaReader<>( 173 inStream, 174 new GenericFastaHeaderParser<RNASequence, NucleotideCompound>(), 175 new RNASequenceCreator(RNACompoundSet.getRNACompoundSet())); 176 return fastaReader.process(); 177 } 178 179 /** 180 * 181 * @param file 182 * @return 183 * @throws IOException 184 */ 185 public static Map<String, RNASequence> readFastaRNASequence( 186 File file) throws IOException { 187 FileInputStream inStream = new FileInputStream(file); 188 Map<String, RNASequence> rnaSequences = readFastaRNASequence(inStream); 189 inStream.close(); 190 return rnaSequences; 191 } 192 193}