/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 01-21-2010
 */
package org.biojava.nbio.core.sequence.io;

import org.biojava.nbio.core.sequence.DNASequence;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
import org.biojava.nbio.core.sequence.compound.NucleotideCompound;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedHashMap;

/**
 * Static convenience methods for reading FASTA files or streams into DNA or
 * protein sequence maps using a default reader configuration.
 *
 * @author Scooter Willis &lt;willishf at gmail dot com&gt;
 */
public class FastaReaderHelper {

	/**
	 * Reads DNA sequences from a FASTA file, optionally deferring the loading
	 * of the sequence data.
	 * <p>
	 * Selecting {@code lazySequenceLoad=true} will parse the FASTA file and
	 * figure out the accession ids and offsets, and return sequence objects
	 * that can in the future read the sequence from the disk. This allows the
	 * loading of large FASTA files where you are only interested in one
	 * sequence based on accession id. With {@code lazySequenceLoad=false} this
	 * method simply delegates to {@link #readFastaDNASequence(File)}.
	 *
	 * @param file the FASTA file to read
	 * @param lazySequenceLoad {@code true} to create file-backed proxy
	 *        sequences, {@code false} to read everything eagerly
	 * @return the sequences as produced by {@code FastaReader.process()}
	 * @throws IOException if the file cannot be opened or read
	 */
	public static LinkedHashMap<String, DNASequence> readFastaDNASequence(File file, boolean lazySequenceLoad) throws IOException {
		if (!lazySequenceLoad) {
			return readFastaDNASequence(file);
		}

		FastaReader<DNASequence, NucleotideCompound> fastaProxyReader =
				new FastaReader<DNASequence, NucleotideCompound>(
						file,
						new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
						new FileProxyDNASequenceCreator(
								file,
								DNACompoundSet.getDNACompoundSet(),
								new FastaSequenceParser()
						)
				);
		return fastaProxyReader.process();

	}

	/**
	 * Read a fasta file containing amino acids with setup that would handle most
	 * cases.
	 * <p>
	 * The file is opened and closed by this method; the stream is closed even
	 * when parsing fails.
	 *
	 * @param file the FASTA file to read
	 * @return the sequences as produced by {@code FastaReader.process()}
	 * @throws IOException if the file cannot be opened, read or closed
	 */
	public static LinkedHashMap<String, ProteinSequence> readFastaProteinSequence(
			File file) throws IOException {
		FileInputStream inStream = new FileInputStream(file);
		try {
			return readFastaProteinSequence(inStream);
		} finally {
			// Close on every path so a parse failure does not leak the file handle.
			inStream.close();
		}
	}

	/**
	 * Read a fasta file containing amino acids with setup that would handle most
	 * cases. The caller is responsible for closing the InputStream, because the
	 * caller opened it; this method does not close it.
	 *
	 * @param inStream stream positioned at the start of the FASTA data
	 * @return the sequences as produced by {@code FastaReader.process()}
	 * @throws IOException if reading from the stream fails
	 */
	public static LinkedHashMap<String, ProteinSequence> readFastaProteinSequence(
			InputStream inStream) throws IOException {
		FastaReader<ProteinSequence, AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence, AminoAcidCompound>(
				inStream,
				new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>(),
				new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));
		return fastaReader.process();
	}

	/**
	 * Read a fasta DNA sequence. The caller is responsible for closing the
	 * InputStream, because the caller opened it; this method does not close it.
	 *
	 * @param inStream stream positioned at the start of the FASTA data
	 * @return the sequences as produced by {@code FastaReader.process()}
	 * @throws IOException if reading from the stream fails
	 */
	public static LinkedHashMap<String, DNASequence> readFastaDNASequence(
			InputStream inStream) throws IOException {
		FastaReader<DNASequence, NucleotideCompound> fastaReader = new FastaReader<DNASequence, NucleotideCompound>(
				inStream,
				new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
				new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
		return fastaReader.process();
	}

	/**
	 * Read a fasta file containing DNA sequences.
	 * <p>
	 * The file is opened and closed by this method; the stream is closed even
	 * when parsing fails.
	 *
	 * @param file the FASTA file to read
	 * @return the sequences as produced by {@code FastaReader.process()}
	 * @throws IOException if the file cannot be opened, read or closed
	 */
	public static LinkedHashMap<String, DNASequence> readFastaDNASequence(
			File file) throws IOException {
		FileInputStream inStream = new FileInputStream(file);
		try {
			return readFastaDNASequence(inStream);
		} finally {
			// Close on every path so a parse failure does not leak the file handle.
			inStream.close();
		}
	}

	/**
	 * Manual smoke test: reads {@code fasta.fna} from the working directory and
	 * runs every DNA sequence through the RNA and protein conversions.
	 *
	 * @param args ignored
	 * @throws Exception if reading or any conversion fails
	 */
	public static void main(String[] args) throws Exception {

		LinkedHashMap<String, DNASequence> dnaSequences = FastaReaderHelper.readFastaDNASequence(new File("fasta.fna"));
		for (DNASequence sequence : dnaSequences.values()) {
			// The result is intentionally discarded; the call only exercises the conversion chain.
			sequence.getRNASequence().getProteinSequence().getSequenceAsString();
		}
	}
}