001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.seq.io; 023 024import java.io.BufferedReader; 025import java.io.OutputStream; 026import java.io.PrintStream; 027import java.util.Iterator; 028import java.util.LinkedHashMap; 029import java.util.Map; 030 031import org.biojava.bio.BioException; 032import org.biojava.bio.alignment.Alignment; 033import org.biojava.bio.alignment.SimpleAlignment; 034import org.biojava.bio.seq.Sequence; 035import org.biojava.bio.seq.SequenceIterator; 036import org.biojava.bio.symbol.IllegalSymbolException; 037import org.biojava.bio.symbol.SymbolList; 038 039/** 040 * This class implements the AlignmentFormat interface to read FASTA alignments. 041 * It is modeled after the MSFAlignmentFormat class. 042 * 043 * @author Nimesh Singh 044 */ 045 046public class FastaAlignmentFormat implements AlignmentFormat { 047 //Constants 048 public static final int DNA = 1; 049 public static final int PROTEIN = 2; 050 051 public FastaAlignmentFormat() { 052 } 053 054 /** 055 * Reads an alignment in FASTA format. 056 */ 057 public Alignment read(BufferedReader br) { 058 try { 059 SequenceIterator seqs = null; 060 br.mark(200); 061 String line = br.readLine(); 062 line = br.readLine(); 063 br.reset(); 064 065 for (int i = 0; i < line.length(); i++) { 066 if (Character.toUpperCase(line.charAt(i)) == 'F' || 067 Character.toUpperCase(line.charAt(i)) == 'L' || 068 Character.toUpperCase(line.charAt(i)) == 'I' || 069 Character.toUpperCase(line.charAt(i)) == 'P' || 070 Character.toUpperCase(line.charAt(i)) == 'Q' || 071 Character.toUpperCase(line.charAt(i)) == 'E') { 072 seqs = SeqIOTools.readFastaProtein(br); 073 } 074 } 075 if (seqs == null) { 076 seqs = SeqIOTools.readFastaDNA(br); 077 } 078 079 Map seqMap = new LinkedHashMap(); 080 Sequence curSeq = null; 081 while (seqs.hasNext()) { 082 curSeq = seqs.nextSequence(); 083 seqMap.put(curSeq.getName(), curSeq); 084 } 085 086 return new SimpleAlignment(seqMap); 087 } catch (Exception e) { 088 System.err.println("FastaAlignmentFormat.read -- " + e.getMessage()); 089 } 090 return null; 091 } 092 093 /** 094 * Writes out the alignment to an FASTA file. 095 */ 096 public void write(OutputStream os, Alignment align, int fileType) throws BioException, IllegalSymbolException { 097 PrintStream out = new PrintStream(os); 098 Iterator<String> labels = align.getLabels().listIterator(); 099 String curLabel = null; 100 SymbolList curSeq = null; 101 int lineWidth = 60; 102 103 if (fileType == DNA) { 104 //toke = DNATools.getDNA().getTokenization("token"); 105 } 106 else if (fileType == PROTEIN) { 107 //toke = ProteinTools.getTAlphabet().getTokenization("token"); 108 } 109 else { 110 System.out.println("FastaAlignment.write -- File type not recognized."); 111 return; 112 } 113 114 while (labels.hasNext()) { 115 curLabel = labels.next(); 116 curSeq = align.symbolListForLabel(curLabel); 117 118 out.print(">"); 119 out.println(curLabel); 120 121 for (int pos = 1; pos <= curSeq.length(); pos += lineWidth) { 122 int end = Math.min(pos + lineWidth - 1, curSeq.length()); 123 out.println(curSeq.subStr(pos, end)); 124 } 125 } 126 } //end write 127 128 public void writeDna(OutputStream os, Alignment align) throws BioException, IllegalSymbolException { 129 write(os, align, DNA); 130 } 131 132 public void writeProtein(OutputStream os, Alignment align) throws BioException, IllegalSymbolException { 133 write(os, align, PROTEIN); 134 } 135}