001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.io;
023
024import java.io.BufferedReader;
025import java.io.OutputStream;
026import java.io.PrintStream;
027import java.util.Iterator;
028import java.util.LinkedHashMap;
029import java.util.Map;
030
031import org.biojava.bio.BioException;
032import org.biojava.bio.alignment.Alignment;
033import org.biojava.bio.alignment.SimpleAlignment;
034import org.biojava.bio.seq.Sequence;
035import org.biojava.bio.seq.SequenceIterator;
036import org.biojava.bio.symbol.IllegalSymbolException;
037import org.biojava.bio.symbol.SymbolList;
038
039/**
040 * This class implements the AlignmentFormat interface to read FASTA alignments.
041 * It is modeled after the MSFAlignmentFormat class.
042 *
043 * @author Nimesh Singh
044 */
045
046public class FastaAlignmentFormat implements AlignmentFormat {
047    //Constants
048    public static final int DNA = 1;
049    public static final int PROTEIN = 2;
050
051    public FastaAlignmentFormat() {
052    }
053
054    /**
055     * Reads an alignment in FASTA format.
056     */
057    public Alignment read(BufferedReader br) {
058        try {
059            SequenceIterator seqs = null;
060            br.mark(200);
061            String line = br.readLine();
062            line = br.readLine();
063            br.reset();
064
065            for (int i = 0; i < line.length(); i++) {
066                if (Character.toUpperCase(line.charAt(i)) == 'F' ||
067                    Character.toUpperCase(line.charAt(i)) == 'L' ||
068                    Character.toUpperCase(line.charAt(i)) == 'I' ||
069                    Character.toUpperCase(line.charAt(i)) == 'P' ||
070                    Character.toUpperCase(line.charAt(i)) == 'Q' ||
071                    Character.toUpperCase(line.charAt(i)) == 'E') {
072                        seqs = SeqIOTools.readFastaProtein(br);
073                }
074            }
075            if (seqs == null) {
076                seqs = SeqIOTools.readFastaDNA(br);
077            }
078
079            Map seqMap = new LinkedHashMap();
080            Sequence curSeq = null;
081            while (seqs.hasNext()) {
082                curSeq = seqs.nextSequence();
083                seqMap.put(curSeq.getName(), curSeq);
084            }
085
086            return new SimpleAlignment(seqMap);
087        } catch (Exception e) {
088            System.err.println("FastaAlignmentFormat.read -- " + e.getMessage());
089        }
090        return null;
091    }
092
093    /**
094     * Writes out the alignment to an FASTA file.
095     */
096    public void write(OutputStream os, Alignment align, int fileType) throws BioException, IllegalSymbolException {
097        PrintStream out = new PrintStream(os);
098        Iterator<String> labels = align.getLabels().listIterator();
099        String curLabel = null;
100        SymbolList curSeq = null;
101        int lineWidth = 60;
102
103        if (fileType == DNA) {
104            //toke = DNATools.getDNA().getTokenization("token");
105        }
106        else if (fileType == PROTEIN) {
107            //toke = ProteinTools.getTAlphabet().getTokenization("token");
108        }
109        else {
110            System.out.println("FastaAlignment.write -- File type not recognized.");
111            return;
112        }
113
114        while (labels.hasNext()) {
115            curLabel = labels.next();
116            curSeq = align.symbolListForLabel(curLabel);
117
118            out.print(">");
119            out.println(curLabel);
120
121            for (int pos = 1; pos <= curSeq.length(); pos += lineWidth) {
122                int end = Math.min(pos + lineWidth - 1, curSeq.length());
123                out.println(curSeq.subStr(pos, end));
124            }
125        }
126    } //end write
127
128    public void writeDna(OutputStream os, Alignment align) throws BioException, IllegalSymbolException {
129        write(os, align, DNA);
130    }
131
132    public void writeProtein(OutputStream os, Alignment align) throws BioException, IllegalSymbolException {
133        write(os, align, PROTEIN);
134    }
135}