001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package demo;
022
023import java.io.ByteArrayInputStream;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.UnsupportedEncodingException;
027
028import org.biojava.nbio.core.sequence.ProteinSequence;
029import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
030import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
031import org.biojava.nbio.core.sequence.io.CasePreservingProteinSequenceCreator;
032import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
033import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface;
034import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface;
035import org.biojava.nbio.structure.Atom;
036import org.biojava.nbio.structure.ResidueNumber;
037import org.biojava.nbio.structure.Structure;
038import org.biojava.nbio.structure.StructureException;
039import org.biojava.nbio.structure.StructureTools;
040import org.biojava.nbio.structure.align.gui.StructureAlignmentDisplay;
041import org.biojava.nbio.structure.align.model.AFPChain;
042import org.biojava.nbio.structure.align.util.AlignmentTools;
043import org.biojava.nbio.structure.align.util.AtomCache;
044import org.biojava.nbio.structure.io.FastaStructureParser;
045import org.biojava.nbio.structure.io.StructureSequenceMatcher;
046
047/**
048 * Demo of how to use the {@link FastaStructureParser} class to read protein
049 * structures from a FASTA file.
050 *
051 * @author Spencer Bliven
052 *
053 */
054public class DemoAlignmentFromFasta {
055
056        public static void getAlignmentFromFasta() throws StructureException {
057
058                // Load a test sequence
059                // Normally this would come from a file, eg
060                // File fasta = new File("/path/to/file.fa");
061                String fastaStr =
062                        "> 1KQ1.A\n" +
063                        "mianeniqdkalenfkanqtevtvfflngFQ.MKGVIEEYDK.....YVVSLNsqgkQHLIYKh......\n" +
064                        ".......................AISTYTVetegqastesee\n" +
065                        "> 1C4Q.D\n" +
066                        "............................tPDcVTGKVEYTKYndddtFTVKVG....DKELATnranlqs\n" +
067                        "lllsaqitgmtvtiktnachnggGFSEVIFr...........\n";
068
069
070                InputStream fasta;
071                try {
072                        fasta = new ByteArrayInputStream(fastaStr.getBytes("UTF-8"));
073                } catch (UnsupportedEncodingException e) {
074                        e.printStackTrace();
075                        return;
076                }
077
078                // Create a header parser to parse the header lines into valid structure accessions.
079                // The resulting accession can be anything interpretable by AtomCache.getStructure.
080                // Possible Examples: "4HHB" (whole structure), "d4hhba_" (SCOP domain),
081                //   "4HHB.A:1-15" (residue range)
082                // For this example, the built-in fasta parser will extract the correct accession.
083                SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser;
084                headerParser = new GenericFastaHeaderParser<ProteinSequence, AminoAcidCompound>();
085
086                // Create AtomCache to fetch structures from the PDB
087                AtomCache cache = new AtomCache();
088
089                // Create SequenceCreator. This converts a String to a ProteinSequence
090                AminoAcidCompoundSet aaSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();
091                SequenceCreatorInterface<AminoAcidCompound> creator;
092                creator = new CasePreservingProteinSequenceCreator(aaSet);
093
094                // parse file
095                FastaStructureParser parser = new FastaStructureParser(
096                                fasta, headerParser, creator, cache);
097                try {
098                        parser.process();
099                } catch (IOException e) {
100                        e.printStackTrace();
101                        return;
102                } catch (StructureException e) {
103                        e.printStackTrace();
104                        return;
105                }
106
107                ResidueNumber[][] residues = parser.getResidues();
108                ProteinSequence[] sequences = parser.getSequences();
109                Structure[] structures = parser.getStructures();
110
111                // Set lowercase residues to null too
112                for(int structNum = 0; structNum<sequences.length;structNum++) {
113                        CasePreservingProteinSequenceCreator.setLowercaseToNull(
114                                        sequences[structNum],residues[structNum]);
115                }
116
117                // Remove alignment columns with a gap
118                residues = StructureSequenceMatcher.removeGaps(residues);
119
120
121                // Create AFPChain from the alignment
122                Atom[] ca1 = StructureTools.getAtomCAArray(structures[0]);
123                Atom[] ca2 = StructureTools.getAtomCAArray(structures[1]);
124                AFPChain afp = AlignmentTools.createAFPChain(ca1, ca2, residues[0], residues[1]);
125
126
127                try {
128                        StructureAlignmentDisplay.display(afp, ca1, ca2);
129                } catch (StructureException e) {
130                        e.printStackTrace();
131                        return;
132                }
133        }
134
135
136        public static void main(String[] args) throws StructureException {
137                getAlignmentFromFasta();
138        }
139}