001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package demo; 022 023import java.io.ByteArrayInputStream; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.UnsupportedEncodingException; 027 028import org.biojava.nbio.core.sequence.ProteinSequence; 029import org.biojava.nbio.core.sequence.compound.AminoAcidCompound; 030import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet; 031import org.biojava.nbio.core.sequence.io.CasePreservingProteinSequenceCreator; 032import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser; 033import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface; 034import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface; 035import org.biojava.nbio.structure.Atom; 036import org.biojava.nbio.structure.ResidueNumber; 037import org.biojava.nbio.structure.Structure; 038import org.biojava.nbio.structure.StructureException; 039import org.biojava.nbio.structure.StructureTools; 040import org.biojava.nbio.structure.align.gui.StructureAlignmentDisplay; 041import org.biojava.nbio.structure.align.model.AFPChain; 042import org.biojava.nbio.structure.align.util.AlignmentTools; 043import org.biojava.nbio.structure.align.util.AtomCache; 044import org.biojava.nbio.structure.io.FastaStructureParser; 045import org.biojava.nbio.structure.io.StructureSequenceMatcher; 046 047/** 048 * Demo of how to use the {@link FastaStructureParser} class to read protein 049 * structures from a FASTA file. 050 * 051 * @author Spencer Bliven 052 * 053 */ 054public class DemoAlignmentFromFasta { 055 056 public static void getAlignmentFromFasta() throws StructureException { 057 058 // Load a test sequence 059 // Normally this would come from a file, eg 060 // File fasta = new File("/path/to/file.fa"); 061 String fastaStr = 062 "> 1KQ1.A\n" + 063 "mianeniqdkalenfkanqtevtvfflngFQ.MKGVIEEYDK.....YVVSLNsqgkQHLIYKh......\n" + 064 ".......................AISTYTVetegqastesee\n" + 065 "> 1C4Q.D\n" + 066 "............................tPDcVTGKVEYTKYndddtFTVKVG....DKELATnranlqs\n" + 067 "lllsaqitgmtvtiktnachnggGFSEVIFr...........\n"; 068 069 070 InputStream fasta; 071 try { 072 fasta = new ByteArrayInputStream(fastaStr.getBytes("UTF-8")); 073 } catch (UnsupportedEncodingException e) { 074 e.printStackTrace(); 075 return; 076 } 077 078 // Create a header parser to parse the header lines into valid structure accessions. 079 // The resulting accession can be anything interpretable by AtomCache.getStructure. 080 // Possible Examples: "4HHB" (whole structure), "d4hhba_" (SCOP domain), 081 // "4HHB.A:1-15" (residue range) 082 // For this example, the built-in fasta parser will extract the correct accession. 083 SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser; 084 headerParser = new GenericFastaHeaderParser<>(); 085 086 // Create AtomCache to fetch structures from the PDB 087 AtomCache cache = new AtomCache(); 088 089 // Create SequenceCreator. This converts a String to a ProteinSequence 090 AminoAcidCompoundSet aaSet = AminoAcidCompoundSet.getAminoAcidCompoundSet(); 091 SequenceCreatorInterface<AminoAcidCompound> creator; 092 creator = new CasePreservingProteinSequenceCreator(aaSet); 093 094 // parse file 095 FastaStructureParser parser = new FastaStructureParser( 096 fasta, headerParser, creator, cache); 097 try { 098 parser.process(); 099 } catch (IOException e) { 100 e.printStackTrace(); 101 return; 102 } catch (StructureException e) { 103 e.printStackTrace(); 104 return; 105 } 106 107 ResidueNumber[][] residues = parser.getResidues(); 108 ProteinSequence[] sequences = parser.getSequences(); 109 Structure[] structures = parser.getStructures(); 110 111 // Set lowercase residues to null too 112 for(int structNum = 0; structNum<sequences.length;structNum++) { 113 CasePreservingProteinSequenceCreator.setLowercaseToNull( 114 sequences[structNum],residues[structNum]); 115 } 116 117 // Remove alignment columns with a gap 118 residues = StructureSequenceMatcher.removeGaps(residues); 119 120 121 // Create AFPChain from the alignment 122 Atom[] ca1 = StructureTools.getAtomCAArray(structures[0]); 123 Atom[] ca2 = StructureTools.getAtomCAArray(structures[1]); 124 AFPChain afp = AlignmentTools.createAFPChain(ca1, ca2, residues[0], residues[1]); 125 126 127 try { 128 StructureAlignmentDisplay.display(afp, ca1, ca2); 129 } catch (StructureException e) { 130 e.printStackTrace(); 131 return; 132 } 133 } 134 135 136 public static void main(String[] args) throws StructureException { 137 getAlignmentFromFasta(); 138 } 139}