/*
 * BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence. This should
 * be distributed with the code. If you do not have a copy,
 * see:
 *
 * http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 * http://www.biojava.org/
 *
 */
package demo;

import org.biojava.nbio.structure.ResidueNumber;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.gui.BiojavaJmol;
import org.biojava.nbio.structure.io.FastaStructureParser;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
import org.biojava.nbio.core.sequence.io.template.SequenceCreatorInterface;
import org.biojava.nbio.core.sequence.io.template.SequenceHeaderParserInterface;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

/**
 * Demo of how to use the {@link FastaStructureParser} class to read protein
 * structures from a FASTA file.
 *
 * @author Spencer Bliven
 *
 */
public class DemoStructureFromFasta {

	/**
	 * Parses an in-memory FASTA record, retrieves the values exposed by
	 * {@link FastaStructureParser} (residues, sequences, structures and
	 * accessions) and displays the first structure with the residues matched
	 * to the sequence highlighted.
	 *
	 * <p>Parsing failures are printed to standard error and end the demo early.
	 */
	// Some of the locals below exist only to show which getters the parser offers.
	@SuppressWarnings("unused")
	public static void getStructureFromFasta() {

		// Load a test sequence
		// Normally this would come from a file, eg
		// File fasta = new File("/path/to/file.fa");
		String fastaStr =
			"> 4HHB\n" +
			"VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHGK\n" +
			"KVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPA\n" +
			"VHASLDKFLASVSTVLTSKYR\n";

		// Create a header parser to parse the header lines into valid structure accessions.
		// The resulting accession can be anything interpretable by AtomCache.getStructure.
		// Possible Examples: "4HHB" (whole structure), "d4hhba_" (SCOP domain),
		// "4HHB.A:1-15" (residue range)
		// For this example, the built-in fasta parser will extract the correct accession.
		SequenceHeaderParserInterface<ProteinSequence, AminoAcidCompound> headerParser;
		headerParser = new GenericFastaHeaderParser<>();

		// Create AtomCache to fetch structures from the PDB
		AtomCache cache = new AtomCache();

		// Create SequenceCreator. This converts a String to a ProteinSequence
		AminoAcidCompoundSet aaSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();
		SequenceCreatorInterface<AminoAcidCompound> creator;
		creator = new ProteinSequenceCreator(aaSet);

		// parse file
		// StandardCharsets.UTF_8 cannot fail the way a charset name lookup can,
		// so no UnsupportedEncodingException handling is needed. The stream is
		// scoped to the parse so that swapping in a file stream does not leak it.
		FastaStructureParser parser;
		try (InputStream fasta = new ByteArrayInputStream(
				fastaStr.getBytes(StandardCharsets.UTF_8))) {
			parser = new FastaStructureParser(
					fasta, headerParser, creator, cache);
			parser.process();
		} catch (IOException | StructureException e) {
			e.printStackTrace();
			return;
		}

		// Get info from the parser
		ResidueNumber[][] residues = parser.getResidues();
		ProteinSequence[] sequences = parser.getSequences();
		Structure[] structures = parser.getStructures();
		String[] accessions = parser.getAccessions();

		// Nothing to show if the input held no records; avoids indexing an empty array.
		if (structures.length == 0 || residues.length == 0) {
			System.err.println("No structures were parsed from the FASTA input.");
			return;
		}

		// Use it! For example:
		// Display the structure, highlighting the sequence
		displayStructure(structures[0], residues[0]);
	}


	/**
	 * Displays the given structure and highlights the given residues.
	 *
	 * @param structure The structure to display
	 * @param residues A list of residues to highlight
	 */
	private static void displayStructure(Structure structure,
			ResidueNumber[] residues) {
		//Display each structure
		BiojavaJmol jmol = new BiojavaJmol();
		jmol.setStructure(structure);

		//Highlight non-null atoms
		jmol.evalString("select *; spacefill off; wireframe off; color chain; backbone 0.4; ");
		String selectionCmd = buildJmolSelection(residues);
		jmol.evalString(selectionCmd);
		jmol.evalString("backbone 1.0; select none;");
	}



	/**
	 * Converts an array of ResidueNumbers into a jMol selection.
	 *
	 * <p>For example, "select 11^ :A.CA or 12^ :A.CA or none;" would select the
	 * CA atoms of residues 11-12 on chain A.
	 *
	 * @param residues Residues to include in the selection. Nulls are ignored.
	 * @return a jMol {@code select} command naming the CA atom of every
	 *         non-null residue; {@code "select none;"} if there are none
	 */
	private static String buildJmolSelection(ResidueNumber[] residues) {
		StringBuilder cmd = new StringBuilder("select ");
		for (ResidueNumber res : residues) {
			if (res == null) {
				continue;
			}
			// A missing insertion code is written as a single space.
			Object insCode = res.getInsCode() == null ? " " : res.getInsCode();
			// Plain appends instead of String.format("%d", ...): the script
			// text must not depend on the JVM's default locale.
			cmd.append(res.getSeqNum())
				.append('^')
				.append(insCode)
				.append(':')
				.append(res.getChainName())
				.append(".CA or ");
		}
		cmd.append("none;");//easier than removing the trailing 'or'
		return cmd.toString();
	}



	/**
	 * Runs the demo.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		getStructureFromFasta();
	}
}