/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package demo;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.ArrayList;
import java.util.concurrent.ExecutionException;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIdentifier;
import org.biojava.nbio.structure.align.ce.CeMain;
import org.biojava.nbio.structure.align.client.StructureName;
import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
import org.biojava.nbio.structure.align.multiple.mc.MultipleMcMain;
import org.biojava.nbio.structure.align.multiple.mc.MultipleMcParameters;
import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentWriter;
import org.biojava.nbio.structure.align.util.AtomCache;

/**
 * Command-line demo that runs the MultipleMC multiple structure alignment
 * algorithm on one protein family and prints the result as text.
 * <p>
 * The output is textual only (FatCat-style alignment, transformation
 * matrices and the XML form of the ensemble). To visualize the result in
 * Jmol, use the demo of the same name in the GUI module.
 * <p>
 * To switch families, leave exactly one of the {@code names} declarations
 * inside {@link #main(String[])} uncommented.
 *
 * @author Aleix Lafita
 *
 */
public class DemoMultipleMC {

	/**
	 * Loads the structures of the selected family, aligns them with
	 * MultipleMC (seeded with {@link CeMain}) and writes the alignment to
	 * standard output.
	 *
	 * @param args command-line arguments; not read by this demo
	 * @throws IOException propagated unchanged from the BioJava calls below
	 * @throws StructureException propagated unchanged from the BioJava calls below
	 * @throws InterruptedException propagated unchanged from the BioJava calls below
	 * @throws ExecutionException propagated unchanged from the BioJava calls below
	 */
	public static void main(String[] args) throws IOException, StructureException, InterruptedException, ExecutionException {

		// ---- Family selection: keep exactly ONE "names" declaration active ----

		// ASP-proteinases (CEMC paper)
		//List<String> names = Arrays.asList("3app", "4ape", "2apr", "5pep", "1psn", "4cms", "1bbs.A", "1smr.A", "2jxr.A", "1mpp", "2asi", "1am5");
		// Protein Kinases (CEMC paper)
		//List<String> names = Arrays.asList("1cdk.A", "1cja.A", "1csn", "1b6c.B", "1ir3.A", "1fgk.A", "1byg.A", "1hck", "1blx.A", "3erk", "1bmk.A", "1kob.A", "1tki.A", "1phk", "1a06");
		// DHFR (Gerstein 1998 paper)
		//List<String> names = Arrays.asList("d1dhfa_", "8dfr", "d4dfra_", "3dfr");
		// Beta-propeller (MATT paper)
		//List<String> names = Arrays.asList("d1nr0a1", "d1nr0a2", "d1p22a2", "d1tbga_");
		// Beta-helix (MATT paper) -- the family that is currently active
		List<String> names = Arrays.asList("d1hm9a1", "d1kk6a_", "d1krra_", "d1lxaa_", "d1ocxa_", "d1qrea_", "d1xata_", "d3tdta_");
		// TIM barrels (MUSTA paper)
		//List<String> names = Arrays.asList("1tim.A", "1vzw", "1nsj", "3tha.A", "4enl", "2mnr", "7tim.A", "1tml", "1btc", "a1piia1", "6xia", "5rub.A", "2taa.B");
		// Calcium Binding (MUSTA paper)
		//List<String> names = Arrays.asList("4cpv", "2scp.A", "2sas", "1top", "1scm.B", "3icb");
		// Serine Rich Proteins SERP (MUSTA paper)
		//List<String> names = Arrays.asList("7api.A", "8api.A", "1hle.A", "1ova.A", "2ach.A", "9api.A", "1psi", "1atu", "1kct", "1ath.A", "1att.A");
		// Serine Proteases (MUSTA paper)
		//List<String> names = Arrays.asList("1cse.E", "1sbn.E", "1pek.E", "3prk", "3tec.E");
		// GPCRs
		//List<String> names = Arrays.asList("2z73.A", "1u19.A", "4ug2.A", "4xt3", "4or2.A", "3odu.A");
		// Immunoglobulins (MAMMOTH paper)
		//List<String> names = Arrays.asList("2hla.B", "3hla.B", "1cd8", "2rhe", "1tlk", "1ten", "1ttf");
		// Globins (MAMMOTH, POSA, Gerstein&Levitt and MUSTA papers)
		//List<String> names = Arrays.asList("1mbc", "1hlb", "1thb.A", "1ith.A", "1idr.A", "1dlw", "1kr7.A", "1ew6.A", "1it2.A", "1eco", "3sdh.A", "1cg5.B", "1fhj.B", "1ird.A", "1mba", "2gdm", "1b0b", "1h97.A", "1ash.A", "1jl7.A");
		// Rossmann fold (POSA paper)
		//List<String> names = Arrays.asList("d1heta2", "d1ek6a_", "d1obfo1", "2cmd", "d1np3a2", "d1bgva1", "d1id1a_", "d1id1a_", "d1oi7a1");
		// Circular Permutations (Bliven CECP paper) - dynamin GTP-ase with CP G-domain
		//List<String> names = Arrays.asList("d1u0la2", "d1jwyb_");
		// Circular Permutations: SAND and MFPT domains
		//List<String> names = Arrays.asList("d2bjqa1", "d1h5pa_", "d1ufna_"); //"d1oqja"
		// Ammonium Transporters (Aleix Bachelor's Thesis)
		//List<String> names = Arrays.asList("1xqf.A","2b2f.A", "3b9w.A","3hd6.A");
		// Cytochrome C Oxidases (Aleix Bachelor's Thesis)
		//List<String> names = Arrays.asList("2dyr.A","2gsm.A","2yev.A","3hb3.A","3omn.A","1fft.A","1xme.A","3o0r.B","3ayf.A");
		// Cation Transporting ATPases (Aleix Bachelor's Thesis)
		//List<String> names = Arrays.asList("3b8e.A","2zxe.A", "3tlm.A","1iwo.A");
		// Ankyrin Repeats
		//List<String> names = Arrays.asList("d1n0ra_", "3ehq.A", "1awc.B"); //ankyrin

		// ---- Load the CA atoms of every structure, keeping its identifier ----
		final AtomCache atomCache = new AtomCache();
		final List<StructureIdentifier> identifiers = new ArrayList<>();
		final List<Atom[]> atomsPerStructure = new ArrayList<>();

		for (String structureName : names) {
			final StructureIdentifier identifier = new StructureName(structureName);
			identifiers.add(identifier);
			atomsPerStructure.add(atomCache.getAtoms(identifier));
		}

		// ---- Run the multiple structure alignment ----
		final MultipleMcMain aligner = new MultipleMcMain(new CeMain());
		final MultipleMcParameters mcParams = (MultipleMcParameters) aligner.getParameters();
		mcParams.setMinBlockLen(15);
		// NOTE(review): the active family lists 8 structures while this minimum
		// is 10 -- confirm how the library treats a minimum above the input size.
		mcParams.setMinAlignedStructures(10);

		final MultipleAlignment alignment = aligner.align(atomsPerStructure);

		// ---- Attach identifiers and algorithm metadata to the ensemble ----
		alignment.getEnsemble().setStructureIdentifiers(identifiers);
		alignment.getEnsemble().setAlgorithmName(aligner.getAlgorithmName());
		alignment.getEnsemble().setVersion(aligner.getVersion());

		// ---- Print the sequence alignment, the transformations and the XML ----
		final String fatCatText = MultipleAlignmentWriter.toFatCat(alignment);
		System.out.println(fatCatText);
		//System.out.println(MultipleAlignmentWriter.toFASTA(result));

		final String transformText = MultipleAlignmentWriter.toTransformMatrices(alignment);
		System.out.println(transformText);

		final String ensembleXml = MultipleAlignmentWriter.toXML(alignment.getEnsemble());
		System.out.println(ensembleXml);
	}
}