001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package demo;
022
023import java.io.IOException;
024import java.util.Arrays;
025import java.util.List;
026import java.util.ArrayList;
027import java.util.concurrent.ExecutionException;
028
029import org.biojava.nbio.structure.Atom;
030import org.biojava.nbio.structure.StructureException;
031import org.biojava.nbio.structure.StructureIdentifier;
032import org.biojava.nbio.structure.align.ce.CeMain;
033import org.biojava.nbio.structure.align.client.StructureName;
034import org.biojava.nbio.structure.align.multiple.MultipleAlignment;
035import org.biojava.nbio.structure.align.multiple.mc.MultipleMcMain;
036import org.biojava.nbio.structure.align.multiple.mc.MultipleMcParameters;
037import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentWriter;
038import org.biojava.nbio.structure.align.util.AtomCache;
039
040/**
041 * Demo for running the MultipleMC Algorithm on a protein family.
042 * For visualizing the results in jmol use the same Demo in the GUI module.
043 * Here only the sequence alignment will be displayed.
044 * Choose the family by commenting out the protein family names.
045 *
046 * @author Aleix Lafita
047 *
048 */
049public class DemoMultipleMC {
050
051        public static void main(String[] args) throws IOException, StructureException, InterruptedException, ExecutionException {
052
053                //Specify the structures to align
054                //ASP-proteinases (CEMC paper)
055                //List<String> names = Arrays.asList("3app", "4ape", "2apr", "5pep", "1psn", "4cms", "1bbs.A", "1smr.A", "2jxr.A", "1mpp", "2asi", "1am5");
056                //Protein Kinases (CEMC paper)
057                //List<String> names = Arrays.asList("1cdk.A", "1cja.A", "1csn", "1b6c.B", "1ir3.A", "1fgk.A", "1byg.A", "1hck", "1blx.A", "3erk", "1bmk.A", "1kob.A", "1tki.A", "1phk", "1a06");
058                //DHFR (Gerstein 1998 paper)
059                //List<String> names = Arrays.asList("d1dhfa_", "8dfr", "d4dfra_", "3dfr");
060                //Beta-propeller (MATT paper)
061                //List<String> names = Arrays.asList("d1nr0a1", "d1nr0a2", "d1p22a2", "d1tbga_");
062                //Beta-helix (MATT paper)
063                List<String> names = Arrays.asList("d1hm9a1", "d1kk6a_", "d1krra_", "d1lxaa_", "d1ocxa_", "d1qrea_", "d1xata_", "d3tdta_");
064                //TIM barrels (MUSTA paper)
065                //List<String> names = Arrays.asList("1tim.A", "1vzw", "1nsj", "3tha.A", "4enl", "2mnr", "7tim.A", "1tml", "1btc", "a1piia1", "6xia", "5rub.A", "2taa.B");
066                //Calcium Binding (MUSTA paper)
067                //List<String> names = Arrays.asList("4cpv", "2scp.A", "2sas", "1top", "1scm.B", "3icb");
068                //Serine Rich Proteins SERP (MUSTA paper)
069                //List<String> names = Arrays.asList("7api.A", "8api.A", "1hle.A", "1ova.A", "2ach.A", "9api.A", "1psi", "1atu", "1kct", "1ath.A", "1att.A");
070                //Serine Proteases (MUSTA paper)
071                //List<String> names = Arrays.asList("1cse.E", "1sbn.E", "1pek.E", "3prk", "3tec.E");
072                //GPCRs
073                //List<String> names = Arrays.asList("2z73.A", "1u19.A", "4ug2.A", "4xt3", "4or2.A", "3odu.A");
074                //Immunoglobulins (MAMMOTH paper)
075                //List<String> names = Arrays.asList("2hla.B", "3hla.B", "1cd8", "2rhe", "1tlk", "1ten", "1ttf");
076                //Globins (MAMMOTH, POSA, Gerstein&Levitt and MUSTA papers)
077                //List<String> names = Arrays.asList("1mbc", "1hlb", "1thb.A", "1ith.A", "1idr.A", "1dlw", "1kr7.A", "1ew6.A", "1it2.A", "1eco", "3sdh.A", "1cg5.B", "1fhj.B", "1ird.A", "1mba", "2gdm", "1b0b", "1h97.A", "1ash.A", "1jl7.A");
078                //Rossman-Fold (POSA paper)
079                //List<String> names = Arrays.asList("d1heta2", "d1ek6a_", "d1obfo1", "2cmd", "d1np3a2", "d1bgva1", "d1id1a_", "d1id1a_", "d1oi7a1");
080                //Circular Permutations (Bliven CECP paper) - dynamin GTP-ase with CP G-domain
081                //List<String> names = Arrays.asList("d1u0la2", "d1jwyb_");
082                //Circular Permutations: SAND and MFPT domains
083                //List<String> names = Arrays.asList("d2bjqa1", "d1h5pa_", "d1ufna_");  //"d1oqja"
084                //Amonium Transporters (Aleix Bachelor's Thesis)
085                //List<String> names = Arrays.asList("1xqf.A","2b2f.A", "3b9w.A","3hd6.A");
086                //Cytochrome C Oxidases (Aleix Bachelor's Thesis)
087                //List<String> names = Arrays.asList("2dyr.A","2gsm.A","2yev.A","3hb3.A","3omn.A","1fft.A","1xme.A","3o0r.B","3ayf.A");
088                //Cation Transporting ATPases (Aleix Bachelor's Thesis)
089                //List<String> names = Arrays.asList("3b8e.A","2zxe.A", "3tlm.A","1iwo.A");
090                //Ankyrin Repeats
091                //List<String> names = Arrays.asList("d1n0ra_", "3ehq.A", "1awc.B");  //ankyrin
092
093                //Load the CA atoms of the structures
094                AtomCache cache = new AtomCache();
095                List<Atom[]> atomArrays = new ArrayList<Atom[]>();
096
097                List<StructureIdentifier> ids = new ArrayList<StructureIdentifier>();
098                for (String name:names) {
099                        StructureIdentifier id = new StructureName(name);
100                        ids.add(id);
101                        atomArrays.add(cache.getAtoms(id));
102                }
103
104                //Here the multiple structural alignment algorithm comes in place to generate the alignment object
105                MultipleMcMain algorithm = new MultipleMcMain(new CeMain());
106                MultipleMcParameters params = (MultipleMcParameters) algorithm.getParameters();
107                params.setMinBlockLen(15);
108                params.setMinAlignedStructures(10);
109
110                MultipleAlignment result = algorithm.align(atomArrays);
111                result.getEnsemble().setStructureIdentifiers(ids);
112
113                //Information about the alignment
114                result.getEnsemble().setAlgorithmName(algorithm.getAlgorithmName());
115                result.getEnsemble().setVersion(algorithm.getVersion());
116
117                //Output the sequence alignment + transformations
118                System.out.println(MultipleAlignmentWriter.toFatCat(result));
119                //System.out.println(MultipleAlignmentWriter.toFASTA(result));
120                System.out.println(MultipleAlignmentWriter.toTransformMatrices(result));
121                System.out.println(MultipleAlignmentWriter.toXML(result.getEnsemble()));
122        }
123}