001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package demo; 022 023import org.biojava.nbio.alignment.Alignments; 024import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType; 025import org.biojava.nbio.alignment.SimpleGapPenalty; 026import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper; 027import org.biojava.nbio.alignment.template.GapPenalty; 028import org.biojava.nbio.alignment.template.PairwiseSequenceAligner; 029import org.biojava.nbio.core.alignment.template.SequencePair; 030import org.biojava.nbio.core.sequence.DNASequence; 031import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet; 032import org.biojava.nbio.core.sequence.compound.NucleotideCompound; 033 034public class TestDNANeedlemanWunsch { 035 036 public static void main(String[] args) throws Exception { 037 038 String query = "AGGATGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGGTGAAGCCCAGCTTGCTGGGTGGATCA" + 039 "GTGGCGAACGGGTGAGTAACACGTGAGCAACCTGCCCCTGACTCTGGGATAAGCGCTGGAAACGGTGTCT" + 040 "AATACTGGATATGAGCTACCACCGCATGGTGAGTGGTTGGAAAGATTTTTCGGTTGGGGATGGGCTCGCG" + 041 "GCCTATCAGCTTGTTGGTGAGGTAATGGCTCACCAAGGCGTCGACGGGTAGCCGGCCTGAGAGGGTGACC" + 042 "GGCCACACTGGGACTGAGACACGGCCCAGACTCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGC" + 043 "GGAAGCCTGATGCAGCAACGCCGCGTGAGGGACGACGGCTTCGGGTTGTAAACCTCTTTTAGCAGGGAAG" + 044 "AAGCGAGAGTGACGGTACCTGCAGAAAAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAG" + 045 "GGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCTGTGAAAA" + 046 "CCCGAGGCTCAACCTNNGGGCTGCAGTGGGTACGGGCAGACTAGAGTGCGGTAGGGGAGATTGGAATTCC" + 047 "TGGTGTAGCGGTGGAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGATCTCTGGGCCGTAAC" + 048 "TGACGCTGAGGAGCGAAAGGGTGGGGAGCAAACAGGCTTAGATACCCTGGTAGTCCACCCCGTAAACGTT" + 049 "GGGAACTAGTTGTGGGGTCCTTTCCACGGATTCCGTGACGCACGTAACGCATTAAGTTCCCCGCCTGGGG" + 050 "AGTACGGCCGCAAGGCTAAAACTCAAAGGAATTGACGGGGACCCGCACAAGCGGCGGAGCATGCGGATTA" + 051 "AATCGATGCAACGCGAAGAACCTTACCAAGGCTTGACATACACGAGAACGCTGCAGAAATGTAGAACTCT" + 052 "TTGGACACTCGTGAACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCG" + 053 "CAACGAGCGCAACCCTCGTTCTATGTTGCCAGCACGTAATGGTGGGAACTCATGGGATACTGCCGGGGTC" + 054 "AACTCGGAGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGTCTTGGGCTTCACGCATGCTACA" + 055 "ATGGCCGGTACAAAGGGCTGCAATACCGTGAGGTGGAGCGAATCCCAAAAAGCCGGTCCCAGTTCGGATT" + 056 "GAGGTCTGCAACTCGACCTCATGAAGTCGGAGTCGCTAGTAATCGCAGATCAGCAACGCTGCGGTGAATA" + 057 "CGTTCCCGGGTCTTGTACACACCGCCCGTCAAGTCATGAAAGTCGGTAACACCTGAAGCCGGTGGCCTAA" + 058 "CCCTTGTGGAGGGAGCCGGTAATTAAA"; 059 060 String target = "CTGGCCGCCTGCTTAACACATCCAAGTCGAACGGTGAAGCCCCANCTTACTGGGTGGATCAGTGCCGAAC" + 061 "GGGTGAGTAACACGTGAGCAACCTCCCCCTGACTCTGGGATAAGCGCTGGAANCGGTGTCTAATACTGGA" + 062 "TATGAGCTACCACCGCATGGTGAGTGGTTGGAAAGATTTTTCGGTTGGGGATGGGCTCGCGCCCTATGAG" + 063 "CTTGTTGGTGAGGTAATGGCTCACCAAGCCGTCGACGGGTAGCCGGCCTGAGAGGGTGACCGNCCACACT" + 064 "GGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGGAAGCCT" + 065 "GATTCANCAACCCCGCGTGAGGGACGACGGCCTTCGGGTTGTAAACCTCTTTTAGCAGGGAAGAAGCGAG" + 066 "AGTGACGGTACCTGCAGAAAAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAA" + 067 "GCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCTGTGAAAACCCGAGG" + 068 "CTCAACCTCGGGCCTGCAGTGGGTACGGGCAGACTAGAGTGCGGTAGGGGAGATTGGAATTCCTGGTGTA" + 069 "GCGGTGGAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGATCTCTGGGCCGTAACTGACGCT" + 070 "GAGGAGCGAAAGGGTGGGGAGCAAACAGGCTTAGATACCCTGGTAGTCCACCCCGTAAACGTTGGGAACT" + 071 "AGTTGTGGGGTCCTTTCCACGGATTCCGTGACGCAGCTAACGCATTAAGTTCCCCGCCTGGGGAGTACGG" + 072 "CCGCAAGGCTAAAACTCAAAGGAATTGACGGGGACCCGCACAAGCGGCGGAGCATGCGGATTAATTCGAT" + 073 "GCAACGCGAAGAACCTTACCAAGGCTTGACATACACGAGAACGCTGCAGAAATGTAGAACTCTTTGGACA" + 074 "CTCGTGAACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAG" + 075 "CGCAACCCTCGTTCTATGTTGCCAGCACGTAATGGTGGGAACTCATGGGATACTGCCGGGGTCAACTCGG" + 076 "AGGAAGGTGGGGATGACGTCAAATCATCATGCCCCTTATGTCTTGGGCTTCACGCATGCTACAATGGCCG" + 077 "GTACAAAGGGCTGCAATACCGTGAGGTGGAGCGAATCCCAAAAAGCCGGTCCCAGTTCGGATTGAGGTCT" + 078 "GCAACTCGACCTCATGAAGTCGGAGTCGCTAGTAATCGCAGATCAGCAACGCTGCGGTGAATACGTTCCC" + 079 "GGGTCTTGTACACACCGCCCGTCAAGTCATGAAAGTCGGTAACACCTGAAGCCGGTGGCCCAACCCTTGT" + 080 "GGAGGGAGCCGTCGAAGGTGGGATCGGTAATTAGGACTAAGTCGTAACAAGGTAGCCGTACC"; 081 082 GapPenalty penalty = new SimpleGapPenalty(-14, -4); 083 PairwiseSequenceAligner<DNASequence, NucleotideCompound> aligner = Alignments.getPairwiseAligner( 084 new DNASequence(query, AmbiguityDNACompoundSet.getDNACompoundSet()), 085 new DNASequence(target, AmbiguityDNACompoundSet.getDNACompoundSet()), 086 PairwiseSequenceAlignerType.GLOBAL, 087 penalty, SubstitutionMatrixHelper.getNuc4_4()); 088 SequencePair<DNASequence, NucleotideCompound> 089 alignment = aligner.getPair(); 090 091 System.out.println("Alignment: "+ alignment); 092 093 int identical = alignment.getNumIdenticals(); 094 System.out.println("Number of identical residues: "+ identical); 095 System.out.println("% identical query: "+ identical / (float) query.length()); 096 System.out.println("% identical target: "+ identical / (float) target.length()); 097 } 098}