BioJava:CookBook3:PSA
How to create a Pairwise Sequence Alignment in BioJava
Global alignment
package org.biojava.nbio.alignment;
import java.net.URL;
import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType;
import org.biojava.nbio.alignment.template.SequencePair;
import org.biojava.nbio.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
/**
 * Cookbook example: downloads two protein sequences from UniProt and prints
 * their global pairwise alignment.
 */
public class CookbookAlignPairGlobal {

    /**
     * Aligns the first two of the hard-coded UniProt accessions.
     * Any failure is reported by printing the stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] ids = new String[] {"Q21691", "Q21495", "O48771"};
        try {
            alignPairGlobal(ids[0], ids[1]);
        } catch (Exception e){
            e.printStackTrace();
        }
    }

    /**
     * Fetches both sequences, aligns them with
     * {@link PairwiseSequenceAlignerType#GLOBAL} using a default-constructed
     * gap penalty and substitution matrix, and prints the two accessions
     * followed by the alignment.
     *
     * @param id1 UniProt accession of the query sequence
     * @param id2 UniProt accession of the target sequence
     * @throws Exception if either sequence cannot be downloaded or parsed
     */
    private static void alignPairGlobal(String id1, String id2) throws Exception {
        ProteinSequence s1 = getSequenceForId(id1), s2 = getSequenceForId(id2);
        SubstitutionMatrix<AminoAcidCompound> matrix = new SimpleSubstitutionMatrix<AminoAcidCompound>();
        SequencePair<ProteinSequence, AminoAcidCompound> pair = Alignments.getPairwiseAlignment(s1, s2,
                PairwiseSequenceAlignerType.GLOBAL, new SimpleGapPenalty(), matrix);
        System.out.printf("%n%s vs %s%n%s", pair.getQuery().getAccession(), pair.getTarget().getAccession(), pair);
    }

    /**
     * Downloads the FASTA record for one UniProt accession, prints it, and
     * returns the parsed sequence.
     *
     * @param uniProtId UniProt accession, also used as the lookup key in the
     *                  parsed FASTA result
     * @return the sequence stored under {@code uniProtId}
     * @throws IllegalStateException if the downloaded FASTA has no entry
     *                               keyed by {@code uniProtId}
     * @throws Exception if the URL cannot be opened or the FASTA cannot be read
     */
    private static ProteinSequence getSequenceForId(String uniProtId) throws Exception {
        // NOTE(review): UniProt may have moved this endpoint since the example was written - confirm.
        URL uniprotFasta = new URL(String.format("http://www.uniprot.org/uniprot/%s.fasta", uniProtId));
        ProteinSequence seq;
        // Fully qualified so the example needs no additional import.
        java.io.InputStream fasta = uniprotFasta.openStream();
        try {
            seq = FastaReaderHelper.readFastaProteinSequence(fasta).get(uniProtId);
        } finally {
            // Always release the HTTP connection, even when parsing fails.
            fasta.close();
        }
        if (seq == null) {
            throw new IllegalStateException(
                    "No FASTA entry with accession " + uniProtId + " found at " + uniprotFasta);
        }
        System.out.printf("id : %s %s%n%s%n", uniProtId, seq, seq.getOriginalHeader());
        return seq;
    }
}
Local alignment
package org.biojava.nbio.alignment;
import java.net.URL;
import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType;
import org.biojava.nbio.alignment.template.SequencePair;
import org.biojava.nbio.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
/**
 * Cookbook example: downloads two protein sequences from UniProt and prints
 * their local pairwise alignment.
 */
public class CookbookAlignPairLocal {

    /**
     * Aligns the first two of the hard-coded UniProt accessions.
     * Any failure is reported by printing the stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] ids = new String[] {"Q21691", "Q21495", "O48771"};
        try {
            alignPairLocal(ids[0], ids[1]);
        } catch (Exception e){
            e.printStackTrace();
        }
    }

    /**
     * Fetches both sequences, aligns them with
     * {@link PairwiseSequenceAlignerType#LOCAL} using a default-constructed
     * gap penalty and substitution matrix, and prints the two accessions
     * followed by the alignment.
     *
     * @param id1 UniProt accession of the query sequence
     * @param id2 UniProt accession of the target sequence
     * @throws Exception if either sequence cannot be downloaded or parsed
     */
    private static void alignPairLocal(String id1, String id2) throws Exception {
        ProteinSequence s1 = getSequenceForId(id1), s2 = getSequenceForId(id2);
        SubstitutionMatrix<AminoAcidCompound> matrix = new SimpleSubstitutionMatrix<AminoAcidCompound>();
        SequencePair<ProteinSequence, AminoAcidCompound> pair = Alignments.getPairwiseAlignment(s1, s2,
                PairwiseSequenceAlignerType.LOCAL, new SimpleGapPenalty(), matrix);
        System.out.printf("%n%s vs %s%n%s", pair.getQuery().getAccession(), pair.getTarget().getAccession(), pair);
    }

    /**
     * Downloads the FASTA record for one UniProt accession, prints it, and
     * returns the parsed sequence.
     *
     * @param uniProtId UniProt accession, also used as the lookup key in the
     *                  parsed FASTA result
     * @return the sequence stored under {@code uniProtId}
     * @throws IllegalStateException if the downloaded FASTA has no entry
     *                               keyed by {@code uniProtId}
     * @throws Exception if the URL cannot be opened or the FASTA cannot be read
     */
    private static ProteinSequence getSequenceForId(String uniProtId) throws Exception {
        URL uniprotFasta = new URL(String.format("http://www.uniprot.org/uniprot/%s.fasta", uniProtId));
        ProteinSequence seq;
        // Fully qualified so the example needs no additional import.
        java.io.InputStream fasta = uniprotFasta.openStream();
        try {
            seq = FastaReaderHelper.readFastaProteinSequence(fasta).get(uniProtId);
        } finally {
            // Always release the HTTP connection, even when parsing fails.
            fasta.close();
        }
        if (seq == null) {
            throw new IllegalStateException(
                    "No FASTA entry with accession " + uniProtId + " found at " + uniprotFasta);
        }
        System.out.printf("id : %s %s%n%s%n", uniProtId, seq, seq.getOriginalHeader());
        return seq;
    }
}
How to concurrently create a PSA for each pair in a sequence list in BioJava
Global alignments
package org.biojava3.alignment;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType;
import org.biojava.nbio.alignment.template.SequencePair;
import org.biojava.nbio.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
import org.biojava.nbio.core.util.ConcurrencyTools;
public class CookbookAlignAllGlobal {
public static void main(String[] args) {
String[] ids = new String[] {"Q21691", "Q21495", "O48771"};
try {
alignAllGlobal(ids);
} catch (Exception e){
e.printStackTrace();
}
}
private static void alignAllGlobal(String[] ids) throws Exception {
List<ProteinSequence> lst = new ArrayList<ProteinSequence>();
for (String id : ids) {
lst.add(getSequenceForId(id));
}
SubstitutionMatrix<AminoAcidCompound> matrix = new SimpleSubstitutionMatrix<AminoAcidCompound>();
List<SequencePair<ProteinSequence, AminoAcidCompound>> alig = Alignments.getAllPairsAlignments(lst,
PairwiseSequenceAlignerType.GLOBAL, new SimpleGapPenalty(), matrix);
for (SequencePair<ProteinSequence, AminoAcidCompound> pair : alig) {
System.out.printf("%n%s vs %s%n%s", pair.getQuery().getAccession(), pair.getTarget().getAccession(), pair);
}
ConcurrencyTools.shutdown();
}
private static ProteinSequence getSequenceForId(String uniProtId) throws Exception {
URL uniprotFasta = new URL(String.format("http://www.uniprot.org/uniprot/%s.fasta", uniProtId));
ProteinSequence seq = FastaReaderHelper.readFastaProteinSequence(uniprotFasta.openStream()).get(uniProtId);
System.out.printf("id : %s %s%n%s%n", uniProtId, seq, seq.getOriginalHeader());
return seq;
}
}
Local alignments
package org.biojava3.alignment;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType;
import org.biojava.nbio.alignment.template.SequencePair;
import org.biojava.nbio.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
import org.biojava.nbio.core.util.ConcurrencyTools;
public class CookbookAlignAllLocal {
public static void main(String[] args) {
String[] ids = new String[] {"Q21691", "Q21495", "O48771"};
try {
alignAllLocal(ids);
} catch (Exception e){
e.printStackTrace();
}
}
private static void alignAllLocal(String[] ids) throws Exception {
List<ProteinSequence> lst = new ArrayList<ProteinSequence>();
for (String id : ids) {
lst.add(getSequenceForId(id));
}
SubstitutionMatrix<AminoAcidCompound> matrix = new SimpleSubstitutionMatrix<AminoAcidCompound>();
List<SequencePair<ProteinSequence, AminoAcidCompound>> alig = Alignments.getAllPairsAlignments(lst,
PairwiseSequenceAlignerType.LOCAL, new SimpleGapPenalty(), matrix);
for (SequencePair<ProteinSequence, AminoAcidCompound> pair : alig) {
System.out.printf("%n%s vs %s%n%s", pair.getQuery().getAccession(), pair.getTarget().getAccession(), pair);
}
ConcurrencyTools.shutdown();
}
private static ProteinSequence getSequenceForId(String uniProtId) throws Exception {
URL uniprotFasta = new URL(String.format("http://www.uniprot.org/uniprot/%s.fasta", uniProtId));
ProteinSequence seq = FastaReaderHelper.readFastaProteinSequence(uniprotFasta.openStream()).get(uniProtId);
System.out.printf("id : %s %s%n%s%n", uniProtId, seq, seq.getOriginalHeader());
return seq;
}
}