BioJava:CookBook3:MSAProfiler
How to profile the time and memory used for Multiple Sequence Alignment in BioJava
package org.biojava3.alignment;
import java.io.File; import java.io.PrintStream; import
java.util.ArrayList; import java.util.List;
import org.biojava.nbio.alignment.Alignments.PairwiseSequenceScorerType;
import org.biojava.nbio.alignment.Alignments.ProfileProfileAlignerType;
import org.biojava.nbio.alignment.template.GapPenalty; import
org.biojava.nbio.alignment.template.PairwiseSequenceScorer; import
org.biojava.nbio.alignment.template.Profile; import
org.biojava.nbio.alignment.template.SubstitutionMatrix; import
org.biojava.nbio.core.sequence.ProteinSequence; import
org.biojava.nbio.core.sequence.compound.AminoAcidCompound; import
org.biojava.nbio.core.sequence.io.FastaReaderHelper; import
org.biojava.nbio.core.util.ConcurrencyTools;
/**
 * Command-line demo that reports the wall-clock time and JVM memory taken by each stage of a
 * multiple sequence alignment: loading the sequences, pairwise scoring, guide tree
 * construction and progressive alignment.
 *
 * <p>Usage: the first argument is a FASTA file of protein sequences; an optional second
 * argument caps how many of those sequences are aligned. Progress and measurements are printed
 * to standard output and the resulting alignment is written to {@code msa.txt} in the working
 * directory.
 */
public class CookbookMSAProfiler {

    /** Divisor converting {@link System#nanoTime()} differences to milliseconds. */
    private static final long NANOS_PER_MILLI = 1000000L;

    /** Divisor converting byte counts to kilobytes. */
    private static final long BYTES_PER_KB = 1024L;

    /** File the finished alignment is written to. */
    private static final String OUTPUT_FILE = "msa.txt";

    /**
     * Minimal stopwatch plus memory high-water mark.
     *
     * <p>The memory figure is the largest value of {@link Runtime#totalMemory()} seen at
     * construction, at each checkpoint and at each query. That is the memory currently held by
     * the JVM as a whole, not only the part occupied by live objects, and it is only sampled at
     * those moments.
     */
    private static class Profiler {

        private long maxMemoryUsed, timeCheckpoint;
        private final long timeStart;

        /** Starts both the overall clock and the first checkpoint interval. */
        private Profiler() {
            maxMemoryUsed = Runtime.getRuntime().totalMemory();
            timeStart = timeCheckpoint = System.nanoTime();
        }

        /**
         * Samples the JVM's total memory, folds it into the high-water mark and returns the mark.
         *
         * @return the largest total-memory sample seen so far, in bytes
         */
        private long getMaxMemoryUsed() {
            return maxMemoryUsed = Math.max(maxMemoryUsed, Runtime.getRuntime().totalMemory());
        }

        /** @return nanoseconds elapsed since the most recent checkpoint (or construction) */
        private long getTimeSinceCheckpoint() {
            return System.nanoTime() - timeCheckpoint;
        }

        /** @return nanoseconds elapsed since this profiler was constructed */
        private long getTimeSinceStart() {
            return System.nanoTime() - timeStart;
        }

        /** Updates the memory high-water mark and restarts the checkpoint interval. */
        private void setCheckpoint() {
            getMaxMemoryUsed();
            timeCheckpoint = System.nanoTime();
        }
    }

    /**
     * Parses the optional sequence-count limit.
     *
     * @param text the raw command-line argument
     * @return the limit, or {@code -1} if {@code text} is not a non-negative integer
     */
    private static int parseSequenceLimit(String text) {
        try {
            int limit = Integer.parseInt(text);
            return limit < 0 ? -1 : limit;
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Runs the profiled alignment.
     *
     * @param args {@code args[0]} is the FASTA file to read; {@code args[1]}, if present, is the
     *             maximum number of sequences to align
     * @throws Exception if reading the input, creating the output file or any alignment stage
     *                   fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("The first argument must be a fasta file of protein sequences.");
            return;
        }

        // Validate the optional limit before creating the output file or doing any work, so a
        // bad value is reported immediately instead of surfacing later as a subList failure.
        int limit = Integer.MAX_VALUE;
        if (args.length > 1) {
            limit = parseSequenceLimit(args[1]);
            if (limit < 0) {
                System.err.println("The second argument, if given, must be a non-negative number of sequences.");
                return;
            }
        }

        // NOTE(review): presumably uncommenting this restricts BioJava to one thread - confirm.
        // ConcurrencyTools.setThreadPoolSingle();

        // Opened up front so an unwritable output location fails before the expensive stages.
        PrintStream fout = new PrintStream(OUTPUT_FILE);
        Profiler profiler = new Profiler();
        try {
            System.out.printf("Loading sequences from %s... ", args[0]);
            List<ProteinSequence> list = new ArrayList<ProteinSequence>();
            list.addAll(FastaReaderHelper.readFastaProteinSequence(new File(args[0])).values());
            if (limit < list.size()) {
                // limit is only below Integer.MAX_VALUE when args[1] was supplied.
                System.out.printf("%s/%d", args[1], list.size());
                list = list.subList(0, limit);
            } else {
                System.out.printf("%d", list.size());
            }
            System.out.printf(" sequences in %d ms using %d kB%n%n",
                    profiler.getTimeSinceCheckpoint() / NANOS_PER_MILLI,
                    profiler.getMaxMemoryUsed() / BYTES_PER_KB);

            profiler.setCheckpoint();

            System.out.print("Stage 1: pairwise similarity calculation... ");
            GapPenalty gaps = new SimpleGapPenalty();
            SubstitutionMatrix<AminoAcidCompound> blosum62 = new SimpleSubstitutionMatrix<AminoAcidCompound>();
            List<PairwiseSequenceScorer<ProteinSequence, AminoAcidCompound>> scorers =
                    Alignments.getAllPairsScorers(list, PairwiseSequenceScorerType.GLOBAL_IDENTITIES, gaps, blosum62);
            Alignments.runPairwiseScorers(scorers);
            System.out.printf("%d scores in %d ms using %d kB%n%n", scorers.size(),
                    profiler.getTimeSinceCheckpoint() / NANOS_PER_MILLI,
                    profiler.getMaxMemoryUsed() / BYTES_PER_KB);

            profiler.setCheckpoint();

            System.out.print("Stage 2: hierarchical clustering into a guide tree... ");
            GuideTree<ProteinSequence, AminoAcidCompound> tree =
                    new GuideTree<ProteinSequence, AminoAcidCompound>(list, scorers);
            // Drop this method's reference to the scorers; they are not used after this point.
            scorers = null;
            System.out.printf("%d ms using %d kB%n%n%s%n%n",
                    profiler.getTimeSinceCheckpoint() / NANOS_PER_MILLI,
                    profiler.getMaxMemoryUsed() / BYTES_PER_KB, tree);

            profiler.setCheckpoint();

            System.out.print("Stage 3: progressive alignment... ");
            Profile<ProteinSequence, AminoAcidCompound> msa = Alignments.getProgressiveAlignment(tree,
                    ProfileProfileAlignerType.GLOBAL, gaps, blosum62);
            System.out.printf("%d profile-profile alignments in %d ms using %d kB%n%n", list.size() - 1,
                    profiler.getTimeSinceCheckpoint() / NANOS_PER_MILLI,
                    profiler.getMaxMemoryUsed() / BYTES_PER_KB);

            fout.print(msa);
        } finally {
            // Release the output file and the thread pool even when a stage throws.
            fout.close();
            ConcurrencyTools.shutdown();
        }

        // PrintStream never throws on write failure; it only records an error flag.
        if (fout.checkError()) {
            System.err.println("Warning: the alignment could not be fully written to " + OUTPUT_FILE + ".");
        }

        System.out.printf("Total time: %d ms%nMemory use: %d kB%n",
                profiler.getTimeSinceStart() / NANOS_PER_MILLI,
                profiler.getMaxMemoryUsed() / BYTES_PER_KB);
    }
}