GSoC:MSA Design

Design for Alignment in BioJava3

*Part of a project by [Mark Chapman](Mark_Chapman "wikilink")*

This page has a list of packages, interfaces, and classes to form the basis of the BioJava3 alignment module. The new module ports nearly all the current BioJava 1.7 alignment features to the BioJava3 standard. Additional features prepare for the inclusion of multiple sequence alignments.

The base data structures are immutable for efficiency, with extensions to mutable forms for JavaEE/Bean compliance. Simple factory methods reside in the static Alignments class. A full interface hierarchy allows for flexible customization and expansion.

Dependencies

  • org.biojava3.core.sequence.location.template.Location
  • org.biojava3.core.sequence.template.Compound
  • org.biojava3.core.sequence.template.CompoundSet
  • org.biojava3.core.sequence.template.Sequence

org.biojava3.alignment

Alignments


class Alignments // static utility`  
`   private Alignments() { } // prevents instantiation`  
`   List`<SequencePair<S>`> getAllPairsAlignments(List`<S extends Sequence>`)`  
`   int[] getAllPairsScores(List`<? extends Sequence>`)`  
`   SequencePair`<S extends Sequence>getPairwiseAlignment(S, S)`  
`   int getPairwiseScore(Sequence, Sequence)`  
`   Profile`<S>getMultipleSequenceAlignment(List`<S extends Sequence>`)`  
`   enum MSAEmulation { CLUSTALW, MUSCLE, KALIGN, CUSTOM }`  
`   class Defaults // static inner class`  
`     MSAEmulation getEmulation()`  
`     GapPenalty getGapPenalty()`  
`     Class`<? extends HierarchicalClusterer>getHierarchicalClusterer()`  
`     Class`<? extends PairwiseSequenceAligner>getPairwiseSequenceAligner()`  
`     Class`<? extends PairwiseSequenceScorer>getPairwiseSequenceScorer()`  
`     Class`<? extends PartitionRefiner>getPartitionRefiner()`  
`     Class`<? extends ProfileProfileAligner>getProfileProfileAligner()`  
`     Class`<? extends RescoreRefiner>getRescoreRefiner()`  
`     SubstitutionMatrix getSubstitutionMatrix()`  
`     void setEmulation(MSAEmulation)`  
`     void setGapPenalty(GapPenalty)`  
`     void setHierarchicalClusterer(Class`<? extends HierarchicalClusterer>`)`  
`     void setPairwiseSequenceAligner(Class`<? extends PairwiseSequenceAligner>`)`  
`     void setPairwiseSequenceScorer(Class`<? extends PairwiseSequenceScorer>`)`  
`     void setPartitionRefiner(Class`<? extends PartitionRefiner>`)`  
`     void setProfileProfileAligner(Class`<? extends ProfileProfileAligner>`)`  
`     void setRescoreRefiner(Class`<? extends RescoreRefiner>`)`  
`     void setSubstitutionMatrix(SubstitutionMatrix)`

SimpleGapPenalty


class SimpleGapPenalty implements GapPenalty`  
`   SimpleGapPenalty(short, short) // open, extend`

SimpleSubstitutionMatrix


class SimpleSubstitutionMatrix`<S extends CompoundSet>implements SubstitutionMatrix`<S>  
`   SimpleSubstitutionMatrix(File) // guess compound set from source`  
`   SimpleSubstitutionMatrix(String) // guess compound set from source`  
`   SimpleSubstitutionMatrix(S, File)`  
`   SimpleSubstitutionMatrix(S, short, short) // identity matrix (ignore or handle ambiguities?)`  
`   SimpleSubstitutionMatrix(S, String)`  
`   SimpleSubstitutionMatrix(S, String, String) // optional name parameter`  
`   String toString()`

NeedlemanWunsch


class NeedlemanWunsch`<S extends Sequence>extends AbstractPairwiseSequenceAligner`<S>  
`   NeedlemanWunsch(S, S, GapPenalty, SubstitutionMatrix)`

SmithWaterman


class SmithWaterman`<S extends Sequence>extends AbstractPairwiseSequenceAligner`<S>  
`   SmithWaterman(S, S, GapPenalty, SubstitutionMatrix)`

FractionalIdentityScorer


class FractionalIdentityScorer`<S extends Sequence>implements PairwiseSequenceScorer`<S>  
`   FractionalIdentityScorer(SequencePair`<S>`)`

FractionalSimilarityScorer


class FractionalSimilarityScorer`<S extends Sequence>implements PairwiseSequenceScorer`<S>  
`   FractionalSimilarityScorer(SequencePair`<S>`)`

KmersScorer


class KmersScorer`<S extends Sequence>implements PairwiseSequenceScorer`<S>  
`   KmersScorer(S, S, int)`

WuManberScorer


class WuManberScorer`<S extends Sequence>implements PairwiseSequenceScorer`<S>  
`   WuManberScorer(S, S, ?) // TODO: not sure of parameters`

FractionalIdentityInProfileScorer


class FractionalIdentityInProfileScorer`<S extends Sequence>implements PairInProfileScorer`<S>  
`   FractionalIdentityInProfileScorer(S, S, Profile)`

FractionalSimilarityInProfileScorer


class FractionalSimilarityInProfileScorer`<S extends Sequence>implements PairInProfileScorer`<S>  
`   FractionalSimilarityInProfileScorer(S, S, Profile)`

GuideTree


class GuideTree`<S extends Sequence>  
`   GuideTree(int[], List`<S>`, HierarchicalClusterer) // all pairs score`  
`   int[] getAllPairsScores()`  
`   float[][] getDistanceMatrix()`  
`   Node getRoot()`  
`   int[][] getScoreMatrix()`  
`   class Node`  
`     float getBranchLength1()`  
`     float getBranchLength2()`  
`     Node getChild1()`  
`     Node getChild2()`  
`     Node getParent()`  
`     S getSequence() // null unless isLeaf()`  
`     boolean isLeaf()`

StandardRescoreRefiner


class StandardRescoreRefiner`<S extends Sequence>implements RescoreRefiner`<S>  
`   StandardRescoreRefiner(Profile, Class`<? extends PairInProfileScorer<S>`>, Class`<? extends ProfileProfileAligner<S>`>)`

DayhoffRescoreRefiner


class DayhoffRescoreRefiner`<S extends Sequence>implements RescoreRefiner`<S>  
`   DayhoffRescoreRefiner(Profile, Class`<? extends PairInProfileScorer<S>`>, Class`<? extends ProfileProfileAligner<S>`>)`

SinglePartitionRefiner


class SinglePartitionRefiner`<S extends Sequence>implements PartitionRefiner`<S>  
`   SinglePartitionRefiner(Profile, Class`<? extends ProfileProfileAligner<S>`>)`

StochasticPartitionRefiner


class StochasticPartitionRefiner`<S extends Sequence>implements PartitionRefiner`<S>  
`   StochasticPartitionRefiner(Profile, Class`<? extends ProfileProfileAligner<S>`>)`

TreeTraversalPartitionRefiner


class TreeTraversalPartitionRefiner`<S extends Sequence>implements PartitionRefiner`<S>  
`   TreeTraversalPartitionRefiner(Profile, Class`<? extends ProfileProfileAligner<S>`>)`

org.biojava3.alignment.template

GapPenalty


interface GapPenalty`  
`   enum Type {CONSTANT, LINEAR, AFFINE} // gep = 0, gep = gop, gep != gop ... TODO: add DYNAMIC type`  
`   short getExtensionPenalty()`  
`   short getOpenPenalty()`  
`   Type getType()`  
`   void setExtensionPenalty(short)`  
`   void setOpenPenalty(short)`

SubstitutionMatrix


interface SubstitutionMatrix`<S extends CompoundSet<C extends Compound>`>`  
`   S getCompoundSet()`  
`   String getDescription()`  
`   short[][] getMatrix()`  
`   String getMatrixAsString()`  
`   short getMaxValue()`  
`   short getMinValue()`  
`   String getName()`  
`   short getValue(C, C)`  
`   void normalizeMatrix(short) // scale`  
`   void setDescription(String)`  
`   void setName(String)`

AlignedSequence


interface AlignedSequence`<C extends Compound>extends Sequence`<C>  
`   int getAlignmentIndexAt(int)`  
`   int getEnd()`  
`   Location getLocationInAlignment()`  
`   int getNumGaps()`  
`   Sequence`<C>getOriginalSequence()`  
`   int getOverlapCount() // isCircular() ? >= 1 : == 1`  
`   int getSequenceIndexAt(int)`  
`   int getStart()`  
`   boolean isCircular()`

MutableAlignedSequence


interface MutableAlignedSequence`<C extends Compound>extends AlignedSequence`<C>  
`   void setLocation(Location)`  
`   void shiftAtAlignmentLocation(Location, int)`  
`   void shiftAtSequenceLocation(Location, int)`

Profile


interface Profile`<S extends Sequence<C extends Compound>`> extends Iterable`<S>  
`   AlignedSequence`<C>getAlignedSequence(int)`  
`   AlignedSequence`<C>getAlignedSequence(S) // will find either aligned or original sequences`  
`   List`<AlignedSequence<C>`> getAlignedSequences() // unmodifiable unless class implements MutableProfile`  
`   List`<AlignedSequence<C>`> getAlignedSequences(int...) // useful for views`  
`   List`<AlignedSequence<C>`> getAlignedSequences(S...) // useful for views`  
`   C getCompoundAt(int, int)`  
`   C getCompoundAt(S, int) // will find either aligned or original sequences`  
`   List`<C>getCompoundsAt(int) // useful for views`  
`   CompoundSet`<C>getCompoundSet()`  
`   int[] getIndicesAt(int) // useful for views`  
`   int getIndexOf(C)`  
`   int getLastIndexOf(C)`  
`   int getLength() // number of columns`  
`   int getSize() // number of rows ... isCircular() ? >= number of sequences : == number of sequences`  
`   ProfileView`<S>getSubProfile(Location) // only include sequences that overlap Location`  
`   boolean isCircular() // if so, sequences longer than getLength() return multiple compounds at any location`  
`   String toString() // simple view: each sequence on 1 line`  
`   String toString(int) // formatted view: show start and end indices of profile and sequences, limited line length`

MutableProfile


interface MutableProfile`<S extends Sequence<C extends Compound>`> extends Profile`<S>  
`   // getAlignedSequences modifiable, full iterator with remove`  
`   void setSequences(List`<AlignedSequence<C>`>)`

ProfileView


interface ProfileView`<S extends Sequence<C extends Compound>`> extends Profile`<S>  
`   int getEnd()`  
`   int getStart()`  
`   Profile`<S>getViewedProfile()`  
`   String toString() // simple view`  
`   String toString(int) // formatted view`

SequencePair


interface SequencePair`<S extends Sequence<C extends Compound>`> extends Profile`<S>  
`   C getCompoundInQueryAt(int)`  
`   C getCompoundInTargetAt(int)`  
`   int getIndexInQueryAt(int)`  
`   int getIndexInQueryForTargetAt(int)`  
`   int getIndexInTargetAt(int)`  
`   int getIndexInTargetForQueryAt(int)`  
`   int getNumIdenticals()`  
`   int getNumSimilars()`  
`   AlignedSequence`<C>getQuery()`  
`   AlignedSequence`<C>getTarget()`

MutableSequencePair


interface MutableSequencePair`<S extends Sequence<C extends Compound>`> extends MutableProfile`<S>`, SequencePair`<S>  
`   void setPair(AlignedSequence`<C>`, AlignedSequence`<C>`)`  
`   void setQuery(AlignedSequence`<C>`)`  
`   void setTarget(AlignedSequence`<C>`)`

AbstractSequencePair


abstract class AbstractSequencePair`<S extends Sequence>implements SequencePair`<S>  
`   float getPercentGapsQuery()`  
`   float getPercentGapsTarget()`  
`   float getPercentIdentityQuery()`  
`   float getPercentIdentityTarget()`  
`   float getPercentSimilarityQuery()`  
`   float getPercentSimilarityTarget()`

ProfilePair


interface ProfilePair`<S extends Sequence>extends Profile`<S>  
`   Profile`<S>getQuery()`  
`   Profile`<S>getTarget()`

MutableProfilePair


interface MutableProfilePair`<S extends Sequence>extends MutableProfile`<S>`, ProfilePair`<S>  
`   void setPair(Profile`<S>`, Profile`<S>`)`  
`   void setQuery(Profile`<S>`)`  
`   void setTarget(Profile`<S>`)`

Scorer


interface Scorer // resides in core module`  
`   int getMaxScore()`  
`   int getMinScore()`  
`   int getScore()`

PairwiseSequenceScorer


interface PairwiseSequenceScorer`<S extends Sequence>extends Scorer`  
`   SequencePair`<S>getPair()`

PairInProfileScorer


interface PairInProfileScorer`<S extends Sequence>extends PairwiseSequenceScorer`<S>  
`   Profile`<S>getProfile()`

ProfileProfileScorer


interface ProfileProfileScorer`<S extends Sequence>extends Scorer`  
`   ProfilePair`<S>getPair()`

Aligner


interface Aligner`<S extends Sequence>extends Scorer`  
`   long getComputationTime()`  
`   Profile`<S>getProfile()`

MatrixAligner


interface MatrixAligner`<S extends Sequence>extends Aligner`<S>  
`   short[][] getScoreMatrix()`  
`   short getScoreMatrixAt(int, int)`  
`   String getScoreMatrixAsString()`

PairwiseSequenceAligner


interface PairwiseSequenceAligner`<S extends Sequence>extends MatrixAligner`<S>`, PairwiseSequenceScorer`<S>  
`   // combines 2 interfaces`

ProfileProfileAligner


interface ProfileProfileAligner`<S extends Sequence>extends MatrixAligner`<S>`, ProfileProfileScorer`<S>  
`   // combines 2 interfaces`

RescoreRefiner


interface RescoreRefiner`<S extends Sequence>extends Aligner`<S>`, ProfileProfileScorer`<S>  
`   Class`<? extends PairInProfileScorer<S>`> getPairInProfileScorer()`  
`   Class`<? extends ProfileProfileAligner<S>`> getProfileProfileAligner()`

PartitionRefiner


interface PartitionRefiner`<S extends Sequence>extends Aligner`<S>`, ProfileProfileScorer`<S>  
`   Class`<? extends ProfileProfileAligner<S>`> getProfileProfileAligner()`

AbstractPairwiseSequenceAligner


abstract class AbstractPairwiseSequenceAligner`<S extends Sequence>implements PairwiseSequenceAligner`<S>  
`   AbstractPairwiseSequenceAligner(S, S, GapPenalty, SubstitutionMatrix)`  
`   GapPenalty getGapPenalty()`  
`   SubstitutionMatrix getSubstitutionMatrix()`

AbstractMutablePairwiseSequenceAligner


abstract class AbstractMutablePairwiseSequenceAligner`<S extends Sequence>extends AbstractPairwiseSequenceAligner`<S>implements MutablePairwiseSequenceAligner`<S>  
`   AbstractMutablePairwiseSequenceAligner()`  
`   AbstractMutablePairwiseSequenceAligner(S, S, GapPenalty, SubstitutionMatrix)`  
`   void setGapPenalty(GapPenalty)`  
`   void setSubstitutionMatrix(SubstitutionMatrix)`

org.biojava3.alignment.views

SimpleProfileView


class SimpleProfileView`<S extends Sequence>implements ProfileView`<S>  
`   SimpleProfileView(Profile`<S>`, int, int)`

CompoundCountsView


class CompoundCountsView`<S extends Sequence>extends SimpleProfileView`<S>  
`   CompoundCountsView(Profile`<S>`)`  
`   CompoundCountsView(Profile`<S>`, int, int)`  
`   int[] getCompoundCountsAt(int) // returned array is size of CompoundSet`

CompoundWeightsView


class CompoundWeightsView`<S extends Sequence>extends SimpleProfileView`<S>  
`   CompoundWeightsView(Profile`<S>`)`  
`   CompoundWeightsView(Profile`<S>`, int, int)`  
`   float[] getCompoundWeightsAt(int) // returned array is size of CompoundSet`

ConsensusView


class ConsensusView`<S extends Sequence>extends SimpleProfileView`<S>  
`   ConsensusView(Profile`<S>`)`  
`   ConsensusView(Profile`<S>`, int, int)`  
`   S getConsensusSequence()`

org.biojava3.core.util

ConcurrencyTools


class ConcurrencyTools // static utility`  
`   private ConcurrencyTools() { } // prevents instantiation`  
`   ExecutorService getThreadPool()`  
`   void setThreadPoolDefault()`  
`   void setThreadPoolSingle()`  
`   void setThreadPool(ExecutorService)`  
`   void shutdown()`  
`   void shutdownAndAwaitTermination()`  
`   Future`<T>submit(Callable`<T>`, String)`  
`   Future`<T>submit(Callable`<T>`)`  
`   // TODO: additional logging and listening services`

LoggingTools


class LoggingTools // static utility`  
`   private LoggingTools() { } // prevents instantiation`

Questions / Comments

In which parts of the API should indexing start at 1 (the bio default) rather than at 0 (the Java standard)?

Please add comments here…