001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022
023package org.biojava.bio.dp;
024
025import org.biojava.bio.BioException;
026import org.biojava.bio.seq.Sequence;
027import org.biojava.bio.seq.SequenceIterator;
028import org.biojava.bio.seq.db.SequenceDB;
029import org.biojava.bio.symbol.IllegalAlphabetException;
030import org.biojava.bio.symbol.IllegalSymbolException;
031import org.biojava.bio.symbol.SymbolList;
032
033/**
034 * An abstract implementation of TrainingAlgorithm that provides a framework
035 * for plugging in per-cycle code for parameter optimization.
036 *
037 * @author Matthew Pocock
038 * @author Thomas Down
039 */
040public abstract class AbstractTrainer implements TrainingAlgorithm {
041  private DP dp;
042
043  private double lastScore = -Double.NEGATIVE_INFINITY;
044  private double currentScore = -Double.NEGATIVE_INFINITY;
045  private int cycle;
046
047  public double getLastScore() {
048    return lastScore;
049  }
050
051  public double getCurrentScore() {
052    return currentScore;
053  }
054
055  public int getCycle() {
056    return cycle;
057  }
058
059  public DP getDP() {
060    return dp;
061  }
062
063  protected abstract double singleSequenceIteration(ModelTrainer trainer,
064                                                    SymbolList symList)
065  throws IllegalSymbolException, IllegalTransitionException, IllegalAlphabetException;
066
067  /**
068   * Trains the sequences in db until stopper says to finnish.
069   */
070  public void train(
071    SequenceDB db,
072    double nullModelWeight,
073    StoppingCriteria stopper
074  ) throws IllegalSymbolException, BioException {
075    try {
076      ModelTrainer trainer = new SimpleModelTrainer();
077      trainer.setNullModelWeight(nullModelWeight);
078      trainer.registerModel(dp.getModel());
079
080      do {
081        cycle++;
082        lastScore = currentScore;
083        currentScore = 0.0;
084        for(SequenceIterator si = db.sequenceIterator(); si.hasNext(); ) {
085          Sequence seq = si.nextSequence();
086          currentScore += singleSequenceIteration(trainer, seq);
087        }
088        trainer.train();
089        trainer.clearCounts();
090      } while(!stopper.isTrainingComplete(this));
091    } catch (Exception e) {
092      throw new BioException("Unable to train", e);
093    }
094  }
095
096  public AbstractTrainer(DP dp) {
097    this.dp = dp;
098  }
099
100  protected AbstractTrainer() {}
101}