001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.bio.seq.db;
023
024import java.net.MalformedURLException;
025import java.net.URL;
026import java.net.URLConnection;
027
028import org.biojava.bio.BioException;
029import org.biojava.bio.seq.Sequence;
030import org.biojava.bio.seq.SequenceIterator;
031import org.biojava.bio.seq.io.SeqIOTools;
032import org.biojava.bio.seq.io.SequenceBuilderFactory;
033import org.biojava.bio.seq.io.SequenceFormat;
034import org.biojava.bio.seq.io.StreamReader;
035import org.biojava.bio.seq.io.SymbolTokenization;
036import org.biojava.bio.symbol.Alphabet;
037import org.biojava.utils.AbstractChangeable;
038import org.biojava.utils.ChangeVetoException;
039
040/**
041 * Functions for access to a web based database that returns sequences
042 * in a variety of formats.
043 *
044 * @author Jason Stajich
045 * @author Matthew Pocock
046 * @author Mark Schreiber
047 * @author Richard Holland
048 */
049
050public abstract class WebSequenceDB
051extends AbstractChangeable
052implements SequenceDBLite {
053  protected abstract SequenceFormat getSequenceFormat();
054
055  protected abstract URL getAddress(String id)
056  throws MalformedURLException;
057
058  protected abstract Alphabet getAlphabet();
059
060  /**
061   * Gets a sequence using its unique ID (eg for GenBank this would be the GI number)
062   * @param id the unique ID
063   * @return the matching sequence
064   * @throws BioException if the ID is invalid
065   * @throws BioException if the io operation times out or has problems
066   *    connecting. Can also indicate an invalid URL has been constructed.
067   */
068  public Sequence getSequence(String id)
069  throws BioException {
070    if( id.equals("") ) {
071      throw new BioException("did not specify a valid id for getSequence");
072    }
073
074    try {
075      URL queryURL = getAddress(id);
076      //System.err.println("query is "+ queryURL.toString());
077      URLConnection connection = queryURL.openConnection();
078      SequenceFormat sFormat = getSequenceFormat();
079
080//      SequenceBuilder sbuilder = new SimpleSequenceBuilder();
081//      FastaDescriptionLineParser sFact =
082//        new FastaDescriptionLineParser(sbuilder);
083
084      Alphabet alpha = getAlphabet();
085      SequenceBuilderFactory sFact = SeqIOTools.formatToFactory(sFormat,alpha);
086      SymbolTokenization rParser = alpha.getTokenization("token");
087      //System.err.println("got data from "+ queryURL);
088      SequenceIterator seqI = new StreamReader(
089        connection.getInputStream(),
090        sFormat, rParser, sFact
091      );
092
093      return seqI.nextSequence();
094    } catch ( Exception e ){
095      throw new BioException(e);
096    }
097  }
098
099  /**
100   * Not supported, You can't add sequences to a WebDB!
101   * @param seq the sequence you tried to add
102   * @throws ChangeVetoException always!
103   */
104  public void addSequence(Sequence seq)
105  throws ChangeVetoException {
106    throw new ChangeVetoException(
107      "Can't add sequences from web sequence DB: " +
108      seq.getName()
109    );
110  }
111
112  /**
113   * Not supported, you can't remove a sequence from a WebDB!
114   * @param id the sequence you tried to change.
115   * @throws ChangeVetoException always!
116   */
117  public void removeSequence(String id)
118  throws ChangeVetoException {
119    throw new ChangeVetoException(
120      "Can't remove sequences from web sequence DB: " +
121      id
122    );
123  }
124}