001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.bio.seq.db; 023 024import java.net.MalformedURLException; 025import java.net.URL; 026import java.net.URLConnection; 027 028import org.biojava.bio.BioException; 029import org.biojava.bio.seq.Sequence; 030import org.biojava.bio.seq.SequenceIterator; 031import org.biojava.bio.seq.io.SeqIOTools; 032import org.biojava.bio.seq.io.SequenceBuilderFactory; 033import org.biojava.bio.seq.io.SequenceFormat; 034import org.biojava.bio.seq.io.StreamReader; 035import org.biojava.bio.seq.io.SymbolTokenization; 036import org.biojava.bio.symbol.Alphabet; 037import org.biojava.utils.AbstractChangeable; 038import org.biojava.utils.ChangeVetoException; 039 040/** 041 * Functions for access to a web based database that returns sequences 042 * in a variety of formats. 043 * 044 * @author Jason Stajich 045 * @author Matthew Pocock 046 * @author Mark Schreiber 047 * @author Richard Holland 048 */ 049 050public abstract class WebSequenceDB 051extends AbstractChangeable 052implements SequenceDBLite { 053 protected abstract SequenceFormat getSequenceFormat(); 054 055 protected abstract URL getAddress(String id) 056 throws MalformedURLException; 057 058 protected abstract Alphabet getAlphabet(); 059 060 /** 061 * Gets a sequence using its unique ID (eg for GenBank this would be the GI number) 062 * @param id the unique ID 063 * @return the matching sequence 064 * @throws BioException if the ID is invalid 065 * @throws BioException if the io operation times out or has problems 066 * connecting. Can also indicate an invalid URL has been constructed. 067 */ 068 public Sequence getSequence(String id) 069 throws BioException { 070 if( id.equals("") ) { 071 throw new BioException("did not specify a valid id for getSequence"); 072 } 073 074 try { 075 URL queryURL = getAddress(id); 076 //System.err.println("query is "+ queryURL.toString()); 077 URLConnection connection = queryURL.openConnection(); 078 SequenceFormat sFormat = getSequenceFormat(); 079 080// SequenceBuilder sbuilder = new SimpleSequenceBuilder(); 081// FastaDescriptionLineParser sFact = 082// new FastaDescriptionLineParser(sbuilder); 083 084 Alphabet alpha = getAlphabet(); 085 SequenceBuilderFactory sFact = SeqIOTools.formatToFactory(sFormat,alpha); 086 SymbolTokenization rParser = alpha.getTokenization("token"); 087 //System.err.println("got data from "+ queryURL); 088 SequenceIterator seqI = new StreamReader( 089 connection.getInputStream(), 090 sFormat, rParser, sFact 091 ); 092 093 return seqI.nextSequence(); 094 } catch ( Exception e ){ 095 throw new BioException(e); 096 } 097 } 098 099 /** 100 * Not supported, You can't add sequences to a WebDB! 101 * @param seq the sequence you tried to add 102 * @throws ChangeVetoException always! 103 */ 104 public void addSequence(Sequence seq) 105 throws ChangeVetoException { 106 throw new ChangeVetoException( 107 "Can't add sequences from web sequence DB: " + 108 seq.getName() 109 ); 110 } 111 112 /** 113 * Not supported, you can't remove a sequence from a WebDB! 114 * @param id the sequence you tried to change. 115 * @throws ChangeVetoException always! 116 */ 117 public void removeSequence(String id) 118 throws ChangeVetoException { 119 throw new ChangeVetoException( 120 "Can't remove sequences from web sequence DB: " + 121 id 122 ); 123 } 124}