001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.bio.seq.db; 022 023import java.net.MalformedURLException; 024import java.net.URL; 025 026import org.biojava.bio.BioException; 027import org.biojava.bio.BioRuntimeException; 028import org.biojava.bio.seq.DNATools; 029import org.biojava.bio.seq.ProteinTools; 030import org.biojava.bio.seq.io.FastaFormat; 031import org.biojava.bio.seq.io.GenbankFormat; 032import org.biojava.bio.seq.io.SequenceFormat; 033import org.biojava.bio.symbol.Alphabet; 034 035/** 036 * @author Matthew Pocock 037 * @author Mark Schreiber 038 */ 039public class NCBISequenceDB 040extends WebSequenceDB { 041 private String server; 042 private String CGI; 043 private SequenceFormat format; 044 private String dataBase; 045 private Alphabet alpha; 046 private String formatName; 047 048 public static final String DB_NUCLEOTIDE = "nucleotide"; 049 public static final String DB_PROTEIN = "protein"; 050 051 052 /** 053 * Default constructor, querys the Genbank nucleotide database on "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi" 054 * and retrieves sequences in FastaFormat. 055 */ 056 public NCBISequenceDB(){ 057 this("http://www.ncbi.nlm.nih.gov/","entrez/query.fcgi",DB_NUCLEOTIDE,new FastaFormat()); 058 } 059 060 /** 061 * Parameterized constructor 062 * @param database must be one of "nucleotide" or "protein" (use the static DB fields) 063 * @param format must be one of <code>GenbankFormat</code> or <code>FastaFormat</code> 064 * @throws BioRuntimeException if the database or format is invalid 065 */ 066 public NCBISequenceDB(String database, SequenceFormat format){ 067 this("http://www.ncbi.nlm.nih.gov/","entrez/query.fcgi",database,format); 068 } 069 070 /** 071 * Parameterized constructor 072 * @param server eg "http://www.ncbi.nlm.nih.gov/" 073 * @param CGI eg "entrez/query.fcgi" 074 * @param database must be one of "nucleotide" or "protein" (use the static DB fields) 075 * @param format must be one of <code>GenbankFormat</code> or <code>FastaFormat</code> 076 * @throws BioRuntimeException if the database or format is invalid 077 */ 078 public NCBISequenceDB(String server, String CGI, String database, SequenceFormat format) 079 throws BioRuntimeException{ 080 081 this.server = server; 082 this.CGI = CGI; 083 try { 084 setDatabase(database); 085 } 086 catch (BioException ex) { 087 throw new BioRuntimeException( 088 "Database format must be one of {nucleotide, protein}"); 089 } 090 try { 091 setSequenceFormat(format); 092 } 093 catch (BioException ex) { 094 throw new BioRuntimeException( 095 "SequenceFormat object must be one of {FastaFormat, GenbankFormat}"); 096 } 097 098 } 099 100 101 public String getDataBase(){ 102 return dataBase; 103 } 104 105 /** 106 * 107 * @param dataBase must be one of "nucleotide" or "protein" (use the static DB fields) 108 * @throws BioException if an unknown database name is used. 109 */ 110 public void setDatabase(String dataBase) throws BioException{ 111 if (dataBase == DB_NUCLEOTIDE) { 112 113 this.dataBase = DB_NUCLEOTIDE; 114 this.alpha = DNATools.getDNA(); 115 116 } 117 else if (dataBase == DB_PROTEIN) { 118 119 this.dataBase = DB_PROTEIN; 120 this.alpha = ProteinTools.getAlphabet(); 121 122 } 123 else { 124 125 throw new BioException( 126 "Database format must be one of {nucleotide, protein}"); 127 128 } 129 130 } 131 132 public SequenceFormat getSequenceFormat() { 133 return format; 134 } 135 136 /** 137 * 138 * @param format must be one of <code>FastaFormat</code> or <code>GenbankFormat</code> 139 * @throws BioException if an unknown <code>SequenceFormat</code> is used 140 */ 141 public void setSequenceFormat(SequenceFormat format) throws BioException{ 142 143 if (format instanceof FastaFormat ) { 144 this.format = format; 145 this.formatName = "FASTA"; 146 } 147 else if(format instanceof GenbankFormat){ 148 this.format = format; 149 if (alpha == DNATools.getDNA()) { 150 this.formatName = "GenBank"; 151 } 152 else { 153 this.formatName = "GenPept"; 154 } 155 156 } 157 else { 158 throw new BioException("Only Genbank and FASTA formats currently supported"); 159 } 160 } 161 162 protected Alphabet getAlphabet() { 163 return alpha; 164 } 165 166 protected URL getAddress(String uid) 167 throws MalformedURLException { 168 String query = "cmd=text&db="+dataBase+"&uid="+uid+"&dopt="+formatName; 169 170 return new URL(server + CGI + "?" + query); 171 } 172 173 public String getName() { 174 return "NCBI-Genbank"; 175 } 176}