001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 */
020
021
022package org.biojava.bio.proteomics;
023
024
025import java.io.InputStream;
026import java.util.Collections;
027import java.util.HashMap;
028import java.util.HashSet;
029import java.util.Map;
030import java.util.MissingResourceException;
031import java.util.Set;
032
033import javax.xml.parsers.DocumentBuilder;
034import javax.xml.parsers.DocumentBuilderFactory;
035
036import org.biojava.bio.BioError;
037import org.biojava.bio.BioException;
038import org.biojava.bio.seq.io.SymbolTokenization;
039import org.biojava.bio.symbol.AlphabetManager;
040import org.biojava.bio.symbol.FiniteAlphabet;
041import org.biojava.bio.symbol.IllegalSymbolException;
042import org.biojava.bio.symbol.SimpleSymbolList;
043import org.biojava.bio.symbol.SymbolList;
044import org.biojava.utils.ClassTools;
045import org.w3c.dom.Document;
046import org.w3c.dom.Element;
047import org.w3c.dom.Node;
048import org.w3c.dom.NodeList;
049import org.xml.sax.InputSource;
050
051/**
052 * Registry and utility methods for Proteases.
053 * @author Mark Schreiber
054 */
055public final class ProteaseManager {
056  private static Map name2Protease = new HashMap();
057
058  static Document doc = null;
059
060  static {
061    try {
062      InputStream tablesStream =
063         ClassTools.getClassLoader(ProteaseManager.class).getResourceAsStream(
064            "org/biojava/bio/proteomics/ProteaseManager.xml"
065          );
066
067      if(tablesStream == null ) {
068        throw new BioException("Couldn't locate ProteaseManager.xml.");
069      }
070
071      InputSource is = new InputSource(tablesStream);
072      DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
073      doc = parser.parse(is);
074
075      NodeList children = doc.getDocumentElement().getChildNodes();
076      for(int i = 0; i < children.getLength(); i++) {
077        Node cnode = (Node) children.item(i);
078        if(! (cnode instanceof Element)) {
079          continue;
080        }
081
082        Element child = (Element) cnode;
083        if(child.getNodeName().equals("protease")) {
084
085          //Parameters
086          SymbolList cleavRes = null;
087          SymbolList exceptRes = null;
088          boolean endo = false;
089          String protName = child.getAttribute("name");
090          Protease protease = null;
091
092          NodeList proteaseNodes = child.getChildNodes();
093          for(int j = 0; j < proteaseNodes.getLength(); j++){
094            Node cnode2 = (Node) proteaseNodes.item(j);
095            if(! (cnode2 instanceof Element)) {
096              continue;
097            }
098            Element el = (Element) cnode2;
099            String name = el.getNodeName();
100            String content = el.getFirstChild().getNodeValue();
101            if(name.equals("cleaveRes")) {
102              cleavRes = createSymbolList(content.trim());
103            }else if(name.equals("exceptRes")) {
104              exceptRes = createSymbolList(content.trim());
105            }else if(name.equals("endo")) {
106              endo = new Boolean(content).booleanValue();
107            }
108
109
110            if(cleavRes == null)
111              cleavRes = createSymbolList("");
112            if(exceptRes == null){
113              exceptRes = createSymbolList("");
114            }
115            protease = new Protease(cleavRes ,endo, exceptRes, protName);
116          }
117          registerProtease(protease);
118        }
119      }
120    }catch (MissingResourceException mre) {
121      System.err.println(mre.getMessage());
122    }catch(Exception e){//err
123      e.printStackTrace();
124    }
125  }
126
127  /**
128   * Creates and registers a new Protease. In future the Protease can be recovered
129   * using the getProteaseByName() method.
130   * @param cleaveRes the cleavege residues
131   * @param endoProtease is it an endo protease?
132   * @param notCleaveRes the exceptions to the cleavage residues
133   * @param name the name of the Protease
134   * @return a reference to the new Protease
135   * @throws IllegalSymbolException if the cleaveRes or notCleaveRes are not
136   * from the PROTEIN alphabet
137   * @throws BioException if a Protease with the same name already exists.
138   */
139  public static synchronized Protease createProtease(
140      SymbolList cleaveRes,
141      boolean endoProtease,
142      SymbolList notCleaveRes,
143      String name) throws IllegalSymbolException, BioException{
144
145    Protease p = new Protease(cleaveRes, endoProtease, notCleaveRes, name);
146    registerProtease(p);
147    return p;
148  }
149
150  public static synchronized Protease createProtease(
151      SymbolList cleaveRes,
152      boolean endoProtease,
153      String name) throws IllegalSymbolException, BioException{
154
155    Protease p = new Protease(cleaveRes, endoProtease, SymbolList.EMPTY_LIST, name);
156    registerProtease(p);
157    return p;
158  }
159
160  public static synchronized Protease createProtease(
161      String cleaveRes,
162      boolean endoProtease,
163      String notCleaveRes,
164      String name) throws BioException, IllegalSymbolException{
165
166    return createProtease(createSymbolList(cleaveRes),
167                          endoProtease,
168                          createSymbolList(notCleaveRes),
169                          name);
170  }
171
172  public static synchronized Protease createProtease(
173      String cleaveRes,
174      boolean endoProtease,
175      String name) throws BioException, IllegalSymbolException{
176
177    return createProtease(createSymbolList(cleaveRes),
178                          endoProtease,
179                          SymbolList.EMPTY_LIST,
180                          name);
181  }
182
183  /**
184   * Registers a protease and ensures its flyweight status
185   * @param prot the Protease to register
186   * @throws BioException if a Protease with the same name is already registered.
187   */
188  public static synchronized void registerProtease(Protease prot)throws BioException{
189    if(registered(prot.getName()))
190       throw new BioException(
191           "A Protease has already been registered with the name "
192           +prot.getName()
193       );
194
195    name2Protease.put(prot.getName(), prot);
196  }
197
198  /**
199   * Gets a Protease instance by name.
200   * @param proteaseName the name of a registered Protease (case sensistive)
201   * @return a fly-weight Protease instance
202   * @throws BioException if no protease is registered by that name
203   */
204public static Protease getProteaseByName(String proteaseName)
205                             throws BioException {
206
207    Protease protease = (Protease)name2Protease.get(proteaseName);
208    if(protease == null){
209      throw new BioException("No protease has been registered by that name");
210    }
211    return protease;
212}
213
214/**
215 * @return an unmodifiable Set of all the registered Protease names (Strings).
216 */
217public static Set getNames(){
218  return Collections.unmodifiableSet(name2Protease.keySet());
219}
220
221/**
222 * @return an unmodifiable set of all the registered Protease objects.
223 */
224public static Set getAllProteases(){
225  return Collections.unmodifiableSet(
226      new HashSet(name2Protease.values())
227  );
228}
229
230/**
231 * Has a Protease been registered with that name?
232 * @param proteaseName the query
233 * @return true if one has, false otherwise
234 */
235public static boolean registered(String proteaseName){
236  return name2Protease.containsKey(proteaseName);
237}
238
239/**
240 * @return a reference to the singleton instance of the ProteaseManager
241 */
242public static synchronized ProteaseManager getInstance(){
243  if(singletonInstance == null){
244    singletonInstance = new ProteaseManager();
245  }
246  return singletonInstance;
247}
248
249static private SymbolList createSymbolList(String seq)
250
251                              throws IllegalSymbolException, BioException {
252    if(seq == null || seq.trim().equals("")){
253      return SymbolList.EMPTY_LIST;
254    }
255    SymbolList sList;
256
257    FiniteAlphabet prot
258
259             = (FiniteAlphabet)AlphabetManager.alphabetForName("PROTEIN");
260
261
262
263    SymbolTokenization tokenization = prot.getTokenization("token");
264
265    sList = new SimpleSymbolList (tokenization, seq);
266
267    return sList;
268
269}
270
271/**
272 * @return a flywieght instance of Trypsin
273 */
274public static Protease getTrypsin(){
275  try {
276    return getProteaseByName(TRYPSIN);
277  }
278  catch (BioException ex) {
279    throw new BioError("Cannot retreive Trypsin, AlphabetManager.xml may be corrupted", ex);
280  }
281}
282
283/**
284 * @return a flywieght instance of Lys-C
285 */
286public static Protease getLys_C(){
287  try {
288    return getProteaseByName(LYS_C);
289  }
290  catch (BioException ex) {
291    throw new BioError("Cannot retreive Lys-C, AlphabetManager.xml may be corrupted", ex);
292  }
293}
294
295/**
296 * @return a flywieght instance of Arg-C
297 */
298public static Protease getArg_C(){
299  try {
300    return getProteaseByName(ARG_C);
301  }
302  catch (BioException ex) {
303    throw new BioError("Cannot retreive Arg-C, AlphabetManager.xml may be corrupted",ex);
304  }
305}
306
307/**
308 * @return a flywieght instance of Asp-N
309 */
310public static Protease getAsp_N(){
311  try {
312    return getProteaseByName(ASP_N);
313  }
314  catch (BioException ex) {
315    throw new BioError("Cannot retreive Asp-N, AlphabetManager.xml may be corrupted",ex);
316  }
317}
318
319/**
320 * @return a flywieght instance of Glu_C_bicarbonate
321 */
322public static Protease getGlu_C_bicarbonate(){
323  try {
324    return getProteaseByName(GLU_C_BICARB);
325  }
326  catch (BioException ex) {
327    throw new BioError("Cannot retreive Glu_C_bicarbonate, AlphabetManager.xml may be corrupted", ex);
328  }
329}
330
331/**
332 * @return a flywieght instance of Glu_C_phosphate
333 */
334public static Protease getGlu_C_phosphate(){
335  try {
336    return getProteaseByName(GLU_C_PHOS);
337  }
338  catch (BioException ex) {
339    throw new BioError("Cannot retreive Glu_C_phosphate, AlphabetManager.xml may be corrupted", ex);
340  }
341}
342
343/**
344 * @return a flywieght instance of Chymotrypsin
345 */
346public static Protease getChymotrypsin(){
347  try {
348    return getProteaseByName(CHYMOTRYP);
349  }
350  catch (BioException ex) {
351    throw new BioError("Cannot retreive Chymotrypsin, AlphabetManager.xml may be corrupted", ex);
352  }
353}
354
355/**
356 * @return a flywieght instance of CNBr
357 */
358public static Protease getCNBr(){
359  try {
360    return getProteaseByName(CNBr);
361  }
362  catch (BioException ex) {
363    throw new BioError("Cannot retreive CNBr, AlphabetManager.xml may be corrupted", ex);
364  }
365}
366
367private static ProteaseManager singletonInstance;
368public static final String TRYPSIN = "Trypsin";
369public static final String LYS_C = "Lys-C";
370public static final String ARG_C = "Arg-C";
371public static final String ASP_N = "Asp-N";
372public static final String GLU_C_BICARB = "Glu-C-bicarbonate";
373public static final String GLU_C_PHOS = "Glu-C-phosphate";
374public static final String CHYMOTRYP = "Chymotrypsin";
375public static final String CNBr = "CNBr";
376
377}