001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 */ 020 021 022package org.biojava.bio.proteomics; 023 024 025import java.io.InputStream; 026import java.util.Collections; 027import java.util.HashMap; 028import java.util.HashSet; 029import java.util.Map; 030import java.util.MissingResourceException; 031import java.util.Set; 032 033import javax.xml.parsers.DocumentBuilder; 034import javax.xml.parsers.DocumentBuilderFactory; 035 036import org.biojava.bio.BioError; 037import org.biojava.bio.BioException; 038import org.biojava.bio.seq.io.SymbolTokenization; 039import org.biojava.bio.symbol.AlphabetManager; 040import org.biojava.bio.symbol.FiniteAlphabet; 041import org.biojava.bio.symbol.IllegalSymbolException; 042import org.biojava.bio.symbol.SimpleSymbolList; 043import org.biojava.bio.symbol.SymbolList; 044import org.biojava.utils.ClassTools; 045import org.w3c.dom.Document; 046import org.w3c.dom.Element; 047import org.w3c.dom.Node; 048import org.w3c.dom.NodeList; 049import org.xml.sax.InputSource; 050 051/** 052 * Registry and utility methods for Proteases. 053 * @author Mark Schreiber 054 */ 055public final class ProteaseManager { 056 private static Map name2Protease = new HashMap(); 057 058 static Document doc = null; 059 060 static { 061 try { 062 InputStream tablesStream = 063 ClassTools.getClassLoader(ProteaseManager.class).getResourceAsStream( 064 "org/biojava/bio/proteomics/ProteaseManager.xml" 065 ); 066 067 if(tablesStream == null ) { 068 throw new BioException("Couldn't locate ProteaseManager.xml."); 069 } 070 071 InputSource is = new InputSource(tablesStream); 072 DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder(); 073 doc = parser.parse(is); 074 075 NodeList children = doc.getDocumentElement().getChildNodes(); 076 for(int i = 0; i < children.getLength(); i++) { 077 Node cnode = (Node) children.item(i); 078 if(! (cnode instanceof Element)) { 079 continue; 080 } 081 082 Element child = (Element) cnode; 083 if(child.getNodeName().equals("protease")) { 084 085 //Parameters 086 SymbolList cleavRes = null; 087 SymbolList exceptRes = null; 088 boolean endo = false; 089 String protName = child.getAttribute("name"); 090 Protease protease = null; 091 092 NodeList proteaseNodes = child.getChildNodes(); 093 for(int j = 0; j < proteaseNodes.getLength(); j++){ 094 Node cnode2 = (Node) proteaseNodes.item(j); 095 if(! (cnode2 instanceof Element)) { 096 continue; 097 } 098 Element el = (Element) cnode2; 099 String name = el.getNodeName(); 100 String content = el.getFirstChild().getNodeValue(); 101 if(name.equals("cleaveRes")) { 102 cleavRes = createSymbolList(content.trim()); 103 }else if(name.equals("exceptRes")) { 104 exceptRes = createSymbolList(content.trim()); 105 }else if(name.equals("endo")) { 106 endo = new Boolean(content).booleanValue(); 107 } 108 109 110 if(cleavRes == null) 111 cleavRes = createSymbolList(""); 112 if(exceptRes == null){ 113 exceptRes = createSymbolList(""); 114 } 115 protease = new Protease(cleavRes ,endo, exceptRes, protName); 116 } 117 registerProtease(protease); 118 } 119 } 120 }catch (MissingResourceException mre) { 121 System.err.println(mre.getMessage()); 122 }catch(Exception e){//err 123 e.printStackTrace(); 124 } 125 } 126 127 /** 128 * Creates and registers a new Protease. In future the Protease can be recovered 129 * using the getProteaseByName() method. 130 * @param cleaveRes the cleavege residues 131 * @param endoProtease is it an endo protease? 132 * @param notCleaveRes the exceptions to the cleavage residues 133 * @param name the name of the Protease 134 * @return a reference to the new Protease 135 * @throws IllegalSymbolException if the cleaveRes or notCleaveRes are not 136 * from the PROTEIN alphabet 137 * @throws BioException if a Protease with the same name already exists. 138 */ 139 public static synchronized Protease createProtease( 140 SymbolList cleaveRes, 141 boolean endoProtease, 142 SymbolList notCleaveRes, 143 String name) throws IllegalSymbolException, BioException{ 144 145 Protease p = new Protease(cleaveRes, endoProtease, notCleaveRes, name); 146 registerProtease(p); 147 return p; 148 } 149 150 public static synchronized Protease createProtease( 151 SymbolList cleaveRes, 152 boolean endoProtease, 153 String name) throws IllegalSymbolException, BioException{ 154 155 Protease p = new Protease(cleaveRes, endoProtease, SymbolList.EMPTY_LIST, name); 156 registerProtease(p); 157 return p; 158 } 159 160 public static synchronized Protease createProtease( 161 String cleaveRes, 162 boolean endoProtease, 163 String notCleaveRes, 164 String name) throws BioException, IllegalSymbolException{ 165 166 return createProtease(createSymbolList(cleaveRes), 167 endoProtease, 168 createSymbolList(notCleaveRes), 169 name); 170 } 171 172 public static synchronized Protease createProtease( 173 String cleaveRes, 174 boolean endoProtease, 175 String name) throws BioException, IllegalSymbolException{ 176 177 return createProtease(createSymbolList(cleaveRes), 178 endoProtease, 179 SymbolList.EMPTY_LIST, 180 name); 181 } 182 183 /** 184 * Registers a protease and ensures its flyweight status 185 * @param prot the Protease to register 186 * @throws BioException if a Protease with the same name is already registered. 187 */ 188 public static synchronized void registerProtease(Protease prot)throws BioException{ 189 if(registered(prot.getName())) 190 throw new BioException( 191 "A Protease has already been registered with the name " 192 +prot.getName() 193 ); 194 195 name2Protease.put(prot.getName(), prot); 196 } 197 198 /** 199 * Gets a Protease instance by name. 200 * @param proteaseName the name of a registered Protease (case sensistive) 201 * @return a fly-weight Protease instance 202 * @throws BioException if no protease is registered by that name 203 */ 204public static Protease getProteaseByName(String proteaseName) 205 throws BioException { 206 207 Protease protease = (Protease)name2Protease.get(proteaseName); 208 if(protease == null){ 209 throw new BioException("No protease has been registered by that name"); 210 } 211 return protease; 212} 213 214/** 215 * @return an unmodifiable Set of all the registered Protease names (Strings). 216 */ 217public static Set getNames(){ 218 return Collections.unmodifiableSet(name2Protease.keySet()); 219} 220 221/** 222 * @return an unmodifiable set of all the registered Protease objects. 223 */ 224public static Set getAllProteases(){ 225 return Collections.unmodifiableSet( 226 new HashSet(name2Protease.values()) 227 ); 228} 229 230/** 231 * Has a Protease been registered with that name? 232 * @param proteaseName the query 233 * @return true if one has, false otherwise 234 */ 235public static boolean registered(String proteaseName){ 236 return name2Protease.containsKey(proteaseName); 237} 238 239/** 240 * @return a reference to the singleton instance of the ProteaseManager 241 */ 242public static synchronized ProteaseManager getInstance(){ 243 if(singletonInstance == null){ 244 singletonInstance = new ProteaseManager(); 245 } 246 return singletonInstance; 247} 248 249static private SymbolList createSymbolList(String seq) 250 251 throws IllegalSymbolException, BioException { 252 if(seq == null || seq.trim().equals("")){ 253 return SymbolList.EMPTY_LIST; 254 } 255 SymbolList sList; 256 257 FiniteAlphabet prot 258 259 = (FiniteAlphabet)AlphabetManager.alphabetForName("PROTEIN"); 260 261 262 263 SymbolTokenization tokenization = prot.getTokenization("token"); 264 265 sList = new SimpleSymbolList (tokenization, seq); 266 267 return sList; 268 269} 270 271/** 272 * @return a flywieght instance of Trypsin 273 */ 274public static Protease getTrypsin(){ 275 try { 276 return getProteaseByName(TRYPSIN); 277 } 278 catch (BioException ex) { 279 throw new BioError("Cannot retreive Trypsin, AlphabetManager.xml may be corrupted", ex); 280 } 281} 282 283/** 284 * @return a flywieght instance of Lys-C 285 */ 286public static Protease getLys_C(){ 287 try { 288 return getProteaseByName(LYS_C); 289 } 290 catch (BioException ex) { 291 throw new BioError("Cannot retreive Lys-C, AlphabetManager.xml may be corrupted", ex); 292 } 293} 294 295/** 296 * @return a flywieght instance of Arg-C 297 */ 298public static Protease getArg_C(){ 299 try { 300 return getProteaseByName(ARG_C); 301 } 302 catch (BioException ex) { 303 throw new BioError("Cannot retreive Arg-C, AlphabetManager.xml may be corrupted",ex); 304 } 305} 306 307/** 308 * @return a flywieght instance of Asp-N 309 */ 310public static Protease getAsp_N(){ 311 try { 312 return getProteaseByName(ASP_N); 313 } 314 catch (BioException ex) { 315 throw new BioError("Cannot retreive Asp-N, AlphabetManager.xml may be corrupted",ex); 316 } 317} 318 319/** 320 * @return a flywieght instance of Glu_C_bicarbonate 321 */ 322public static Protease getGlu_C_bicarbonate(){ 323 try { 324 return getProteaseByName(GLU_C_BICARB); 325 } 326 catch (BioException ex) { 327 throw new BioError("Cannot retreive Glu_C_bicarbonate, AlphabetManager.xml may be corrupted", ex); 328 } 329} 330 331/** 332 * @return a flywieght instance of Glu_C_phosphate 333 */ 334public static Protease getGlu_C_phosphate(){ 335 try { 336 return getProteaseByName(GLU_C_PHOS); 337 } 338 catch (BioException ex) { 339 throw new BioError("Cannot retreive Glu_C_phosphate, AlphabetManager.xml may be corrupted", ex); 340 } 341} 342 343/** 344 * @return a flywieght instance of Chymotrypsin 345 */ 346public static Protease getChymotrypsin(){ 347 try { 348 return getProteaseByName(CHYMOTRYP); 349 } 350 catch (BioException ex) { 351 throw new BioError("Cannot retreive Chymotrypsin, AlphabetManager.xml may be corrupted", ex); 352 } 353} 354 355/** 356 * @return a flywieght instance of CNBr 357 */ 358public static Protease getCNBr(){ 359 try { 360 return getProteaseByName(CNBr); 361 } 362 catch (BioException ex) { 363 throw new BioError("Cannot retreive CNBr, AlphabetManager.xml may be corrupted", ex); 364 } 365} 366 367private static ProteaseManager singletonInstance; 368public static final String TRYPSIN = "Trypsin"; 369public static final String LYS_C = "Lys-C"; 370public static final String ARG_C = "Arg-C"; 371public static final String ASP_N = "Asp-N"; 372public static final String GLU_C_BICARB = "Glu-C-bicarbonate"; 373public static final String GLU_C_PHOS = "Glu-C-phosphate"; 374public static final String CHYMOTRYP = "Chymotrypsin"; 375public static final String CNBr = "CNBr"; 376 377}