/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on Nov 17, 2010
 * Author: Jianjiong Gao
 *
 */

package org.biojava.nbio.protmod;

import org.biojava.nbio.protmod.io.ProteinModificationXmlReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.*;

/**
 * This class serves as an instance registry by maintaining
 * a pool of ProteinModification instances.
 *
 * A list of common protein modifications is loaded from an XML file
 * ({@code ptm_list.xml}) the first time the registry is used, unless
 * {@link #init(InputStream)} was called earlier with an alternative list.
 *
 * Initialization is synchronized; the other operations are not.
 *
 * @author Jianjiong Gao
 * @since 3.0
 */
public class ProteinModificationRegistry {

    private static final Logger logger = LoggerFactory.getLogger(ProteinModificationRegistry.class);

    // All of these are created together in lazyInit(InputStream);
    // registry == null is the "not yet initialized" marker.
    private static Set<ProteinModification> registry = null;
    private static Map<String, ProteinModification> byId = null;
    private static Map<String, Set<ProteinModification>> byResidId = null;
    private static Map<String, Set<ProteinModification>> byPsimodId = null;
    private static Map<String, Set<ProteinModification>> byPdbccId = null;
    private static Map<String, Set<ProteinModification>> byKeyword = null;
    private static Map<Component, Set<ProteinModification>> byComponent = null;
    private static Map<ModificationCategory, Set<ProteinModification>> byCategory = null;
    private static Map<ModificationOccurrenceType, Set<ProteinModification>> byOccurrenceType = null;

    /** Name of the bundled XML resource, resolved relative to {@link ProteinModification}. */
    private static final String DIR_XML_PTM_LIST = "ptm_list.xml";

    /**
     * Register common protein modifications from an XML stream.
     * Failures are logged rather than propagated, so the registry stays
     * usable (possibly empty) when the list cannot be read.
     *
     * @param inStream stream to the XML list of modifications; may be null,
     *                 in which case nothing is registered.
     */
    private static void registerCommonProteinModifications(InputStream inStream) {
        if (inStream == null) {
            logger.error("No input stream for the protein modification list; nothing was registered.");
            return;
        }
        try {
            ProteinModificationXmlReader.registerProteinModificationFromXml(inStream);
        } catch (Exception e) {
            logger.error("Exception: ", e);
        }
    }

    /**
     * Initialize the static variables and register the common modifications
     * from the bundled {@code ptm_list.xml}. Does nothing if the registry
     * has already been initialized.
     */
    public static void init() {
        lazyInit();
    }

    /**
     * Initialize the static variables and register common modifications.
     * Allows an external user to provide an alternative ptm_list.xml file
     * instead of the one contained in this jar file.
     * Does nothing (the stream is ignored) if the registry has already been
     * initialized. This class does not close the supplied stream itself.
     *
     * @param inStream InputStream to an XML file containing the list of PTMs (as in ptm_list.xml)
     */
    public static void init(InputStream inStream) {
        lazyInit(inStream);
    }

    /**
     * Lazily initialize the static variables and register common modifications.
     * Opens the stream to ptm_list.xml, delegates to
     * {@link #lazyInit(InputStream)} for parsing, and closes the stream afterwards.
     */
    private static synchronized void lazyInit() {
        if (registry != null) {
            return;
        }

        InputStream isXml = ProteinModification.class.getResourceAsStream(DIR_XML_PTM_LIST);
        try {
            lazyInit(isXml);
        } finally {
            // The stream was opened here, so it is released here.
            if (isXml != null) {
                try {
                    isXml.close();
                } catch (IOException e) {
                    logger.warn("Could not close stream to {}", DIR_XML_PTM_LIST, e);
                }
            }
        }
    }

    /**
     * Lazily initialize the static variables and register the modifications
     * read from the given stream. Does nothing if already initialized.
     *
     * @param inStream stream to the XML list of modifications.
     */
    private static synchronized void lazyInit(InputStream inStream) {
        if (registry != null) {
            return;
        }

        // registry is assigned before the XML is parsed: the reader calls
        // back into register(), whose lazyInit() must then be a no-op.
        registry = new HashSet<ProteinModification>();
        byId = new HashMap<String, ProteinModification>();
        byResidId = new HashMap<String, Set<ProteinModification>>();
        byPsimodId = new HashMap<String, Set<ProteinModification>>();
        byPdbccId = new HashMap<String, Set<ProteinModification>>();
        byKeyword = new HashMap<String, Set<ProteinModification>>();
        byComponent = new HashMap<Component, Set<ProteinModification>>();

        // The enum-keyed indexes get one (initially empty) bucket per constant.
        byCategory = new EnumMap<ModificationCategory, Set<ProteinModification>>(
                ModificationCategory.class);
        for (ModificationCategory cat : ModificationCategory.values()) {
            byCategory.put(cat, new HashSet<ProteinModification>());
        }
        byOccurrenceType = new EnumMap<ModificationOccurrenceType, Set<ProteinModification>>(
                ModificationOccurrenceType.class);
        for (ModificationOccurrenceType occ : ModificationOccurrenceType.values()) {
            byOccurrenceType.put(occ, new HashSet<ProteinModification>());
        }

        registerCommonProteinModifications(inStream);
    }

    /**
     * Add a modification to the bucket stored under {@code key},
     * creating the bucket if it does not exist yet.
     */
    private static <K> void addToIndex(Map<K, Set<ProteinModification>> index, K key,
            ProteinModification modification) {
        Set<ProteinModification> mods = index.get(key);
        if (mods == null) {
            mods = new HashSet<ProteinModification>();
            index.put(key, mods);
        }
        mods.add(modification);
    }

    /**
     * Remove a modification from the bucket stored under {@code key} and drop
     * the bucket once it is empty, so the index key sets only list keys that
     * still have at least one registered modification.
     */
    private static <K> void removeFromIndex(Map<K, Set<ProteinModification>> index, K key,
            ProteinModification modification) {
        Set<ProteinModification> mods = index.get(key);
        if (mods == null) {
            return;
        }
        mods.remove(modification);
        if (mods.isEmpty()) {
            index.remove(key);
        }
    }

    /**
     * Register a new ProteinModification.
     *
     * @param modification the modification to register.
     * @throws IllegalArgumentException if {@code modification} is null or a
     *         modification with the same ID has already been registered.
     */
    public static void register(final ProteinModification modification) {
        if (modification==null) throw new IllegalArgumentException("modification == null!");

        lazyInit();

        String id = modification.getId();
        if (byId.containsKey(id)) {
            throw new IllegalArgumentException(id+" has already been registered.");
        }

        registry.add(modification);
        byId.put(id, modification);

        byCategory.get(modification.getCategory()).add(modification);
        byOccurrenceType.get(modification.getOccurrenceType()).add(modification);

        ModificationCondition condition = modification.getCondition();
        for (Component comp : condition.getComponents()) {
            addToIndex(byComponent, comp, modification);
        }

        // The external database IDs are optional; only index the ones present.
        String pdbccId = modification.getPdbccId();
        if (pdbccId != null) {
            addToIndex(byPdbccId, pdbccId, modification);
        }

        String residId = modification.getResidId();
        if (residId != null) {
            addToIndex(byResidId, residId, modification);
        }

        String psimodId = modification.getPsimodId();
        if (psimodId != null) {
            addToIndex(byPsimodId, psimodId, modification);
        }

        for (String keyword : modification.getKeywords()) {
            addToIndex(byKeyword, keyword, modification);
        }
    }

    /**
     * Remove a modification from the registry.
     *
     * @param modification the modification to remove.
     * @throws IllegalArgumentException if {@code modification} is null.
     */
    public static void unregister(ProteinModification modification) {
        if (modification==null) throw new IllegalArgumentException("modification == null!");

        // Without this, calling unregister before any other registry method
        // would dereference the still-null static collections.
        lazyInit();

        registry.remove(modification);
        byId.remove(modification.getId());

        removeFromIndex(byResidId, modification.getResidId(), modification);
        removeFromIndex(byPsimodId, modification.getPsimodId(), modification);
        removeFromIndex(byPdbccId, modification.getPdbccId(), modification);

        for (String keyword : modification.getKeywords()) {
            removeFromIndex(byKeyword, keyword, modification);
        }

        ModificationCondition condition = modification.getCondition();
        for (Component comp : condition.getComponents()) {
            removeFromIndex(byComponent, comp, modification);
        }

        // Enum-keyed buckets are permanent (one per constant), so they are
        // emptied but never removed.
        byCategory.get(modification.getCategory()).remove(modification);
        byOccurrenceType.get(modification.getOccurrenceType()).remove(modification);
    }

    /**
     *
     * @param id modification ID.
     * @return ProteinModification that has the corresponding ID,
     *         or null if no such modification is registered.
     */
    public static ProteinModification getById(final String id) {
        lazyInit();
        return byId.get(id);
    }

    /**
     *
     * @param residId RESID ID.
     * @return the registry's own (live, modifiable) set of ProteinModifications
     *         that have the RESID ID, or null if there is none.
     */
    public static Set<ProteinModification> getByResidId(final String residId) {
        lazyInit();
        return byResidId.get(residId);
    }

    /**
     *
     * @param psimodId PSI-MOD ID.
     * @return the registry's own (live, modifiable) set of ProteinModifications
     *         that have the PSI-MOD ID, or null if there is none.
     */
    public static Set<ProteinModification> getByPsimodId(final String psimodId) {
        lazyInit();
        return byPsimodId.get(psimodId);
    }

    /**
     *
     * @param pdbccId Protein Data Bank Chemical Component ID.
     * @return the registry's own (live, modifiable) set of ProteinModifications
     *         that have the PDBCC ID, or null if there is none.
     */
    public static Set<ProteinModification> getByPdbccId(final String pdbccId) {
        lazyInit();
        return byPdbccId.get(pdbccId);
    }

    /**
     *
     * @param keyword a keyword.
     * @return the registry's own (live, modifiable) set of ProteinModifications
     *         that have the keyword, or null if there is none.
     */
    public static Set<ProteinModification> getByKeyword(final String keyword) {
        lazyInit();
        return byKeyword.get(keyword);
    }

    /**
     * Get ProteinModifications that involve one or more components.
     * @param comp1 a {@link Component}.
     * @param comps other {@link Component}s.
     * @return a set of ProteinModifications that involve all the components;
     *         empty if any of the components is unknown to the registry.
     *         With a single component this is an unmodifiable view of the
     *         registry's set, otherwise a new set.
     */
    public static Set<ProteinModification> getByComponent(final Component comp1,
            final Component... comps) {
        lazyInit();
        Set<ProteinModification> mods = byComponent.get(comp1);
        if (mods == null) {
            return Collections.emptySet();
        }

        if (comps.length == 0) {
            return Collections.unmodifiableSet(mods);
        }

        // Intersect the first component's modifications with each other one's.
        Set<ProteinModification> ret = new HashSet<ProteinModification>(mods);
        for (Component comp : comps) {
            mods = byComponent.get(comp);
            if (mods == null) {
                return Collections.emptySet();
            }
            ret.retainAll(mods);
        }
        return ret;
    }

    /**
     *
     * @return unmodifiable view of the set of all registered ProteinModifications.
     */
    public static Set<ProteinModification> allModifications() {
        lazyInit();
        return Collections.unmodifiableSet(registry);
    }

    /**
     *
     * @param cat {@link ModificationCategory}.
     * @return unmodifiable view of the registered ProteinModifications in a
     *         particular category; empty if {@code cat} is null.
     */
    public static Set<ProteinModification> getByCategory(final ModificationCategory cat) {
        lazyInit();
        Set<ProteinModification> ret = byCategory.get(cat);
        if (ret == null) {
            // Only reachable for a null key: every enum constant has a bucket.
            return Collections.emptySet();
        }
        return Collections.unmodifiableSet(ret);
    }

    /**
     *
     * @param occ {@link ModificationOccurrenceType}.
     * @return unmodifiable view of the registered ProteinModifications of a
     *         particular occurrence type; empty if {@code occ} is null.
     */
    public static Set<ProteinModification> getByOccurrenceType(final ModificationOccurrenceType occ) {
        lazyInit();
        Set<ProteinModification> ret = byOccurrenceType.get(occ);
        if (ret == null) {
            // Only reachable for a null key: every enum constant has a bucket.
            return Collections.emptySet();
        }
        return Collections.unmodifiableSet(ret);
    }

    /**
     *
     * @return unmodifiable view of the IDs of all registered ProteinModifications.
     */
    public static Set<String> allIds() {
        lazyInit();
        return Collections.unmodifiableSet(byId.keySet());
    }

    /**
     *
     * @return unmodifiable view of the PDBCC IDs of all registered ProteinModifications.
     */
    public static Set<String> allPdbccIds() {
        lazyInit();
        return Collections.unmodifiableSet(byPdbccId.keySet());
    }

    /**
     *
     * @return unmodifiable view of the RESID IDs of all registered ProteinModifications.
     */
    public static Set<String> allResidIds() {
        lazyInit();
        return Collections.unmodifiableSet(byResidId.keySet());
    }

    /**
     *
     * @return unmodifiable view of the PSI-MOD IDs of all registered ProteinModifications.
     */
    public static Set<String> allPsimodIds() {
        lazyInit();
        return Collections.unmodifiableSet(byPsimodId.keySet());
    }

    /**
     *
     * @return unmodifiable view of the components involved in all registered ProteinModifications.
     */
    public static Set<Component> allComponents() {
        lazyInit();
        return Collections.unmodifiableSet(byComponent.keySet());
    }

    /**
     *
     * @return unmodifiable view of the keywords of all registered ProteinModifications.
     */
    public static Set<String> allKeywords() {
        lazyInit();
        return Collections.unmodifiableSet(byKeyword.keySet());
    }

}