/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure;

import org.biojava.nbio.structure.align.util.URLConnectionTools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.*;

/**
 * Methods for getting the status of a PDB file (current, obsolete, etc)
 * and for accessing different versions of the structure.
 *
 * <p>All methods query the
 * <a href="http://www.rcsb.org/pdb/rest/idStatus?structureId=1HHB,3HHB,4HHB">
 * PDB website.</a>
 *
 * <p>PDB supersessions form a directed acyclic graph, where edges point from an
 * obsolete ID to the entry that directly superseded it. For example, here are
 * edges from one portion of the graph:<br>
 *
 * 1CAT -&gt; 3CAT<br>
 * 3CAT -&gt; 7CAT<br>
 * 3CAT -&gt; 8CAT<br>
 *
 * <p>The methods {@link #getReplaces(String, boolean) getReplaces(pdbId, false)}/
 * {@link #getReplacement(String, boolean, boolean) getReplacement(pdbId, false, true)}
 * just get the incoming/outgoing edges for a single node. The recursive versions
 * ({@link #getReplaces(String, boolean) getReplaces(pdbId, true)},
 * {@link #getReplacement(String, boolean, boolean) getReplacement(pdbId, true, false)})
 * will do a depth-first search up/down the tree and return a list of all nodes
 * reached.
 *
 * <p>Finally, the {@link #getCurrent(String)} method returns a single PDB ID
 * from among the results of
 * {@link #getReplacement(String, boolean, boolean) getReplacement(pdbId, true, false)}.
 * To be consistent with the old REST ordering, this is the PDB ID that occurs
 * last alphabetically.
 *
 * <p>Results are cached to reduce server load; see {@link #clearCache()}.
 *
 * @author Spencer Bliven &lt;sbliven@ucsd.edu&gt;
 * @author Amr AL-Hossary
 * @since 3.0.2
 */
public class PDBStatus {

    private static final Logger logger = LoggerFactory.getLogger(PDBStatus.class);

    public static final String DEFAULT_PDB_SERVER = "www.rcsb.org";
    public static final String PDB_SERVER_PROPERTY = "PDB.SERVER";

    /** Attribute marker searched for in each line of the getCurrent response. */
    private static final String STRUCTURE_ID_ATTR = "structureId=";

    /** Number of characters extracted as a PDB ID from the getCurrent response. */
    private static final int PDB_ID_LENGTH = 4;

    /**
     * Orders strings in reverse, case-insensitive order. Replacement IDs are
     * sorted with this before being merged by
     * {@link #mergeReversed(List, List)}.
     */
    private static final Comparator<String> REVERSE_CASE_INSENSITIVE = new Comparator<String>() {
        @Override
        public int compare(String o1, String o2) {
            return o2.compareToIgnoreCase(o1);
        }
    };

    /**
     * Saves the returned records for further use, keyed by upper-case PDB ID.
     */
    //TODO Use SoftReferences to allow garbage collection
    private static final Map<String, Map<String, String>> recordsCache =
            new Hashtable<String, Map<String, String>>();

    /**
     * Represents the status of PDB IDs. 'OBSOLETE' and 'CURRENT' are the most
     * common.
     *
     * @author Spencer Bliven &lt;sbliven@ucsd.edu&gt;
     */
    public enum Status {
        OBSOLETE,
        CURRENT,
        AUTH,
        HOLD,
        HPUB,
        POLC,
        PROC,
        REFI,
        REPL,
        WAIT,
        WDRN,
        MODEL,
        UNKNOWN;

        /**
         * Parses a status name, ignoring case.
         *
         * @param statusStr name of one of the enum constants, in any case
         * @return the matching status; never null
         * @throws IllegalArgumentException If the string is null or not recognized
         */
        public static Status fromString(String statusStr) {
            if (statusStr == null) {
                throw new IllegalArgumentException("Unable to parse status 'null'.");
            }
            // Locale.ROOT keeps the mapping independent of the default locale.
            String statusStrUpper = statusStr.toUpperCase(Locale.ROOT);
            try {
                return Status.valueOf(statusStrUpper);
            } catch (IllegalArgumentException e) {
                // Re-throw with the message format this method has always used.
                throw new IllegalArgumentException("Unable to parse status '" + statusStrUpper + "'.");
            }
        }
    }

    /**
     * Get the status of the PDB in question.
     *
     * @param pdbId a PDB ID
     * @return The status, or null if an error occurred.
     */
    public static Status getStatus(String pdbId) {
        Status[] statuses = getStatus(new String[] {pdbId});
        if (statuses == null) {
            return null;
        }
        assert (statuses.length == 1);
        return statuses[0];
    }

    /**
     * Get the status of a collection of PDBs in a single query.
     *
     * @see #getStatus(String)
     * @param pdbIds the PDB IDs to look up; must contain at least one ID
     * @return The status array, in the same order as pdbIds, or null if the
     *  query as a whole failed. Individual entries are null when no record,
     *  no status, or an unrecognized status was returned for that ID.
     * @throws IllegalArgumentException if pdbIds is empty
     */
    public static Status[] getStatus(String[] pdbIds) {
        List<Map<String, String>> attrList = getStatusIdRecords(pdbIds);
        // Expect one record per requested ID
        if (attrList == null || attrList.size() != pdbIds.length) {
            logger.error("Error getting Status for {} from the PDB website.", Arrays.toString(pdbIds));
            return null;
        }

        Status[] statuses = new Status[pdbIds.length];
        for (int pdbNum = 0; pdbNum < pdbIds.length; pdbNum++) {
            String pdbId = pdbIds[pdbNum];

            // Records are not guaranteed to come back in request order.
            Map<String, String> attrs = findRecord(attrList, pdbId);
            if (attrs == null) {
                logger.error("No result found for {}", pdbId);
                continue;
            }

            String statusStr = attrs.get("status");
            if (statusStr == null) {
                logger.error("No status returned for {}", pdbId);
                continue;
            }

            // parseStatus logs and yields null for unrecognized values
            statuses[pdbNum] = parseStatus(statusStr);
        }
        return statuses;
    }

    /**
     * Locates the first record whose "structureId" matches pdbId, ignoring case.
     * The list is usually short, so a linear scan is used.
     *
     * @param attrList records to search
     * @param pdbId ID to look for
     * @return the matching record, or null if there is none
     */
    private static Map<String, String> findRecord(List<Map<String, String>> attrList, String pdbId) {
        for (Map<String, String> attrs : attrList) {
            String id = attrs.get("structureId");
            if (id != null && id.equalsIgnoreCase(pdbId)) {
                return attrs;
            }
        }
        return null;
    }

    /**
     * Converts a status string to a {@link Status}, logging instead of
     * throwing when the value is not recognized.
     *
     * @param statusStr non-null status attribute value
     * @return the parsed status, or null if it is not recognized
     */
    private static Status parseStatus(String statusStr) {
        try {
            return Status.fromString(statusStr);
        } catch (IllegalArgumentException e) {
            logger.error("Unknown status '{}'", statusStr);
            return null;
        }
    }

    /**
     * Gets the current version of a PDB ID. This is equivalent to selecting
     * the first element from
     * {@link #getReplacement(String, boolean, boolean) getReplacement(oldPdbId, true, false)}.
     *
     * @param oldPdbId a PDB ID
     * @return The replacement for oldPdbId, or null if none are found or if an error occurred.
     */
    public static String getCurrent(String oldPdbId) {
        List<String> replacements = getReplacement(oldPdbId, true, false);
        if (replacements == null || replacements.isEmpty()) {
            return null;
        }
        return replacements.get(0);
    }

    /**
     * Gets the PDB which superseded oldPdbId. For CURRENT IDs, this will
     * be itself. For obsolete IDs, the behavior depends on the recursion
     * parameter. If false, only IDs which directly supersede oldPdbId are
     * returned. If true, the replacements for obsolete records are recursively
     * fetched, yielding a list of all current replacements of oldPdbId.
     *
     * <p>Replacements that cannot be resolved (the recursive lookup returns
     * null, or their status cannot be determined) are skipped.
     *
     * @param oldPdbId A pdb ID
     * @param recurse Indicates whether the replacements for obsolete records
     *  should be fetched.
     * @param includeObsolete Indicates whether obsolete records should be
     *  included in the results.
     * @return The PDB which replaced oldPdbId. This may be oldPdbId itself, for
     *  current records. A return value of null indicates that the ID has
     *  been removed from the PDB or that an error has occurred.
     */
    public static List<String> getReplacement(String oldPdbId, boolean recurse, boolean includeObsolete) {
        Map<String, String> attrs = fetchSingleRecord(oldPdbId);
        if (attrs == null) {
            return null;
        }

        // Check that the status is given
        String statusStr = attrs.get("status");
        if (statusStr == null) {
            logger.error("No status returned for {}", oldPdbId);
            return null;
        }

        Status status = parseStatus(statusStr);
        if (status == null) {
            return null;
        }

        LinkedList<String> results = new LinkedList<String>();
        String replacementStr = attrs.get("replacedBy");

        switch (status) {
        case CURRENT:
            // If we're current, just return
            results.add(oldPdbId);
            return results;

        case UNKNOWN:
            return null;

        case OBSOLETE:
            if (replacementStr == null) {
                logger.error("{} is OBSOLETE but lacks a replacedBy attribute.", oldPdbId);
                return null;
            }
            if (includeObsolete) {
                results.add(oldPdbId);
            }
            // Some PDBs are not replaced.
            if (isNone(replacementStr)) {
                return results;
            }
            // Without includeObsolete, direct replacements that are themselves
            // obsolete must be filtered out in the non-recursive case.
            addReplacements(results, replacementStr, recurse, includeObsolete, !includeObsolete);
            return results;

        default:
            //TODO handle other cases explicitly. They might have other syntax than "replacedBy"
            if (replacementStr == null) {
                // If no "replacedBy" attribute, treat like we're current
                // TODO is this correct?
                results.add(oldPdbId);
                return results;
            }
            // Some PDBs are not replaced.
            if (isNone(replacementStr)) {
                return null;
            }
            // include this result, since it's not obsolete
            results.add(oldPdbId);
            addReplacements(results, replacementStr, recurse, includeObsolete, false);
            return results;
        }
    }

    /**
     * Tests whether a "replacedBy" value is the "NONE" marker (any case).
     *
     * @param replacementStr non-null "replacedBy" attribute value
     * @return true if the entry has no replacement
     */
    private static boolean isNone(String replacementStr) {
        return "NONE".equalsIgnoreCase(replacementStr.trim());
    }

    /**
     * Merges the replacements listed in a "replacedBy" attribute into results.
     *
     * @param results reverse sorted list to merge into; modified
     * @param replacementStr whitespace-delimited list of replacement IDs
     * @param recurse if true, merge the recursive replacements of each ID
     *  rather than the ID itself
     * @param includeObsolete passed through to recursive calls
     * @param skipObsolete if true (and recurse is false), look up the status of
     *  each replacement and leave out those that are OBSOLETE
     */
    private static void addReplacements(List<String> results, String replacementStr,
            boolean recurse, boolean includeObsolete, boolean skipObsolete) {

        String[] replacements = replacementStr.toUpperCase(Locale.ROOT).trim().split("\\s+");
        Arrays.sort(replacements, REVERSE_CASE_INSENSITIVE);

        for (String replacement : replacements) {
            if (replacement.isEmpty()) {
                continue;
            }
            if (recurse) {
                // May be null if the replacement could not be resolved;
                // mergeReversed ignores null.
                List<String> others = PDBStatus.getReplacement(replacement, recurse, includeObsolete);
                mergeReversed(results, others);
            } else if (!skipObsolete) {
                mergeReversed(results, Collections.singletonList(replacement));
            } else {
                // check status of replacement
                Status replacementStatus = getStatus(replacement);
                if (replacementStatus == null) {
                    logger.warn("Could not determine the status of replacement {}; skipping it", replacement);
                } else if (replacementStatus != Status.OBSOLETE) {
                    mergeReversed(results, Collections.singletonList(replacement));
                }
            }
        }
    }

    /**
     * Fetches the record for a single ID and checks that exactly one record
     * came back and that its "structureId" matches (ignoring case).
     *
     * @param pdbId a PDB ID
     * @return the record's attributes, or null (after logging) on any failure
     */
    private static Map<String, String> fetchSingleRecord(String pdbId) {
        List<Map<String, String>> attrList = getStatusIdRecords(new String[] {pdbId});
        // Expect a single record
        //TODO Is it possible to have multiple record per ID?
        // They seem to be combined into one record with space-delimited 'replaces'
        if (attrList == null || attrList.size() != 1) {
            logger.error("Error getting Status for {} from the PDB website.", pdbId);
            return null;
        }

        Map<String, String> attrs = attrList.get(0);

        // Check that the record matches pdbId. IDs are upper-cased before
        // being sent to the server, so the comparison must ignore case.
        String id = attrs.get("structureId");
        if (id == null || !id.equalsIgnoreCase(pdbId)) {
            logger.error("Results returned from the query don't match {}", pdbId);
            return null;
        }
        return attrs;
    }

    /**
     * Takes two reverse sorted lists of strings and merges the second into the
     * first. Duplicates are removed.
     *
     * @param merged A reverse sorted list. Modified by this method to contain
     *  the contents of other. Must support {@link ListIterator#add(Object)}.
     * @param other A reverse sorted list. Not modified. A null value is
     *  treated as an empty list.
     */
    private static void mergeReversed(List<String> merged,
            final List<String> other) {

        if (other == null || other.isEmpty()) {
            return;
        }

        if (merged.isEmpty()) {
            merged.addAll(other);
            return;
        }

        ListIterator<String> m = merged.listIterator();
        ListIterator<String> o = other.listIterator();

        String nextM, prevO;
        prevO = o.next();
        while (m.hasNext()) {
            // peek at m
            nextM = m.next();
            m.previous();

            // insert from O until exhausted or occurs after nextM;
            // ListIterator.add leaves the cursor just before nextM
            while (prevO.compareTo(nextM) > 0) {
                m.add(prevO);
                if (!o.hasNext()) {
                    return;
                }
                prevO = o.next();
            }
            // remove duplicates
            if (prevO.equals(nextM)) {
                if (!o.hasNext()) {
                    return;
                }
                prevO = o.next();
            }

            m.next();
        }
        // merged is exhausted: append whatever is left of other
        m.add(prevO);
        while (o.hasNext()) {
            m.add(o.next());
        }
    }

    /**
     * Get the ID of the protein which was made obsolete by newPdbId.
     *
     * @param newPdbId PDB ID of the newer structure
     * @param recurse If true, return all ancestors of newPdbId.
     *  Otherwise, just go one step older than newPdbId.
     * @return A (possibly empty) list of ID(s) of the ancestor(s) of
     *  newPdbId, or <tt>null</tt> if an error occurred.
     */
    public static List<String> getReplaces(String newPdbId, boolean recurse) {
        Map<String, String> attrs = fetchSingleRecord(newPdbId);
        if (attrs == null) {
            return null;
        }

        String replacedList = attrs.get("replaces"); //space-delimited list
        if (replacedList == null || replacedList.trim().isEmpty()) {
            // no replaces value; assume root
            return new ArrayList<String>();
        }
        String[] directDescendents = replacedList.trim().split("\\s+");

        if (!recurse) {
            return Arrays.asList(directDescendents);
        }

        // Not the root! Collect everything this entry replaces.
        // Note: Assumes a proper directed acyclic graph of revisions
        // Cycles will cause infinite loops.
        List<String> allDescendents = new LinkedList<String>();
        for (String replaced : directDescendents) {
            // null (lookup failed) is ignored by mergeReversed
            List<String> roots = PDBStatus.getReplaces(replaced, recurse);
            mergeReversed(allDescendents, roots);
        }

        // mergeReversed requires reverse sorted input; the server's order is
        // not known to be sorted, so sort a copy first.
        String[] sortedDirect = directDescendents.clone();
        Arrays.sort(sortedDirect, Collections.<String>reverseOrder());
        mergeReversed(allDescendents, Arrays.asList(sortedDirect));

        return allDescendents;
    }

    /**
     * The status of PDB IDs are cached to reduce server overload.
     *
     * This method clears the cached records.
     */
    public static void clearCache() {
        recordsCache.clear();
    }

    /**
     * Reads the server name from the {@link #PDB_SERVER_PROPERTY} system
     * property, falling back to {@link #DEFAULT_PDB_SERVER}.
     *
     * @return the host name to query
     */
    private static String getServerName() {
        String serverName = System.getProperty(PDB_SERVER_PROPERTY);
        if (serverName == null) {
            return DEFAULT_PDB_SERVER;
        }
        logger.info("Got System property {}={}", PDB_SERVER_PROPERTY, serverName);
        return serverName;
    }

    /**
     * Closes a resource, logging rather than propagating any failure so that
     * a close error cannot discard results that were already read.
     *
     * @param resource the resource to close; may be null
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            logger.warn("Could not close stream: {}", e.getMessage());
        }
    }

    /**
     * Fetches the status of one or more pdbIDs from the server.
     *
     * <p>Returns the results as a list of Attributes.
     * Each attribute should contain "structureId" and "status" attributes, and
     * possibly more.
     *
     * <p>Example:<br>
     * <tt>http://www.rcsb.org/pdb/rest/idStatus?structureID=1HHB,4HHB</tt><br>
     * <pre>
     * &lt;idStatus&gt;
     *   &lt;record structureId="1HHB" status="OBSOLETE" replacedBy="4HHB"/&gt;
     *   &lt;record structureId="4HHB" status="CURRENT" replaces="1HHB"/&gt;
     * &lt;/idStatus&gt;
     * </pre>
     *
     * <p>Results are not guaranteed to be returned in the same order as pdbIDs.
     * Refer to the structureId property to match them. Records already in the
     * cache are returned without contacting the server; newly fetched records
     * are added to the cache.
     *
     * @param pdbIDs the IDs to look up; must contain at least one ID
     * @return A list with one attribute map per record, or null if the
     *  request or the XML parsing failed
     * @throws IllegalArgumentException if pdbIDs is empty
     */
    private static List<Map<String, String>> getStatusIdRecords(String[] pdbIDs) {

        if (pdbIDs.length < 1) {
            throw new IllegalArgumentException("No pdbIDs specified");
        }

        String serverName = getServerName();

        List<Map<String, String>> result = new ArrayList<Map<String, String>>(pdbIDs.length);

        // Serve what we can from the cache and remember the rest
        List<String> missing = new ArrayList<String>();
        for (String pdbId : pdbIDs) {
            String key = pdbId.toUpperCase(Locale.ROOT);
            // Single lookup: a separate containsKey/get pair could observe a
            // concurrent clearCache() in between.
            Map<String, String> cached = recordsCache.get(key);
            if (cached != null) {
                result.add(cached);
            } else {
                missing.add(key);
            }
        }

        // check if any ids still need fetching
        if (missing.isEmpty()) {
            return result;
        }

        // Build REST query URL
        StringBuilder urlBuilder = new StringBuilder(
                String.format("http://%s/pdb/rest/idStatus?structureId=", serverName));
        for (int i = 0; i < missing.size(); i++) {
            if (i > 0) {
                urlBuilder.append(',');
            }
            urlBuilder.append(missing.get(i));
        }
        String urlStr = urlBuilder.toString();

        InputStream uStream = null;
        try {
            logger.info("Fetching {}", urlStr);

            URL url = new URL(urlStr);
            uStream = url.openStream();

            InputSource source = new InputSource(uStream);
            SAXParserFactory parserFactory = SAXParserFactory.newInstance();
            SAXParser parser = parserFactory.newSAXParser();
            XMLReader reader = parser.getXMLReader();

            PDBStatusXMLHandler handler = new PDBStatusXMLHandler();

            reader.setContentHandler(handler);
            // Without this the handler's error() override is never invoked.
            reader.setErrorHandler(handler);
            reader.parse(source);

            // Fetch results of SAX parsing
            List<Map<String, String>> records = handler.getRecords();

            //add to cache
            for (Map<String, String> record : records) {
                String pdbId = record.get("structureId");
                if (pdbId != null) {
                    recordsCache.put(pdbId.toUpperCase(Locale.ROOT), record);
                }
            }

            // return results
            result.addAll(records);

        // TODO should throw these forward and let the caller log
        } catch (IOException e) {
            logFetchError(pdbIDs, e);
            return null;
        } catch (SAXException e) {
            logFetchError(pdbIDs, e);
            return null;
        } catch (ParserConfigurationException e) {
            logFetchError(pdbIDs, e);
            return null;
        } finally {
            closeQuietly(uStream);
        }

        return result;
    }

    /**
     * Logs a failure to fetch or parse the status records.
     *
     * @param pdbIDs the IDs that were requested
     * @param e the exception that aborted the request
     */
    private static void logFetchError(String[] pdbIDs, Exception e) {
        logger.error("Problem getting status for {} from PDB server. Error: {}",
                Arrays.toString(pdbIDs), e.getMessage());
    }

    /**
     * Handles idStatus xml by storing attributes for all record elements.
     *
     * @author Spencer Bliven &lt;sbliven@ucsd.edu&gt;
     */
    private static class PDBStatusXMLHandler extends DefaultHandler {
        private final List<Map<String, String>> records;

        public PDBStatusXMLHandler() {
            records = new ArrayList<Map<String, String>>();
        }

        /**
         * Stores the attributes of every "record" element as a new map.
         *
         * @param uri
         * @param localName
         * @param qName
         * @param attributes
         * @throws SAXException
         * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
         */
        @Override
        public void startElement(String uri, String localName, String qName,
                Attributes attributes) throws SAXException {
            if (qName.equals("record")) {
                //Convert attributes into a Map, as it should have been.
                //Important since SAX reuses Attributes objects for different calls
                Map<String, String> attrMap = new HashMap<String, String>(attributes.getLength() * 2);
                for (int i = 0; i < attributes.getLength(); i++) {
                    attrMap.put(attributes.getQName(i), attributes.getValue(i));
                }
                records.add(attrMap);
            }
        }

        /**
         * Logs the parse error before delegating to the default handling.
         *
         * @param e
         * @throws SAXException
         * @see org.xml.sax.helpers.DefaultHandler#error(org.xml.sax.SAXParseException)
         */
        @Override
        public void error(SAXParseException e) throws SAXException {
            logger.error(e.getMessage());
            super.error(e);
        }

        /**
         * @return the attribute maps collected so far, one per record element
         */
        public List<Map<String, String>> getRecords() {
            return records;
        }
    }

    /**
     * Returns a sorted set of current PDB IDs.
     *
     * <p>Each line of the server response is searched for its last
     * <tt>structureId=</tt> occurrence; the four characters following the
     * character after the '=' (presumably an opening quote) are taken as the
     * ID. Lines too short to contain an ID are ignored.
     *
     * @return a sorted set of PDB IDs; empty if no stream could be opened
     * @throws IOException if the server cannot be reached or read
     */
    public static SortedSet<String> getCurrentPDBIds() throws IOException {

        SortedSet<String> allPDBs = new TreeSet<String>();

        // Build REST query URL
        String urlStr = String.format("http://%s/pdb/rest/getCurrent", getServerName());
        URL u = new URL(urlStr);

        InputStream stream = URLConnectionTools.getInputStream(u, 60000);
        if (stream == null) {
            return allPDBs;
        }

        BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                int index = line.lastIndexOf(STRUCTURE_ID_ATTR);
                if (index < 0) {
                    continue;
                }
                // skip the marker plus one more character
                int start = index + STRUCTURE_ID_ATTR.length() + 1;
                int end = start + PDB_ID_LENGTH;
                if (end <= line.length()) {
                    allPDBs.add(line.substring(start, end));
                }
            }
        } finally {
            // closing the reader also closes the underlying stream
            closeQuietly(reader);
        }
        return allPDBs;
    }

}