001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021/**
022 *
023 */
024package org.biojava.nbio.structure;
025
026import org.biojava.nbio.structure.align.util.URLConnectionTools;
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029import org.xml.sax.*;
030import org.xml.sax.helpers.DefaultHandler;
031
032import javax.xml.parsers.ParserConfigurationException;
033import javax.xml.parsers.SAXParser;
034import javax.xml.parsers.SAXParserFactory;
035import java.io.BufferedReader;
036import java.io.IOException;
037import java.io.InputStream;
038import java.io.InputStreamReader;
039import java.net.URL;
040import java.util.*;
041
042/**
043 * Methods for getting the status of a PDB file (current, obsolete, etc)
044 * and for accessing different versions of the structure.
045 *
046 * <p>All methods query the
047 * <a href="http://www.rcsb.org/pdb/rest/idStatus?structureId=1HHB,3HHB,4HHB">
048 * PDB website.</a>
049 *
050 * <p>PDB supersessions form a directed acyclic graph, where edges point from an
051 * obsolete ID to the entry that directly superseded it. For example, here are
052 * edges from one portion of the graph:<br/>
053 *
054 * 1CAT -> 3CAT<br/>
055 * 3CAT -> 7CAT<br/>
056 * 3CAT -> 8CAT<br/>
057 *
058 * <p>The methods {@link #getReplaces(String, boolean) getReplaces(pdbId, false)}/
059 * {@link #getReplacement(String, boolean, boolean) getReplacement(pdbId, false, true)}
060 * just get the incoming/outgoing edges for a single node. The recursive versions
061 * ({@link #getReplaces(String, boolean) getReplaces(pdbId, true)},
062 * {@link #getReplacement(String, boolean, boolean) getReplacement(pdbId, true, false)})
063 * will do a depth-first search up/down the tree and return a list of all nodes ]
064 * reached.
065 *
066 * <p>Finally, the getCurrent() method returns a single PDB ID from among the
067 * results of
068 * {@link #getReplacement(String, boolean) getReplacement(pdbId, true)}.
069 * To be consistent with the old REST ordering, this is the PDB ID that occurs
070 * last alphabetically.
071 *
072 * <p>Results are cached to reduce server load.
073 *
074 * @author Spencer Bliven <sbliven@ucsd.edu>
075 * @author Amr AL-Hossary
076 * @since 3.0.2
077 */
078public class PDBStatus {
079
080        private static final Logger logger = LoggerFactory.getLogger(PDBStatus.class);
081
082        public static final String DEFAULT_PDB_SERVER = "www.rcsb.org";
083        public static final String PDB_SERVER_PROPERTY = "PDB.SERVER";
084
085        /**
086         * saves the returned results for further use.
087         *
088         */
089        //TODO Use SoftReferences to allow garbage collection
090        private static Map<String, Map<String, String>> recordsCache= new Hashtable<String, Map<String, String>>();
091
092        /**
093         * Represents the status of PDB IDs. 'OBSOLETE' and 'CURRENT' are the most
094         * common.
095         * @author Spencer Bliven <sbliven@ucsd.edu>
096         *
097         */
098        public enum Status {
099                OBSOLETE,
100                CURRENT,
101                AUTH,
102                HOLD,
103                HPUB,
104                POLC,
105                PROC,
106                REFI,
107                REPL,
108                WAIT,
109                WDRN,
110                MODEL,
111                UNKNOWN;
112
113
114                /**
115                 *
116                 * @param statusStr
117                 * @return
118                 * @throws IllegalArgumentException If the string is not recognized
119                 */
120                public static Status fromString(String statusStr) {
121                        Status status;
122                        String statusStrUpper = statusStr.toUpperCase();
123                        if(statusStrUpper.equalsIgnoreCase("OBSOLETE"))
124                                status = Status.OBSOLETE;
125                        else if(statusStrUpper.equalsIgnoreCase("CURRENT"))
126                                status = Status.CURRENT;
127                        else if(statusStrUpper.equalsIgnoreCase("AUTH"))
128                                status = Status.AUTH;
129                        else if(statusStrUpper.equalsIgnoreCase("HOLD"))
130                                status = Status.HOLD;
131                        else if(statusStrUpper.equalsIgnoreCase("HPUB"))
132                                status = Status.HPUB;
133                        else if(statusStrUpper.equalsIgnoreCase("POLC"))
134                                status = Status.POLC;
135                        else if(statusStrUpper.equalsIgnoreCase("PROC"))
136                                status = Status.PROC;
137                        else if(statusStrUpper.equalsIgnoreCase("REFI"))
138                                status = Status.REFI;
139                        else if(statusStrUpper.equalsIgnoreCase("REPL"))
140                                status = Status.REPL;
141                        else if(statusStrUpper.equalsIgnoreCase("WAIT"))
142                                status = Status.WAIT;
143                        else if(statusStrUpper.equalsIgnoreCase("WDRN"))
144                                status = Status.WDRN;
145                        else if(statusStrUpper.equalsIgnoreCase("MODEL"))
146                                status = Status.MODEL;
147                        else if(statusStrUpper.equalsIgnoreCase("UNKNOWN"))
148                                status = Status.UNKNOWN;
149                        else {
150                                throw new IllegalArgumentException("Unable to parse status '"+statusStrUpper+"'.");
151                        }
152                        return status;
153                }
154        }
155
156        /**
157         * Get the status of the PDB in question.
158         *
159         * @param pdbId
160         * @return The status, or null if an error occurred.
161         */
162        public static Status getStatus(String pdbId) {
163                Status[] statuses = getStatus(new String[] {pdbId});
164                if(statuses != null) {
165                        assert(statuses.length == 1);
166                        return statuses[0];
167                } else {
168                        return null;
169                }
170        }
171
172        /**
173         * Get the status of the a collection of PDBs in question in a single query.
174         *
175         * @see #getStatus(String)
176         * @param pdbIds
177         * @return The status array, or null if an error occurred.
178         */
179        public static Status[] getStatus(String[] pdbIds) {
180                Status[] statuses = new Status[pdbIds.length];
181
182                List<Map<String,String>> attrList = getStatusIdRecords(pdbIds);
183                //Expect a single record
184                if(attrList == null || attrList.size() != pdbIds.length) {
185                        logger.error("Error getting Status for {} from the PDB website.", Arrays.toString(pdbIds));
186                        return null;
187                }
188
189
190                for(int pdbNum = 0;pdbNum<pdbIds.length;pdbNum++) {
191                        //Locate first element of attrList with matching structureId.
192                        //attrList is usually short, so don't worry about performance
193                        boolean foundAttr = false;
194                        for( Map<String,String> attrs : attrList) {
195
196                                //Check that the record matches pdbId
197                                String id = attrs.get("structureId");
198                                if(id == null || !id.equalsIgnoreCase(pdbIds[pdbNum])) {
199                                        continue;
200                                }
201
202                                //Check that the status is given
203                                String statusStr = attrs.get("status");
204                                Status status = null;
205                                if(statusStr == null ) {
206                                        logger.error("No status returned for {}", pdbIds[pdbNum]);
207                                        statuses[pdbNum] = null;
208                                } else {
209                                        status = Status.fromString(statusStr);
210                                }
211
212                                if(status == null) {
213                                        logger.error("Unknown status '{}'", statusStr);
214                                        statuses[pdbNum] = null;
215                                }
216
217                                statuses[pdbNum] = status;
218                                foundAttr = true;
219                        }
220                        if(!foundAttr) {
221                                logger.error("No result found for {}", pdbIds[pdbNum]);
222                                statuses[pdbNum] = null;
223                        }
224                }
225
226                return statuses;
227        }
228
229        /**
230         * Gets the current version of a PDB ID. This is equivalent to selecting
231         * the first element from
232         * {@link #getReplacement(String,boolean,boolean)
233         *
234         * @param oldPdbId
235         * @return The replacement for oldPdbId, or null if none are found or if an error occurred.
236         */
237        public static String getCurrent(String oldPdbId) {
238                List<String> replacements =  getReplacement(oldPdbId,true, false);
239                if(replacements != null && !replacements.isEmpty())
240                        return replacements.get(0);
241                else
242                        return null;
243        }
244
245        /**
246         * Gets the PDB which superseded oldPdbId. For CURRENT IDs, this will
247         * be itself. For obsolete IDs, the behavior depends on the recursion
248         * parameter. If false, only IDs which directly supersede oldPdbId are
249         * returned. If true, the replacements for obsolete records are recursively
250         * fetched, yielding a list of all current replacements of oldPdbId.
251         *
252         *
253         *
254         * @param oldPdbId A pdb ID
255         * @param recurse Indicates whether the replacements for obsolete records
256         *              should be fetched.
257         * @param includeObsolete Indicates whether obsolete records should be
258         *              included in the results.
259         * @return The PDB which replaced oldPdbId. This may be oldPdbId itself, for
260         *              current records. A return value of null indicates that the ID has
261         *              been removed from the PDB or that an error has occurred.
262         */
263        public static List<String> getReplacement(String oldPdbId, boolean recurse, boolean includeObsolete) {
264                List<Map<String,String>> attrList = getStatusIdRecords(new String[] {oldPdbId});
265                //Expect a single record
266                if(attrList == null || attrList.size() != 1) {
267                        logger.error("Error getting Status for {} from the PDB website.", oldPdbId);
268                        return null;
269                }
270
271                Map<String,String> attrs = attrList.get(0);
272
273                //Check that the record matches pdbId
274                String id = attrs.get("structureId");
275                if(id == null || !id.equalsIgnoreCase(oldPdbId)) {
276                        logger.error("Results returned from the query don't match {}", oldPdbId);
277                        return null;
278                }
279
280                //Check that the status is given
281                String statusStr = attrs.get("status");
282                if(statusStr == null ) {
283                        logger.error("No status returned for {}", oldPdbId);
284                        return null;
285                }
286
287                Status status = Status.fromString(statusStr);
288                if(status == null ) {
289                        logger.error("Unknown status '{}'", statusStr);
290                        return null;
291                }
292
293                // If we're current, just return
294                LinkedList<String> results = new LinkedList<String>();
295                switch(status) {
296                        case CURRENT:
297                                results.add(oldPdbId);
298                                return results;
299                        case OBSOLETE: {
300                                String replacementStr = attrs.get("replacedBy");
301                                if(replacementStr == null) {
302                                        logger.error("{} is OBSOLETE but lacks a replacedBy attribute.", oldPdbId);
303                                        return null;
304                                }
305                                replacementStr = replacementStr.toUpperCase();
306                                //include this result
307                                if(includeObsolete) {
308                                        results.add(oldPdbId);
309                                }
310                                // Some PDBs are not replaced.
311                                if(replacementStr.equals("NONE")) {
312                                        return results; //empty
313                                }
314
315                                String[] replacements = replacementStr.split(" ");
316                                Arrays.sort(replacements, new Comparator<String>() {
317                                        @Override
318                                        public int compare(String o1, String o2) {
319                                                return o2.compareToIgnoreCase(o1);
320                                        }
321                                });
322                                for(String replacement : replacements) {
323
324                                        // Return the replacement.
325                                        if(recurse) {
326                                                List<String> others = PDBStatus.getReplacement(replacement, recurse, includeObsolete);
327                                                mergeReversed(results,others);
328                                        }
329                                        else {
330                                                if(includeObsolete) {
331                                                        mergeReversed(results,Arrays.asList(replacement));
332                                                } else {
333                                                        // check status of replacement
334                                                        Status replacementStatus = getStatus(replacement);
335                                                        switch(replacementStatus) {
336                                                                case OBSOLETE:
337                                                                        //ignore obsolete
338                                                                        break;
339                                                                case CURRENT:
340                                                                default:
341                                                                        // include it
342                                                                        mergeReversed(results,Arrays.asList(replacement));
343                                                        }
344                                                }
345                                        }
346                                }
347
348
349                                return results;
350                        }
351                        case UNKNOWN:
352                                return null;
353                        default: { //TODO handle other cases explicitly. They might have other syntax than "replacedBy"
354                                String replacementStr = attrs.get("replacedBy");
355
356                                if(replacementStr == null) {
357                                        // If no "replacedBy" attribute, treat like we're current
358                                        // TODO is this correct?
359                                        results.add(oldPdbId);
360                                        return results;
361                                }
362
363                                replacementStr = replacementStr.toUpperCase();
364                                // Some PDBs are not replaced.
365                                if(replacementStr.equals("NONE")) {
366                                        return null;
367                                }
368
369
370                                //include this result, since it's not obsolete
371                                results.add(oldPdbId);
372
373                                String[] replacements = replacementStr.split(" ");
374                                Arrays.sort(replacements, new Comparator<String>() {
375                                        @Override
376                                        public int compare(String o1, String o2) {
377                                                return o2.compareToIgnoreCase(o1);
378                                        }
379                                });
380                                for(String replacement : replacements) {
381
382                                        // Return the replacement.
383                                        if(recurse) {
384                                                List<String> others = PDBStatus.getReplacement(replacement, recurse, includeObsolete);
385                                                mergeReversed(results,others);
386                                        }
387                                        else {
388                                                mergeReversed(results,Arrays.asList(replacement));
389                                        }
390                                }
391
392
393                                return results;
394                        }
395                }
396        }
397
398        /**
399         * Takes two reverse sorted lists of strings and merges the second into the
400         * first. Duplicates are removed.
401         *
402         * @param merged A reverse sorted list. Modified by this method to contain
403         *              the contents of other.
404         * @param other A reverse sorted list. Not modified.
405         */
406        private static void mergeReversed(List<String> merged,
407                                          final List<String> other) {
408
409                if(other.isEmpty())
410                        return;
411
412                if(merged.isEmpty()) {
413                        merged.addAll(other);
414                        return;
415                }
416
417                ListIterator<String> m = merged.listIterator();
418                ListIterator<String> o = other.listIterator();
419
420                String nextM, prevO;
421                prevO = o.next();
422                while(m.hasNext()) {
423                        // peek at m
424                        nextM = m.next();
425                        m.previous();
426
427                        //insert from O until exhausted or occurs after nextM
428                        while(prevO.compareTo(nextM) > 0) {
429                                m.add(prevO);
430                                if(!o.hasNext()) {
431                                        return;
432                                }
433                                prevO = o.next();
434                        }
435                        //remove duplicates
436                        if(prevO.equals(nextM)) {
437                                if(!o.hasNext()) {
438                                        return;
439                                }
440                                prevO = o.next();
441                        }
442
443                        m.next();
444                }
445                m.add(prevO);
446                while(o.hasNext()) {
447                        m.add(o.next());
448                }
449
450        }
451
452
453        /**
454         * Get the ID of the protein which was made obsolete by newPdbId.
455         *
456         * @param newPdbId PDB ID of the newer structure
457         * @param recurse If true, return all ancestors of newPdbId.
458         *              Otherwise, just go one step newer than oldPdbId.
459         * @return A (possibly empty) list of ID(s) of the ancestor(s) of
460         *              newPdbId, or <tt>null</tt> if an error occurred.
461         */
462        public static List<String> getReplaces(String newPdbId, boolean recurse) {
463                List<Map<String,String>> attrList = getStatusIdRecords(new String[] {newPdbId});
464                //Expect a single record
465                if(attrList == null || attrList.size() != 1) {
466                        //TODO Is it possible to have multiple record per ID?
467                        // They seem to be combined into one record with space-delimited 'replaces'
468                        logger.error("Error getting Status for {} from the PDB website.", newPdbId);
469                        return null;
470                }
471
472                Map<String,String> attrs = attrList.get(0);
473
474                //Check that the record matches pdbId
475                String id = attrs.get("structureId");
476                if(id == null || !id.equals(newPdbId)) {
477                        logger.error("Results returned from the query don't match {}", newPdbId);
478                        return null;
479                }
480
481
482                String replacedList = attrs.get("replaces"); //space-delimited list
483                if(replacedList == null) {
484                        // no replaces value; assume root
485                        return new ArrayList<String>();
486                }
487                String[] directDescendents = replacedList.split("\\s");
488
489                // Not the root! Return the replaced PDB.
490                if(recurse) {
491                        // Note: Assumes a proper directed acyclic graph of revisions
492                        // Cycles will cause infinite loops.
493                        List<String> allDescendents = new LinkedList<String>();
494                        for(String replaced : directDescendents) {
495                                List<String> roots = PDBStatus.getReplaces(replaced, recurse);
496                                mergeReversed(allDescendents,roots);
497                        }
498                        mergeReversed(allDescendents,Arrays.asList(directDescendents));
499
500                        return allDescendents;
501                } else {
502                        return Arrays.asList(directDescendents);
503                }
504        }
505
506
507        /**
508         * The status of PDB IDs are cached to reduce server overload.
509         *
510         * This method clears the cached records.
511         */
512        public static void clearCache() {
513                recordsCache.clear();
514        }
515
516        /**
517         * Fetches the status of one or more pdbIDs from the server.
518         *
519         * <p>Returns the results as a list of Attributes.
520         * Each attribute should contain "structureId" and "status" attributes, and
521         * possibly more.
522         *
523         * <p>Example:</br>
524         * <tt>http://www.rcsb.org/pdb/rest/idStatus?structureID=1HHB,4HHB</tt></br>
525         *<pre>&lt;idStatus&gt;
526         *  &lt;record structureId="1HHB" status="OBSOLETE" replacedBy="4HHB"/&gt;
527         *  &lt;record structureId="4HHB" status="CURRENT" replaces="1HHB"/&gt;
528         *&lt;/idStatus&gt;
529         * </pre>
530         *
531         * <p>Results are not guaranteed to be returned in the same order as pdbIDs.
532         * Refer to the structureId property to match them.
533         *
534         * @param pdbIDs
535         * @return A map between attributes and values
536         */
537        private static List<Map<String, String>> getStatusIdRecords(String[] pdbIDs) {
538
539                List<Map<String,String>> result = new ArrayList<Map<String,String>>(pdbIDs.length);
540
541                String serverName = System.getProperty(PDB_SERVER_PROPERTY);
542
543                if ( serverName == null)
544                        serverName = DEFAULT_PDB_SERVER;
545                else
546                        logger.info(String.format("Got System property %s=%s",PDB_SERVER_PROPERTY,serverName));
547
548                // Build REST query URL
549                if(pdbIDs.length < 1) {
550                        throw new IllegalArgumentException("No pdbIDs specified");
551                }
552                String urlStr = String.format("http://%s/pdb/rest/idStatus?structureId=",serverName);
553                for(String pdbId : pdbIDs) {
554                        pdbId = pdbId.toUpperCase();
555                        //check the cache
556                        if (recordsCache.containsKey(pdbId)) {
557                                //logger.debug("Fetching "+pdbId+" from Cache");
558                                result.add( recordsCache.get(pdbId) );
559                        } else {
560                                urlStr += pdbId + ",";
561                        }
562                }
563
564                // check if any ids still need fetching
565                if(urlStr.charAt(urlStr.length()-1) == '=') {
566                        return result;
567                }
568
569                try {
570                        logger.info("Fetching {}", urlStr);
571
572                        URL url = new URL(urlStr);
573
574                        InputStream uStream = url.openStream();
575
576                        InputSource source = new InputSource(uStream);
577                        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
578                        SAXParser parser = parserFactory.newSAXParser();
579                        XMLReader reader = parser.getXMLReader();
580
581                        PDBStatusXMLHandler handler = new PDBStatusXMLHandler();
582
583                        reader.setContentHandler(handler);
584                        reader.parse(source);
585
586                        // Fetch results of SAX parsing
587                        List<Map<String,String>> records = handler.getRecords();
588
589                        //add to cache
590                        for(Map<String,String> record : records) {
591                                String pdbId = record.get("structureId").toUpperCase();
592                                if(pdbId != null) {
593                                        recordsCache.put(pdbId, record);
594                                }
595                        }
596
597                        // return results
598                        result.addAll(handler.getRecords());
599
600                        // TODO should throw these forward and let the caller log
601                } catch (IOException e){
602                        logger.error("Problem getting status for {} from PDB server. Error: {}", Arrays.toString(pdbIDs), e.getMessage());
603                        return null;
604                } catch (SAXException e) {
605                        logger.error("Problem getting status for {} from PDB server. Error: {}", Arrays.toString(pdbIDs), e.getMessage());
606                        return null;
607                } catch (ParserConfigurationException e) {
608                        logger.error("Problem getting status for {} from PDB server. Error: {}", Arrays.toString(pdbIDs), e.getMessage());
609                        return null;
610                }
611
612                return result;
613        }
614
615        /**
616         * Handles idStatus xml by storing attributes for all record elements.
617         *
618         * @author Spencer Bliven <sbliven@ucsd.edu>
619         *
620         */
621        private static class PDBStatusXMLHandler extends DefaultHandler {
622                private List<Map<String,String>> records;
623
624                public PDBStatusXMLHandler() {
625                        records = new ArrayList<Map<String,String>>();
626                }
627
628                /**
629                 * @param uri
630                 * @param localName
631                 * @param qName
632                 * @param attributes
633                 * @throws SAXException
634                 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
635                 */
636                @Override
637                public void startElement(String uri, String localName, String qName,
638                                         Attributes attributes) throws SAXException {
639                        //System.out.format("Starting element: uri='%s' localName='%s' qName='%s'\n", uri, localName, qName);
640                        if(qName.equals("record")) {
641                                //Convert attributes into a Map, as it should have been.
642                                //Important since SAX reuses Attributes objects for different calls
643                                Map<String,String> attrMap = new HashMap<String,String>(attributes.getLength()*2);
644                                for(int i=0;i<attributes.getLength();i++) {
645                                        attrMap.put(attributes.getQName(i), attributes.getValue(i));
646                                }
647                                records.add(attrMap);
648                        }
649                }
650
651
652                /**
653                 * @param e
654                 * @throws SAXException
655                 * @see org.xml.sax.helpers.DefaultHandler#error(org.xml.sax.SAXParseException)
656                 */
657                @Override
658                public void error(SAXParseException e) throws SAXException {
659                        logger.error(e.getMessage());
660                        super.error(e);
661                }
662
663
664                public List<Map<String, String>> getRecords() {
665                        return records;
666                }
667        }
668
669        /** Returns a list of current PDB IDs
670         *
671         * @return a list of PDB IDs, or null if a problem occurred
672         */
673
674        public static SortedSet<String> getCurrentPDBIds() throws IOException {
675
676                SortedSet<String> allPDBs = new TreeSet<String>();
677                String serverName = System.getProperty(PDB_SERVER_PROPERTY);
678
679                if ( serverName == null)
680                        serverName = DEFAULT_PDB_SERVER;
681                else
682                        logger.info(String.format("Got System property %s=%s",PDB_SERVER_PROPERTY,serverName));
683
684                // Build REST query URL
685
686                String urlStr = String.format("http://%s/pdb/rest/getCurrent",serverName);
687                URL u = new URL(urlStr);
688
689                InputStream stream = URLConnectionTools.getInputStream(u, 60000);
690
691                if (stream != null) {
692                        BufferedReader reader = new BufferedReader(
693                                        new InputStreamReader(stream));
694
695                        String line = null;
696
697                        while ((line = reader.readLine()) != null) {
698                                int index = line.lastIndexOf("structureId=");
699                                if (index > 0) {
700                                        allPDBs.add(line.substring(index + 13, index + 17));
701                                }
702                        }
703                }
704                return allPDBs;
705
706        }
707
708}