001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure;
022
023import com.fasterxml.jackson.databind.JsonNode;
024import com.fasterxml.jackson.databind.ObjectMapper;
025import com.fasterxml.jackson.databind.type.TypeFactory;
026import org.biojava.nbio.structure.align.util.URLConnectionTools;
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029
030import java.io.IOException;
031import java.io.InputStream;
032import java.net.URL;
033import java.util.*;
034
035/**
036 * Methods for getting the status of a PDB file (current, removed, unreleased)
037 * and for accessing different versions of the structure.
038 *
039 * <p>
040 * All methods query the
041 * <a href="https://data.rcsb.org">
042 * RCSB Data REST API</a>
043 * <p>
044 *
045 * @author Spencer Bliven
046 * @author Amr ALHOSSARY
047 * @author Jose Duarte
048 * @since 3.0.2
049 */
050public class PDBStatus {
051
052        private static final Logger logger = LoggerFactory.getLogger(PDBStatus.class);
053
054        public static final String DEFAULT_RCSB_DATA_API_SERVER = "data.rcsb.org";
055        public static final String ALL_CURRENT_ENDPOINT = "https://%s/rest/v1/holdings/current/entry_ids";
056        public static final String STATUS_ENDPOINT = "https://%s/rest/v1/holdings/status/%s";
057        public static final String STATUS_LIST_ENDPOINT = "https://%s/rest/v1/holdings/status?ids=%s";
058
059        /**
060         * Represents a simplified 3 state status of PDB IDs.
061         * @author Spencer Bliven
062         */
063        public enum Status {
064                // the simplified status enum in rcsb_repository_holdings_combined
065                REMOVED,
066                CURRENT,
067                UNRELEASED;
068
069                /**
070                 * @throws IllegalArgumentException If the string is not recognized
071                 */
072                public static Status fromString(String statusStr) {
073                        if (statusStr == null) throw new IllegalArgumentException("Status string can't be null");
074                        if("REMOVED".equalsIgnoreCase(statusStr))
075                                return Status.REMOVED;
076                        else if("CURRENT".equalsIgnoreCase(statusStr))
077                                return Status.CURRENT;
078                        else if("UNRELEASED".equalsIgnoreCase(statusStr))
079                                return Status.UNRELEASED;
080                        else {
081                                throw new IllegalArgumentException("Unable to parse status '"+statusStr+"'.");
082                        }
083                }
084        }
085
086        /**
087         * Get the status of a PDB id.
088         *
089         * @param pdbId the id
090         * @return The status.
091         */
092        public static Status getStatus(String pdbId) throws IOException {
093                URL url = new URL(String.format(STATUS_ENDPOINT, DEFAULT_RCSB_DATA_API_SERVER, pdbId.toUpperCase()));
094                ObjectMapper objectMapper = new ObjectMapper();
095                JsonNode node = objectMapper.readValue(url.openStream(), JsonNode.class);
096                return parseStatusRecord(node);
097        }
098
099        /**
100         * Get the status of a collection of PDB ids (in a single API query).
101         *
102         * @see #getStatus(String)
103         * @param pdbIds the ids
104         * @return The status array
105         */
106        public static Status[] getStatus(String[] pdbIds) throws IOException {
107
108                URL url = new URL(String.format(STATUS_LIST_ENDPOINT, DEFAULT_RCSB_DATA_API_SERVER, String.join(",", pdbIds)));
109
110                List<Status> statuses = new ArrayList<>();
111
112                ObjectMapper objectMapper = new ObjectMapper();
113                JsonNode node = objectMapper.readValue(url.openStream(), JsonNode.class);
114
115                if (node !=null && node.isArray()) {
116                        for (JsonNode record : node) {
117                                Status status = parseStatusRecord(record);
118                                statuses.add(status);
119                        }
120                }
121
122                if (statuses.size() != pdbIds.length) {
123                        logger.warn("RCSB status request was for {} ids, but {} were returned", pdbIds.length, statuses.size());
124                }
125
126                return statuses.toArray(new Status[0]);
127        }
128
129        private static Status parseStatusRecord(JsonNode jsonNode) {
130                // e.g.
131                // "rcsb_repository_holdings_combined": {
132                //"id_code_replaced_by_latest": "4HHB",
133                //"status": "REMOVED",
134                //"status_code": "OBS"
135                //},
136                JsonNode rcsbRepoHoldingsNode = jsonNode.get("rcsb_repository_holdings_combined");
137                return Status.fromString(rcsbRepoHoldingsNode.get("status").asText());
138        }
139
140        /**
141         * Gets the current version of a PDB ID.
142         *
143         * @param oldPdbId the id
144         * @return The replacement for oldPdbId, or null if none are found.
145         * If entry is current then the input PDB id is returned
146         */
147        public static String getCurrent(String oldPdbId) throws IOException {
148                URL url = new URL(String.format(STATUS_ENDPOINT, DEFAULT_RCSB_DATA_API_SERVER, oldPdbId.toUpperCase()));
149                ObjectMapper objectMapper = new ObjectMapper();
150                JsonNode node = objectMapper.readValue(url.openStream(), JsonNode.class);
151                JsonNode rcsbRepoHoldingsNode = node.get("rcsb_repository_holdings_combined");
152                Status st = Status.fromString(rcsbRepoHoldingsNode.get("status").asText());
153                if (st == Status.REMOVED) {
154                        JsonNode replacedByNode = rcsbRepoHoldingsNode.get("id_code_replaced_by_latest");
155                        if (replacedByNode != null)
156                                return replacedByNode.asText();
157                        else
158                                return null;
159                } else if (st == Status.CURRENT) {
160                        return oldPdbId;
161                } else {
162                        return null;
163                }
164
165        }
166
167        /**
168         * Returns all current PDB IDs
169         *
170         * @return a list of PDB IDs
171         * @throws IOException if a problem occurs retrieving the information
172         */
173        public static SortedSet<String> getCurrentPDBIds() throws IOException {
174
175                // Build REST query URL
176                String urlStr = String.format(ALL_CURRENT_ENDPOINT, DEFAULT_RCSB_DATA_API_SERVER);
177                URL u = new URL(urlStr);
178
179                InputStream stream = URLConnectionTools.getInputStream(u, 60000);
180
181                ObjectMapper objectMapper = new ObjectMapper();
182                TypeFactory typeFactory = objectMapper.getTypeFactory();
183                List<String> pdbIdList = objectMapper.readValue(stream, typeFactory.constructCollectionType(List.class, String.class));
184
185                return new TreeSet<>(pdbIdList);
186        }
187
188        public static void main(String[] args) throws Exception {
189                SortedSet<String> all = getCurrentPDBIds();
190                System.out.println("Number of current PDB ids is: " + all.size());
191        }
192}