001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure; 022 023import com.fasterxml.jackson.databind.JsonNode; 024import com.fasterxml.jackson.databind.ObjectMapper; 025import com.fasterxml.jackson.databind.type.TypeFactory; 026import org.biojava.nbio.structure.align.util.URLConnectionTools; 027import org.slf4j.Logger; 028import org.slf4j.LoggerFactory; 029 030import java.io.IOException; 031import java.io.InputStream; 032import java.net.URL; 033import java.util.*; 034 035/** 036 * Methods for getting the status of a PDB file (current, removed, unreleased) 037 * and for accessing different versions of the structure. 038 * 039 * <p> 040 * All methods query the 041 * <a href="https://data.rcsb.org"> 042 * RCSB Data REST API</a> 043 * <p> 044 * 045 * @author Spencer Bliven 046 * @author Amr ALHOSSARY 047 * @author Jose Duarte 048 * @since 3.0.2 049 */ 050public class PDBStatus { 051 052 private static final Logger logger = LoggerFactory.getLogger(PDBStatus.class); 053 054 public static final String DEFAULT_RCSB_DATA_API_SERVER = "data.rcsb.org"; 055 public static final String ALL_CURRENT_ENDPOINT = "https://%s/rest/v1/holdings/current/entry_ids"; 056 public static final String STATUS_ENDPOINT = "https://%s/rest/v1/holdings/status/%s"; 057 public static final String STATUS_LIST_ENDPOINT = "https://%s/rest/v1/holdings/status?ids=%s"; 058 059 /** 060 * Represents a simplified 3 state status of PDB IDs. 061 * @author Spencer Bliven 062 */ 063 public enum Status { 064 // the simplified status enum in rcsb_repository_holdings_combined 065 REMOVED, 066 CURRENT, 067 UNRELEASED; 068 069 /** 070 * @throws IllegalArgumentException If the string is not recognized 071 */ 072 public static Status fromString(String statusStr) { 073 if (statusStr == null) throw new IllegalArgumentException("Status string can't be null"); 074 if(statusStr.equalsIgnoreCase("REMOVED")) 075 return Status.REMOVED; 076 else if(statusStr.equalsIgnoreCase("CURRENT")) 077 return Status.CURRENT; 078 else if(statusStr.equalsIgnoreCase("UNRELEASED")) 079 return Status.UNRELEASED; 080 else { 081 throw new IllegalArgumentException("Unable to parse status '"+statusStr+"'."); 082 } 083 } 084 } 085 086 /** 087 * Get the status of a PDB id. 088 * 089 * @param pdbId the id 090 * @return The status. 091 */ 092 public static Status getStatus(String pdbId) throws IOException { 093 URL url = new URL(String.format(STATUS_ENDPOINT, DEFAULT_RCSB_DATA_API_SERVER, pdbId.toUpperCase())); 094 ObjectMapper objectMapper = new ObjectMapper(); 095 JsonNode node = objectMapper.readValue(url.openStream(), JsonNode.class); 096 return parseStatusRecord(node); 097 } 098 099 /** 100 * Get the status of a collection of PDB ids (in a single API query). 101 * 102 * @see #getStatus(String) 103 * @param pdbIds the ids 104 * @return The status array 105 */ 106 public static Status[] getStatus(String[] pdbIds) throws IOException { 107 108 URL url = new URL(String.format(STATUS_LIST_ENDPOINT, DEFAULT_RCSB_DATA_API_SERVER, String.join(",", pdbIds))); 109 110 List<Status> statuses = new ArrayList<>(); 111 112 ObjectMapper objectMapper = new ObjectMapper(); 113 JsonNode node = objectMapper.readValue(url.openStream(), JsonNode.class); 114 115 if (node !=null && node.isArray()) { 116 for (JsonNode record : node) { 117 Status status = parseStatusRecord(record); 118 statuses.add(status); 119 } 120 } 121 122 if (statuses.size() != pdbIds.length) { 123 logger.warn("RCSB status request was for {} ids, but {} were returned", pdbIds.length, statuses.size()); 124 } 125 126 return statuses.toArray(new Status[0]); 127 } 128 129 private static Status parseStatusRecord(JsonNode jsonNode) { 130 // e.g. 131 // "rcsb_repository_holdings_combined": { 132 //"id_code_replaced_by_latest": "4HHB", 133 //"status": "REMOVED", 134 //"status_code": "OBS" 135 //}, 136 JsonNode rcsbRepoHoldingsNode = jsonNode.get("rcsb_repository_holdings_combined"); 137 return Status.fromString(rcsbRepoHoldingsNode.get("status").asText()); 138 } 139 140 /** 141 * Gets the current version of a PDB ID. 142 * 143 * @param oldPdbId the id 144 * @return The replacement for oldPdbId, or null if none are found. 145 * If entry is current then the input PDB id is returned 146 */ 147 public static String getCurrent(String oldPdbId) throws IOException { 148 URL url = new URL(String.format(STATUS_ENDPOINT, DEFAULT_RCSB_DATA_API_SERVER, oldPdbId.toUpperCase())); 149 ObjectMapper objectMapper = new ObjectMapper(); 150 JsonNode node = objectMapper.readValue(url.openStream(), JsonNode.class); 151 JsonNode rcsbRepoHoldingsNode = node.get("rcsb_repository_holdings_combined"); 152 Status st = Status.fromString(rcsbRepoHoldingsNode.get("status").asText()); 153 if (st == Status.REMOVED) { 154 JsonNode replacedByNode = rcsbRepoHoldingsNode.get("id_code_replaced_by_latest"); 155 if (replacedByNode != null) 156 return replacedByNode.asText(); 157 else 158 return null; 159 } else if (st == Status.CURRENT) { 160 return oldPdbId; 161 } else { 162 return null; 163 } 164 165 } 166 167 /** 168 * Returns all current PDB IDs 169 * 170 * @return a list of PDB IDs 171 * @throws IOException if a problem occurs retrieving the information 172 */ 173 public static SortedSet<String> getCurrentPDBIds() throws IOException { 174 175 // Build REST query URL 176 String urlStr = String.format(ALL_CURRENT_ENDPOINT, DEFAULT_RCSB_DATA_API_SERVER); 177 URL u = new URL(urlStr); 178 179 InputStream stream = URLConnectionTools.getInputStream(u, 60000); 180 181 ObjectMapper objectMapper = new ObjectMapper(); 182 TypeFactory typeFactory = objectMapper.getTypeFactory(); 183 List<String> pdbIdList = objectMapper.readValue(stream, typeFactory.constructCollectionType(List.class, String.class)); 184 185 return new TreeSet<>(pdbIdList); 186 } 187 188 public static void main(String[] args) throws Exception { 189 SortedSet<String> all = getCurrentPDBIds(); 190 System.out.println("Number of current PDB ids is: " + all.size()); 191 } 192}