001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.rcsb; 022 023import org.biojava.nbio.structure.align.client.JFatCatClient; 024import org.biojava.nbio.structure.align.client.StructureName; 025import org.biojava.nbio.structure.align.util.URLConnectionTools; 026import org.biojava.nbio.structure.align.xml.RepresentativeXMLConverter; 027 028import java.io.BufferedReader; 029import java.io.InputStream; 030import java.io.InputStreamReader; 031import java.net.URL; 032import java.util.Arrays; 033import java.util.List; 034import java.util.SortedSet; 035import java.util.TreeSet; 036 037/** 038 * TODO Move this to {@link Representatives}. 039 */ 040public class GetRepresentatives { 041 042 private static String clusterUrl = "http://www.rcsb.org/pdb/rest/representatives?cluster="; 043 private static String allUrl = "http://www.rcsb.org/pdb/rest/getCurrent/"; 044 045 // available sequence clusters 046 private static List<Integer> seqIdentities = Arrays.asList(30, 40, 50, 70, 90, 95, 100); 047 048 049 /** 050 * Returns a representative set of PDB protein chains at the specified sequence 051 * identity cutoff. See http://www.pdb.org/pdb/statistics/clusterStatistics.do 052 * for more information. 053 * @param sequenceIdentity sequence identity threshold 054 * @return PdbChainKey set of representatives 055 */ 056 public static SortedSet<StructureName> getRepresentatives(int sequenceIdentity) { 057 SortedSet<StructureName> representatives = new TreeSet<StructureName>(); 058 059 if (!seqIdentities.contains(sequenceIdentity)) { 060 System.err.println("Error: representative chains are not available for %sequence identity: " 061 + sequenceIdentity); 062 return representatives; 063 } 064 065 066 try { 067 068 URL u = new URL(clusterUrl + sequenceIdentity); 069 070 InputStream stream = URLConnectionTools.getInputStream(u, 60000); 071 072 String xml = null; 073 074 if (stream != null) { 075 xml = JFatCatClient.convertStreamToString(stream); 076 077 SortedSet<String> reps = RepresentativeXMLConverter.fromXML(xml); 078 079 for (String s : reps) { 080 StructureName k = new StructureName(s); 081 representatives.add(k); 082 } 083 084 } 085 086 } catch (Exception e) { 087 e.printStackTrace(); 088 } 089 090 return representatives; 091 } 092 093 /** 094 * Returns the current list of all PDB IDs. 095 * @return PdbChainKey set of all PDB IDs. 096 */ 097 public static SortedSet<String> getAll() { 098 SortedSet<String> representatives = new TreeSet<String>(); 099 100 try { 101 102 URL u = new URL(allUrl); 103 104 InputStream stream = URLConnectionTools.getInputStream(u, 60000); 105 106 if (stream != null) { 107 BufferedReader reader = new BufferedReader( 108 new InputStreamReader(stream)); 109 110 String line = null; 111 112 while ((line = reader.readLine()) != null) { 113 int index = line.lastIndexOf("structureId="); 114 if (index > 0) { 115 representatives.add(line.substring(index + 13, index + 17)); 116 } 117 } 118 } 119 120 } catch (Exception e) { 121 e.printStackTrace(); 122 } 123 124 return representatives; 125 } 126}