001/** 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the terms of the GNU Lesser General Public Licence. This 005 * should be distributed with the code. If you do not have a copy, see: 006 * 007 * http://www.gnu.org/copyleft/lesser.html 008 * 009 * Copyright for this code is held jointly by the individual authors. These should be listed in @author doc comments. 010 * 011 * For more information on the BioJava project and its aims, or to join the biojava-l mailing list, visit the home page 012 * at: 013 * 014 * http://www.biojava.org/ 015 * 016 */ 017package org.biojava.nbio.structure.scop; 018 019import java.io.BufferedReader; 020import java.io.IOException; 021import java.io.InputStreamReader; 022import java.io.Reader; 023import java.lang.ref.SoftReference; 024import java.net.MalformedURLException; 025import java.net.URL; 026import java.util.*; 027import java.util.logging.Level; 028import java.util.logging.Logger; 029 030 031/** 032 * Provides programmatic access to ASTRAL representative sets. See the paper by <a 033 * href="http://scop.berkeley.edu/references/2004-nar-astral.pdf">Chandonia et. al.</a> for more information. Example: 034 * 035 * <pre> 036 * Set<String> astralSet = Astral.getRepresentatives(Astral.AstralSet.NINETY_FIVE_175B); 037 * </pre> 038 * 039 * This class uses a multiton pattern with soft references for caching. In short: the first time you call the above, it 040 * will fetch the data from ASTRAL; the second time will (probably) not have to; and the instances can still be 041 * garbage-collected if necessary (meaning they don't <em>require</em> heap memory). 042 * 043 * @author dmyerstu 044 * @since 3.0.6 045 */ 046public class Astral { 047 048 /** 049 * An ASTRAL sequence-identity cutoff with an identifier such as: 050 * 051 * <pre> 052 * 1.75A_95 053 * </pre> 054 * 055 * Also contains a URL pointing to a FASTA file containing the representatives. Every character before the first 056 * whitespace character of each header in the FASTA file is expected to be a representative's name. 057 * 058 * @author dmyersturnbull 059 * 060 */ 061 public static enum AstralSet { 062 FORTY_175("1.75_40", "http://scop.berkeley.edu/downloads/scopseq-1.75/astral-scopdom-seqres-gd-sel-gs-bib-40-1.75.fa"), 063 NINETY_FIVE_175("1.75_95", "http://scop.berkeley.edu/downloads/scopseq-1.75/astral-scopdom-seqres-gd-sel-gs-bib-95-1.75.fa"), 064 FORTY_175A("1.75A_40", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-40-2.01.fa"), 065 NINETY_FIVE_175A("1.75A_95","http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-95-2.01.fa"), 066 FORTY_175B("1.75B_40", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-40-2.02.fa"), 067 NINETY_FIVE_175B("1.75B_95", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-95-2.02.fa"), 068 FORTY_201("2.01_40", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-40-2.01.fa"), 069 NINETY_FIVE_201("2.01_95", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-95-2.01.fa"), 070 FORTY_202("2.02_40", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-40-2.02.fa"), 071 NINETY_FIVE_202("2.02_95", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-95-2.02.fa"), 072 FORTY_203("2.03_40", "http://scop.berkeley.edu/downloads/scopeseq-2.03/astral-scopedom-seqres-gd-sel-gs-bib-40-2.03.fa"), 073 NINETY_FIVE_203("2.03_95", "http://scop.berkeley.edu/downloads/scopeseq-2.03/astral-scopedom-seqres-gd-sel-gs-bib-95-2.03.fa"); 074 private String id; 075 private String url; 076 077 public static AstralSet parse(String str) { 078 for (AstralSet c : AstralSet.class.getEnumConstants()) { 079 if (c.getId().equals(str)) return c; 080 } 081 throw new IllegalArgumentException("No ASTRAL set with id " + str); 082 } 083 084 AstralSet(String id, String url) { 085 this.url = url; 086 this.id = id; 087 } 088 089 public String getId() { 090 return id; 091 } 092 093 public String getUrl() { 094 return url; 095 } 096 097 @Override 098 public String toString() { 099 return id; 100 } 101 } 102 103 private static Map<String, SoftReference<Astral>> instances = new HashMap<String, SoftReference<Astral>>(); 104 105 private static final Logger logger = Logger.getLogger(Astral.class.getName()); 106 107 private Set<String> names; 108 private LinkedHashMap<Integer,String> failedLines; 109 110 /** 111 * Get a list of representatives' names for the specified ASTRAL cutoff. 112 */ 113 public static Set<String> getRepresentatives(AstralSet cutoff) { 114 if (instances.containsKey(cutoff.getId()) && instances.get(cutoff.getId()).get() != null) { 115 return instances.get(cutoff.getId()).get().getNames(); 116 } 117 Astral astral = new Astral(cutoff); 118 instances.put(cutoff.getId(), new SoftReference<Astral>(astral)); 119 return astral.getNames(); 120 } 121 122 /** 123 * Get a list of representatives' names for the specified ASTRAL cutoff. 124 * @param id An ASTRAL Id, such as 1.75A_95. 125 */ 126 public static Set<String> getRepresentatives(String id) { 127 return getRepresentatives(AstralSet.parse(id)); 128 } 129 130 /** 131 * Constructs a new Astral object. Generally, client code should prefer calling 132 * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not 133 * included in {@link #Astral(AstralSet)} is required. 134 * 135 * @param cutoff 136 * The ASTRAL sequence-identity cutoff required 137 * @throws RuntimeException 138 * If the Astral set could not be parsed or accessed for any reason 139 */ 140 public Astral(AstralSet cutoff) { 141 URL url; 142 try { 143 url = new URL(cutoff.getUrl()); 144 } catch (MalformedURLException e) { 145 throw new RuntimeException("The URL was invalid!", e); 146 } 147 Reader reader; 148 try { 149 reader = new InputStreamReader(url.openStream()); 150 } catch (IOException e) { 151 throw new RuntimeException("Couldn't open stream to URL " + url, e); 152 } 153 init(reader); 154 } 155 156 /** 157 * Constructs a new Astral object. Generally, client code should prefer calling 158 * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not 159 * included in {@link #Astral(AstralSet)} is required. 160 * 161 * @throws RuntimeException 162 * If the Astral set could not be parsed or accessed for any reason 163 */ 164 public Astral(String id, URL url) { 165 Reader reader; 166 try { 167 reader = new InputStreamReader(url.openStream()); 168 } catch (IOException e) { 169 throw new RuntimeException("Couldn't open stream to URL " + url, e); 170 } 171 init(reader); 172 } 173 174 /** 175 * Constructs a new Astral object. Generally, client code should prefer calling 176 * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not 177 * included in {@link #Astral(AstralSet)} is required. 178 * 179 * @throws RuntimeException 180 * If the Astral set could not be parsed or accessed for any reason 181 */ 182 public Astral(String id, Reader reader) { 183 init(reader); 184 } 185 186 /** 187 * @return The names of representatives in this ASTRAL set. 188 */ 189 public Set<String> getNames() { 190 return names; 191 } 192 193 /** 194 * Gets a map describing lines read in the file that weren't understood. 195 * @return A LinkedHashMap mapping line numbers of failures to the lines themselves 196 */ 197 public LinkedHashMap<Integer, String> getFailedLines() { 198 return failedLines; 199 } 200 201 /** 202 * Parses the FASTA file opened by reader. 203 */ 204 private void init(Reader reader) { 205 names = new TreeSet<String>(); 206 failedLines = new LinkedHashMap<Integer,String>(); 207 208 BufferedReader br = null; 209 210 try { 211 212 br = new BufferedReader(reader); 213 214 logger.info("Reading ASTRAL file..."); 215 216 String line = ""; 217 int i = 0; 218 while ((line = br.readLine()) != null) { 219 if (line.startsWith(">")) { 220 try { 221 String scopId = line.split("\\s")[0].substring(1); 222 names.add(scopId); 223 if (i % 1000 == 0) { 224 logger.log(Level.FINE,"Reading ASTRAL line for " + scopId); 225 } 226 i++; 227 } catch (RuntimeException e) { 228 failedLines.put(i, line); 229 logger.log(Level.WARNING,"Couldn't read line " + line, e); 230 } 231 } 232 } 233 234 br.close(); 235 236 } catch (IOException e) { 237 throw new RuntimeException("Couldn't read the input stream ", e); 238 } finally { 239 if (br != null) { 240 try { 241 br.close(); 242 } catch (IOException e) { 243 logger.log(Level.WARNING,"Could not close stream", e); 244 } 245 } 246 } 247 248 } 249 250}