/**
 * BioJava development code
 *
 * This code may be freely distributed and modified under the terms of the GNU Lesser General Public Licence. This
 * should be distributed with the code. If you do not have a copy, see:
 *
 * http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims, or to join the biojava-l mailing list, visit the home page
 * at:
 *
 * http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.scop;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.ref.SoftReference;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;


/**
 * Provides programmatic access to ASTRAL representative sets. See the paper by <a
 * href="http://scop.berkeley.edu/references/2004-nar-astral.pdf">Chandonia et. al.</a> for more information. Example:
 *
 * <pre>
 * Set&lt;String&gt; astralSet = Astral.getRepresentatives(Astral.AstralSet.NINETY_FIVE_175B);
 * </pre>
 *
 * This class uses a multiton pattern with soft references for caching. In short: the first time you call the above, it
 * will fetch the data from ASTRAL; the second time will (probably) not have to; and the instances can still be
 * garbage-collected if necessary (meaning they don't <em>require</em> heap memory).
 * <p>
 * The cache is a plain {@link HashMap} and access to it is not synchronized.
 *
 * @author dmyerstu
 * @since 3.0.6
 */
public class Astral {

    /**
     * An ASTRAL sequence-identity cutoff with an identifier such as:
     *
     * <pre>
     * 1.75A_95
     * </pre>
     *
     * Also contains a URL pointing to a FASTA file containing the representatives. Every character before the first
     * whitespace character of each header in the FASTA file is expected to be a representative's name.
     *
     * @author dmyersturnbull
     *
     */
    public enum AstralSet {
        FORTY_175("1.75_40", "http://scop.berkeley.edu/downloads/scopseq-1.75/astral-scopdom-seqres-gd-sel-gs-bib-40-1.75.fa"),
        NINETY_FIVE_175("1.75_95", "http://scop.berkeley.edu/downloads/scopseq-1.75/astral-scopdom-seqres-gd-sel-gs-bib-95-1.75.fa"),
        FORTY_175A("1.75A_40", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-40-2.01.fa"),
        NINETY_FIVE_175A("1.75A_95","http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-95-2.01.fa"),
        FORTY_175B("1.75B_40", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-40-2.02.fa"),
        NINETY_FIVE_175B("1.75B_95", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-95-2.02.fa"),
        FORTY_201("2.01_40", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-40-2.01.fa"),
        NINETY_FIVE_201("2.01_95", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-95-2.01.fa"),
        FORTY_202("2.02_40", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-40-2.02.fa"),
        NINETY_FIVE_202("2.02_95", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-95-2.02.fa"),
        FORTY_203("2.03_40", "http://scop.berkeley.edu/downloads/scopeseq-2.03/astral-scopedom-seqres-gd-sel-gs-bib-40-2.03.fa"),
        NINETY_FIVE_203("2.03_95", "http://scop.berkeley.edu/downloads/scopeseq-2.03/astral-scopedom-seqres-gd-sel-gs-bib-95-2.03.fa");

        private final String id;
        private final String url;

        /**
         * Looks up the set whose identifier equals the given string exactly.
         *
         * @param str
         *            An ASTRAL id, such as 1.75A_95
         * @return The matching set
         * @throws IllegalArgumentException
         *             If no set has that id
         */
        public static AstralSet parse(String str) {
            for (AstralSet c : values()) {
                if (c.getId().equals(str)) {
                    return c;
                }
            }
            throw new IllegalArgumentException("No ASTRAL set with id " + str);
        }

        AstralSet(String id, String url) {
            this.url = url;
            this.id = id;
        }

        /**
         * @return The identifier of this set, such as 1.75A_95
         */
        public String getId() {
            return id;
        }

        /**
         * @return The URL of the FASTA file listing this set's representatives
         */
        public String getUrl() {
            return url;
        }

        /**
         * @return The identifier of this set (same as {@link #getId()})
         */
        @Override
        public String toString() {
            return id;
        }
    }

    /** Cache of already-loaded sets, keyed by {@link AstralSet#getId()}. Not synchronized. */
    private static final Map<String, SoftReference<Astral>> instances = new HashMap<>();

    private static final Logger logger = LoggerFactory.getLogger(Astral.class);

    private Set<String> names;
    private LinkedHashMap<Integer,String> failedLines;

    /**
     * Get a list of representatives' names for the specified ASTRAL cutoff. The result is served from the cache when
     * a previously loaded instance for the same cutoff is still reachable; otherwise the set is loaded and cached.
     *
     * @param cutoff
     *            The ASTRAL sequence-identity cutoff required
     * @return The names of the representatives in that set
     * @throws RuntimeException
     *             If the Astral set could not be parsed or accessed for any reason
     */
    public static Set<String> getRepresentatives(AstralSet cutoff) {
        SoftReference<Astral> cached = instances.get(cutoff.getId());
        // Dereference the soft reference exactly once and keep a strong reference: the referent may be
        // cleared by the garbage collector at any time, so a null check followed by a second get() is unsafe.
        Astral astral = (cached == null) ? null : cached.get();
        if (astral == null) {
            astral = new Astral(cutoff);
            instances.put(cutoff.getId(), new SoftReference<>(astral));
        }
        return astral.getNames();
    }

    /**
     * Get a list of representatives' names for the specified ASTRAL cutoff.
     *
     * @param id
     *            An ASTRAL Id, such as 1.75A_95.
     * @return The names of the representatives in that set
     * @throws IllegalArgumentException
     *             If no {@link AstralSet} has that id
     */
    public static Set<String> getRepresentatives(String id) {
        return getRepresentatives(AstralSet.parse(id));
    }

    /**
     * Constructs a new Astral object by reading the FASTA file at the URL of the given set. Generally, client code
     * should prefer calling {@link #getRepresentatives(AstralSet)} instead, which caches the result.
     *
     * @param cutoff
     *            The ASTRAL sequence-identity cutoff required
     * @throws RuntimeException
     *             If the Astral set could not be parsed or accessed for any reason
     */
    public Astral(AstralSet cutoff) {
        URL url;
        try {
            url = new URL(cutoff.getUrl());
        } catch (MalformedURLException e) {
            throw new RuntimeException("The URL was invalid!", e);
        }
        init(openReader(url));
    }

    /**
     * Constructs a new Astral object. Generally, client code should prefer calling
     * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not
     * included in {@link AstralSet} is required.
     *
     * @param id
     *            An identifier for the set; not used by this constructor
     * @param url
     *            The location of the FASTA file to read
     * @throws RuntimeException
     *             If the Astral set could not be parsed or accessed for any reason
     */
    public Astral(String id, URL url) {
        init(openReader(url));
    }

    /**
     * Constructs a new Astral object. Generally, client code should prefer calling
     * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not
     * included in {@link AstralSet} is required.
     *
     * @param id
     *            An identifier for the set; not used by this constructor
     * @param reader
     *            A reader over FASTA content; it is closed once parsing finishes
     * @throws RuntimeException
     *             If the Astral set could not be parsed or accessed for any reason
     */
    public Astral(String id, Reader reader) {
        init(reader);
    }

    /**
     * Returns the names parsed from the FASTA headers. The returned set is this instance's own backing set, not a
     * copy.
     *
     * @return The names of representatives in this ASTRAL set.
     */
    public Set<String> getNames() {
        return names;
    }

    /**
     * Gets a map describing lines read in the file that weren't understood.
     * @return A LinkedHashMap mapping line numbers (1-based) of failures to the lines themselves
     */
    public LinkedHashMap<Integer, String> getFailedLines() {
        return failedLines;
    }

    /**
     * Opens a character stream on the given URL.
     */
    private static Reader openReader(URL url) {
        try {
            // Decode with an explicit charset rather than the platform default so parsing is reproducible.
            return new InputStreamReader(url.openStream(), java.nio.charset.StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new RuntimeException("Couldn't open stream to URL " + url, e);
        }
    }

    /**
     * Parses the FASTA file opened by reader, collecting the name from every header line (a line starting with
     * {@code >}) and closing the reader afterwards. Header lines that carry no name are recorded in
     * {@link #getFailedLines()} under their 1-based line number.
     */
    private void init(Reader reader) {
        names = new TreeSet<>();
        failedLines = new LinkedHashMap<>();

        try (BufferedReader br = new BufferedReader(reader)) {

            logger.info("Reading ASTRAL file...");

            String line;
            int lineNumber = 0; // 1-based position in the file, used as the key for failed lines
            int parsed = 0;     // number of names read so far, used only to throttle debug logging
            while ((line = br.readLine()) != null) {
                lineNumber++;
                if (!line.startsWith(">")) {
                    continue;
                }
                // The name is everything between '>' and the first whitespace character;
                // only the first token is needed, so stop splitting after it.
                String scopId = line.split("\\s", 2)[0].substring(1);
                if (scopId.isEmpty()) {
                    failedLines.put(lineNumber, line);
                    logger.warn("Couldn't read line {}", line);
                    continue;
                }
                names.add(scopId);
                if (parsed % 1000 == 0) {
                    logger.debug("Reading ASTRAL line for {}", scopId);
                }
                parsed++;
            }

        } catch (IOException e) {
            throw new RuntimeException("Couldn't read the input stream ", e);
        }

    }

}