001/**
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the terms of the GNU Lesser General Public Licence. This
005 * should be distributed with the code. If you do not have a copy, see:
006 *
007 * http://www.gnu.org/copyleft/lesser.html
008 *
009 * Copyright for this code is held jointly by the individual authors. These should be listed in @author doc comments.
010 *
011 * For more information on the BioJava project and its aims, or to join the biojava-l mailing list, visit the home page
012 * at:
013 *
014 * http://www.biojava.org/
015 *
016 */
017package org.biojava.nbio.structure.scop;
018
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.ref.SoftReference;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;
030
031
032/**
 * Provides programmatic access to ASTRAL representative sets. See the paper by <a
 * href="http://scop.berkeley.edu/references/2004-nar-astral.pdf">Chandonia et al.</a> for more information. Example:
035 *
036 * <pre>
037 * Set&lt;String&gt; astralSet = Astral.getRepresentatives(Astral.AstralSet.NINETY_FIVE_175B);
038 * </pre>
039 *
040 * This class uses a multiton pattern with soft references for caching. In short: the first time you call the above, it
041 * will fetch the data from ASTRAL; the second time will (probably) not have to; and the instances can still be
042 * garbage-collected if necessary (meaning they don't <em>require</em> heap memory).
043 *
044 * @author dmyerstu
045 * @since 3.0.6
046 */
047public class Astral {
048
049        /**
050         * An ASTRAL sequence-identity cutoff with an identifier such as:
051         *
052         * <pre>
053         * 1.75A_95
054         * </pre>
055         *
056         * Also contains a URL pointing to a FASTA file containing the representatives. Every character before the first
057         * whitespace character of each header in the FASTA file is expected to be a representative's name.
058         *
059         * @author dmyersturnbull
060         *
061         */
062        public static enum AstralSet {
063                FORTY_175("1.75_40", "http://scop.berkeley.edu/downloads/scopseq-1.75/astral-scopdom-seqres-gd-sel-gs-bib-40-1.75.fa"),
064                NINETY_FIVE_175("1.75_95", "http://scop.berkeley.edu/downloads/scopseq-1.75/astral-scopdom-seqres-gd-sel-gs-bib-95-1.75.fa"),
065                FORTY_175A("1.75A_40", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-40-2.01.fa"),
066                NINETY_FIVE_175A("1.75A_95","http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-95-2.01.fa"),
067                FORTY_175B("1.75B_40", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-40-2.02.fa"),
068                NINETY_FIVE_175B("1.75B_95", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-95-2.02.fa"),
069                FORTY_201("2.01_40", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-40-2.01.fa"),
070                NINETY_FIVE_201("2.01_95", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-95-2.01.fa"),
071                FORTY_202("2.02_40", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-40-2.02.fa"),
072                NINETY_FIVE_202("2.02_95", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-95-2.02.fa"),
073                FORTY_203("2.03_40", "http://scop.berkeley.edu/downloads/scopeseq-2.03/astral-scopedom-seqres-gd-sel-gs-bib-40-2.03.fa"),
074                NINETY_FIVE_203("2.03_95", "http://scop.berkeley.edu/downloads/scopeseq-2.03/astral-scopedom-seqres-gd-sel-gs-bib-95-2.03.fa");
075                private String id;
076                private String url;
077
078                public static AstralSet parse(String str) {
079                        for (AstralSet c : AstralSet.class.getEnumConstants()) {
080                                if (c.getId().equals(str)) return c;
081                        }
082                        throw new IllegalArgumentException("No ASTRAL set with id " + str);
083                }
084
085                AstralSet(String id, String url) {
086                        this.url = url;
087                        this.id = id;
088                }
089
090                public String getId() {
091                        return id;
092                }
093
094                public String getUrl() {
095                        return url;
096                }
097
098                @Override
099                public String toString() {
100                        return id;
101                }
102        }
103
104        private static Map<String, SoftReference<Astral>> instances = new HashMap<>();
105
106        private static final Logger logger = LoggerFactory.getLogger(Astral.class);
107
108        private Set<String> names;
109        private LinkedHashMap<Integer,String> failedLines;
110
111        /**
112         * Get a list of representatives' names for the specified ASTRAL cutoff.
113         */
114        public static Set<String> getRepresentatives(AstralSet cutoff) {
115                if (instances.containsKey(cutoff.getId()) && instances.get(cutoff.getId()).get() != null) {
116                        return instances.get(cutoff.getId()).get().getNames();
117                }
118                Astral astral = new Astral(cutoff);
119                instances.put(cutoff.getId(), new SoftReference<Astral>(astral));
120                return astral.getNames();
121        }
122
123        /**
124         * Get a list of representatives' names for the specified ASTRAL cutoff.
125         * @param id An ASTRAL Id, such as 1.75A_95.
126         */
127        public static Set<String> getRepresentatives(String id) {
128                return getRepresentatives(AstralSet.parse(id));
129        }
130
131        /**
132         * Constructs a new Astral object. Generally, client code should prefer calling
133         * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not
134         * included in {@link #Astral(AstralSet)} is required.
135         *
136         * @param cutoff
137         *            The ASTRAL sequence-identity cutoff required
138         * @throws RuntimeException
139         *             If the Astral set could not be parsed or accessed for any reason
140         */
141        public Astral(AstralSet cutoff) {
142                URL url;
143                try {
144                        url = new URL(cutoff.getUrl());
145                } catch (MalformedURLException e) {
146                        throw new RuntimeException("The URL was invalid!", e);
147                }
148                Reader reader;
149                try {
150                        reader = new InputStreamReader(url.openStream());
151                } catch (IOException e) {
152                        throw new RuntimeException("Couldn't open stream to URL " + url, e);
153                }
154                init(reader);
155        }
156
157        /**
158         * Constructs a new Astral object. Generally, client code should prefer calling
159         * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not
160         * included in {@link #Astral(AstralSet)} is required.
161         *
162         * @throws RuntimeException
163         *             If the Astral set could not be parsed or accessed for any reason
164         */
165        public Astral(String id, URL url) {
166                Reader reader;
167                try {
168                        reader = new InputStreamReader(url.openStream());
169                } catch (IOException e) {
170                        throw new RuntimeException("Couldn't open stream to URL " + url, e);
171                }
172                init(reader);
173        }
174
175        /**
176         * Constructs a new Astral object. Generally, client code should prefer calling
177         * {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set not
178         * included in {@link #Astral(AstralSet)} is required.
179         *
180         * @throws RuntimeException
181         *             If the Astral set could not be parsed or accessed for any reason
182         */
183        public Astral(String id, Reader reader) {
184                init(reader);
185        }
186
187        /**
188         * @return The names of representatives in this ASTRAL set.
189         */
190        public Set<String> getNames() {
191                return names;
192        }
193
194        /**
195         * Gets a map describing lines read in the file that weren't understood.
196         * @return A LinkedHashMap mapping line numbers of failures to the lines themselves
197         */
198        public LinkedHashMap<Integer, String> getFailedLines() {
199                return failedLines;
200        }
201
202        /**
203         * Parses the FASTA file opened by reader.
204         */
205        private void init(Reader reader) {
206                names = new TreeSet<>();
207                failedLines = new LinkedHashMap<>();
208
209                BufferedReader br = null;
210
211                try {
212
213                        br = new BufferedReader(reader);
214
215                        logger.info("Reading ASTRAL file...");
216
217                        String line = "";
218                        int i = 0;
219                        while ((line = br.readLine()) != null) {
220                                if (line.startsWith(">")) {
221                                        try {
222                                                String scopId = line.split("\\s")[0].substring(1);
223                                                names.add(scopId);
224                                                if (i % 1000 == 0) {
225                                                        logger.debug("Reading ASTRAL line for {}", scopId);
226                                                }
227                                                i++;
228                                        } catch (RuntimeException e) {
229                                                failedLines.put(i, line);
230                                                logger.warn("Couldn't read line " + line, e);
231                                        }
232                                }
233                        }
234
235                        br.close();
236
237                } catch (IOException e) {
238                        throw new RuntimeException("Couldn't read the input stream ", e);
239                } finally {
240                        if (br != null) {
241                                try {
242                                        br.close();
243                                } catch (IOException e) {
244                                        logger.warn("Could not close stream", e);
245                                }
246                        }
247                }
248
249        }
250
251}