001/**
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the terms of the GNU Lesser General Public Licence. This
005 * should be distributed with the code. If you do not have a copy, see:
006 *
007 * http://www.gnu.org/copyleft/lesser.html
008 *
009 * Copyright for this code is held jointly by the individual authors. These should be listed in @author doc comments.
010 *
011 * For more information on the BioJava project and its aims, or to join the biojava-l mailing list, visit the home page
012 * at:
013 *
014 * http://www.biojava.org/
015 *
016 */
017package org.biojava.nbio.structure.scop;
018
019import java.io.BufferedReader;
020import java.io.IOException;
021import java.io.InputStreamReader;
022import java.io.Reader;
023import java.lang.ref.SoftReference;
024import java.net.MalformedURLException;
025import java.net.URL;
026import java.util.*;
027import java.util.logging.Level;
028import java.util.logging.Logger;
029
030
/**
 * Provides programmatic access to ASTRAL representative sets. See the paper by <a
 * href="http://scop.berkeley.edu/references/2004-nar-astral.pdf">Chandonia et al.</a> for more information. Example:
 *
 * <pre>
 * Set&lt;String&gt; astralSet = Astral.getRepresentatives(Astral.AstralSet.NINETY_FIVE_175B);
 * </pre>
 *
 * This class uses a multiton pattern with soft references for caching. In short: the first time you call the above, it
 * will fetch the data from ASTRAL; the second time will (probably) not have to; and the instances can still be
 * garbage-collected if necessary (meaning they don't <em>require</em> heap memory).
 * <p>
 * The cache is a plain {@link HashMap} and access to it is not synchronized.
 *
 * @author dmyerstu
 * @since 3.0.6
 */
public class Astral {

	/**
	 * An ASTRAL sequence-identity cutoff with an identifier such as:
	 *
	 * <pre>
	 * 1.75A_95
	 * </pre>
	 *
	 * Also contains a URL pointing to a FASTA file containing the representatives. Every character before the first
	 * whitespace character of each header in the FASTA file is expected to be a representative's name.
	 *
	 * @author dmyersturnbull
	 *
	 */
	public enum AstralSet {
		FORTY_175("1.75_40", "http://scop.berkeley.edu/downloads/scopseq-1.75/astral-scopdom-seqres-gd-sel-gs-bib-40-1.75.fa"),
		NINETY_FIVE_175("1.75_95", "http://scop.berkeley.edu/downloads/scopseq-1.75/astral-scopdom-seqres-gd-sel-gs-bib-95-1.75.fa"),
		FORTY_175A("1.75A_40", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-40-2.01.fa"),
		NINETY_FIVE_175A("1.75A_95","http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-95-2.01.fa"),
		FORTY_175B("1.75B_40", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-40-2.02.fa"),
		NINETY_FIVE_175B("1.75B_95", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-95-2.02.fa"),
		FORTY_201("2.01_40", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-40-2.01.fa"),
		NINETY_FIVE_201("2.01_95", "http://scop.berkeley.edu/downloads/scopeseq-2.01/astral-scopedom-seqres-gd-sel-gs-bib-95-2.01.fa"),
		FORTY_202("2.02_40", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-40-2.02.fa"),
		NINETY_FIVE_202("2.02_95", "http://scop.berkeley.edu/downloads/scopeseq-2.02/astral-scopedom-seqres-gd-sel-gs-bib-95-2.02.fa"),
		FORTY_203("2.03_40", "http://scop.berkeley.edu/downloads/scopeseq-2.03/astral-scopedom-seqres-gd-sel-gs-bib-40-2.03.fa"),
		NINETY_FIVE_203("2.03_95", "http://scop.berkeley.edu/downloads/scopeseq-2.03/astral-scopedom-seqres-gd-sel-gs-bib-95-2.03.fa");

		private final String id;
		private final String url;

		AstralSet(String id, String url) {
			this.url = url;
			this.id = id;
		}

		/**
		 * Looks up the constant whose {@link #getId() id} equals {@code str}.
		 *
		 * @param str
		 *            An ASTRAL Id, such as 1.75A_95
		 * @return The matching constant
		 * @throws IllegalArgumentException
		 *             If no constant has that id (including when {@code str} is null)
		 */
		public static AstralSet parse(String str) {
			for (AstralSet c : values()) {
				if (c.getId().equals(str)) {
					return c;
				}
			}
			throw new IllegalArgumentException("No ASTRAL set with id " + str);
		}

		/**
		 * @return The identifier of this set, such as 1.75A_95
		 */
		public String getId() {
			return id;
		}

		/**
		 * @return The URL (as a string) of the FASTA file listing the representatives
		 */
		public String getUrl() {
			return url;
		}

		/**
		 * @return The same value as {@link #getId()}
		 */
		@Override
		public String toString() {
			return id;
		}
	}

	/** Cache of previously loaded sets, keyed by {@link AstralSet#getId()}. Values may be cleared by the GC. */
	private static final Map<String, SoftReference<Astral>> instances = new HashMap<String, SoftReference<Astral>>();

	private static final Logger logger = Logger.getLogger(Astral.class.getName());

	private Set<String> names;
	private LinkedHashMap<Integer,String> failedLines;

	/**
	 * Get a list of representatives' names for the specified ASTRAL cutoff. The result is served from the cache when
	 * a cached instance is still available; otherwise the set is loaded via {@link #Astral(AstralSet)} and cached.
	 *
	 * @param cutoff
	 *            The ASTRAL set required
	 * @return The names of the representatives; this is the backing set of the cached instance, not a copy
	 * @throws RuntimeException
	 *             If the set had to be loaded and could not be parsed or accessed for any reason
	 */
	public static Set<String> getRepresentatives(AstralSet cutoff) {
		// Dereference the soft reference exactly once and keep a strong reference: the referent may be
		// cleared by the garbage collector at any moment, so a "check, then get() again" sequence can
		// observe null on the second call.
		SoftReference<Astral> ref = instances.get(cutoff.getId());
		Astral astral = (ref == null) ? null : ref.get();
		if (astral == null) {
			astral = new Astral(cutoff);
			instances.put(cutoff.getId(), new SoftReference<Astral>(astral));
		}
		return astral.getNames();
	}

	/**
	 * Get a list of representatives' names for the specified ASTRAL cutoff.
	 *
	 * @param id
	 *            An ASTRAL Id, such as 1.75A_95.
	 * @return The names of the representatives
	 * @throws IllegalArgumentException
	 *             If {@code id} does not match any {@link AstralSet}
	 */
	public static Set<String> getRepresentatives(String id) {
		return getRepresentatives(AstralSet.parse(id));
	}

	/**
	 * Constructs a new Astral object by downloading and parsing the FASTA file of the given set. Generally, client
	 * code should prefer calling {@link #getRepresentatives(AstralSet)} instead, which caches the result.
	 *
	 * @param cutoff
	 *            The ASTRAL sequence-identity cutoff required
	 * @throws RuntimeException
	 *             If the Astral set could not be parsed or accessed for any reason
	 */
	public Astral(AstralSet cutoff) {
		URL url;
		try {
			url = new URL(cutoff.getUrl());
		} catch (MalformedURLException e) {
			throw new RuntimeException("The URL was invalid!", e);
		}
		init(openReader(url));
	}

	/**
	 * Constructs a new Astral object from a FASTA file at an arbitrary URL. Generally, client code should prefer
	 * calling {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set
	 * not included in {@link AstralSet} is required.
	 *
	 * @param id
	 *            An identifier for the set; currently not used by this constructor
	 * @param url
	 *            The location of the FASTA file to read
	 * @throws RuntimeException
	 *             If the Astral set could not be parsed or accessed for any reason
	 */
	public Astral(String id, URL url) {
		init(openReader(url));
	}

	/**
	 * Constructs a new Astral object from FASTA data supplied by a reader. Generally, client code should prefer
	 * calling {@link #getRepresentatives(AstralSet)} instead. This constructor should only be used when an ASTRAL set
	 * not included in {@link AstralSet} is required.
	 *
	 * @param id
	 *            An identifier for the set; currently not used by this constructor
	 * @param reader
	 *            The source of the FASTA data; it is read to the end and closed by this constructor
	 * @throws RuntimeException
	 *             If the Astral set could not be parsed or accessed for any reason
	 */
	public Astral(String id, Reader reader) {
		init(reader);
	}

	/**
	 * @return The names of representatives in this ASTRAL set. This is the backing set itself, not a copy.
	 */
	public Set<String> getNames() {
		return names;
	}

	/**
	 * Gets a map describing lines read in the file that weren't understood.
	 * @return A LinkedHashMap mapping line numbers of failures (1-based, counted over every line of the input) to
	 *         the lines themselves
	 */
	public LinkedHashMap<Integer, String> getFailedLines() {
		return failedLines;
	}

	/**
	 * Opens a character stream on the given URL, shared by the URL-based constructors.
	 */
	private static Reader openReader(URL url) {
		try {
			// Decode with an explicit charset instead of the platform default so parsing is the same on
			// every machine. Fully qualified to keep the file's import block untouched.
			return new InputStreamReader(url.openStream(), java.nio.charset.Charset.forName("UTF-8"));
		} catch (IOException e) {
			throw new RuntimeException("Couldn't open stream to URL " + url, e);
		}
	}

	/**
	 * Parses the FASTA file opened by reader. Only header lines (those starting with '&gt;') are examined; the text
	 * between the '&gt;' and the first whitespace character is taken as the representative's name. Header lines
	 * without a name are recorded in {@link #getFailedLines()}. The reader is always closed.
	 */
	private void init(Reader reader) {
		names = new TreeSet<String>();
		failedLines = new LinkedHashMap<Integer,String>();

		BufferedReader br = new BufferedReader(reader);

		try {

			logger.info("Reading ASTRAL file...");

			String line;
			int lineNumber = 0; // 1-based position in the input; used as the key for failed lines
			int nHeaders = 0;   // successfully parsed headers; only throttles progress logging
			while ((line = br.readLine()) != null) {
				lineNumber++;
				if (!line.startsWith(">")) {
					continue; // sequence data or blank line
				}
				// Limit 2: only the first token is needed, so don't split the rest of the header.
				String scopId = line.split("\\s", 2)[0].substring(1);
				if (scopId.isEmpty()) {
					// A bare ">" or "> description" carries no identifier; don't pollute names with "".
					failedLines.put(lineNumber, line);
					logger.log(Level.WARNING,"Couldn't read line " + line);
					continue;
				}
				names.add(scopId);
				if (nHeaders % 1000 == 0) {
					logger.log(Level.FINE,"Reading ASTRAL line for " + scopId);
				}
				nHeaders++;
			}

		} catch (IOException e) {
			throw new RuntimeException("Couldn't read the input stream ", e);
		} finally {
			try {
				br.close();
			} catch (IOException e) {
				logger.log(Level.WARNING,"Could not close stream", e);
			}
		}

	}

}