001/**
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Dec 7, 2013
021 * Created by Douglas Myers-Turnbull
022 *
023 */
024package org.biojava.nbio.structure.io.sifts;
025
026import org.biojava.nbio.structure.align.util.UserConfiguration;
027import org.biojava.nbio.core.sequence.io.util.IOUtils;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031import java.io.*;
032import java.net.MalformedURLException;
033import java.net.URL;
034import java.util.Collection;
035import java.util.HashMap;
036import java.util.Map;
037import java.util.Map.Entry;
038import java.util.Set;
039import java.util.zip.GZIPInputStream;
040
041/**
042 * A mapping between UniProt entries and PDB chains.
043 * For example
044 * <pre>
045 * SiftsChainToUniprotMapping sifts = SiftsChainToUniprotMapping.load();
046 * SiftsChainEntry entry1 = sifts.getByUniProtId("P04585");
047 * System.out.println(entry1.getPdbId() + "." + entry1.getChainId()); // 1hiv.A
048 * System.out.println(entry1.getPdbStart() + "-" + entry1.getPdbStop()); // 1-99
049 * SiftsChainEntry entry2 = sifts.getByChainId("1hiv", "A");
050 * System.out.println(entry1.equals(entry2)); // true
051 * </pre>
052 *
053 * @author dmyersturnbull
054 * @see SiftsChainEntry
055 * @since 3.0.7
056 */
057public class SiftsChainToUniprotMapping {
058
059        private final static Logger logger = LoggerFactory.getLogger(SiftsChainToUniprotMapping.class);
060
061
062        private static File DEFAULT_FILE;
063
064        private static final String DEFAULT_FILENAME = "pdb_chain_uniprot.tsv";
065        private static final URL DEFAULT_URL;
066
067        static {
068                try {
069                        DEFAULT_URL = new URL("ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/tsv/pdb_chain_uniprot.tsv.gz");
070                } catch (MalformedURLException e) {
071                        throw new RuntimeException(e);
072                }
073        }
074
075        /**
076         * Loads the SIFTS mapping.
077         * Attempts to load the mapping file in the PDB cache directory.
078         * If the file does not exist or could not be parsed, downloads and stores a GZ-compressed file.
079         * @return
080         * @throws IOException If the local file could not be read and could not be downloaded
081         */
082        public static SiftsChainToUniprotMapping load() throws IOException {
083                return load(false);
084        }
085
086        /**
087         * Loads the SIFTS mapping.
088         * Attempts to load the mapping file in the PDB cache directory.
089         * If the file does not exist or could not be parsed, downloads and stores a GZ-compressed file.
090         * @param useOnlyLocal If true, will throw an IOException if the file needs to be downloaded
091         * @return
092         * @throws IOException If the local file could not be read and could not be downloaded (including if onlyLocal is true)
093         */
094        public static SiftsChainToUniprotMapping load(boolean useOnlyLocal) throws IOException {
095
096                UserConfiguration config = new UserConfiguration();
097                File cacheDir = new File(config.getCacheFilePath());
098
099                DEFAULT_FILE = new File(cacheDir, DEFAULT_FILENAME);
100
101
102                if (!DEFAULT_FILE.exists() || DEFAULT_FILE.length() == 0) {
103                        if (useOnlyLocal) throw new IOException(DEFAULT_FILE + " does not exist, and did not download");
104                        download();
105                }
106                try {
107                        return build();
108                } catch (IOException e) {
109                        logger.info("Caught IOException while reading {}. Error: {}",DEFAULT_FILE,e.getMessage());
110                        if (useOnlyLocal) throw new IOException(DEFAULT_FILE + " could not be read, and did not redownload");
111                        download();
112                        return build();
113                }
114        }
115
116        private static SiftsChainToUniprotMapping build() throws IOException {
117                SiftsChainToUniprotMapping sifts = new SiftsChainToUniprotMapping();
118                BufferedReader br = new BufferedReader(new FileReader(DEFAULT_FILE));
119                String line = "";
120                while ((line = br.readLine()) != null) {
121                        if (line.isEmpty() || line.startsWith("#") || line.startsWith("PDB")) continue;
122                        String[] parts = line.split("\t");
123                        String pdbId = parts[0];
124                        String chainId = parts[1];
125                        String uniProtId = parts[2];
126                        String seqresStart = parts[3];
127                        String seqresEnd = parts[4];
128                        String pdbStart = parts[5];
129                        String pdbEnd = parts[6];
130                        String uniprotStart = parts[7];
131                        String uniprotEnd = parts[8];
132                        SiftsChainEntry entry = new SiftsChainEntry(pdbId, chainId, uniProtId, seqresStart, seqresEnd,
133                                        pdbStart, pdbEnd, uniprotStart, uniprotEnd);
134                        sifts.byChainId.put(pdbId + "." + chainId, entry);
135                        sifts.byUniProtId.put(uniProtId, entry);
136                }
137                br.close();
138                return sifts;
139        }
140
141        private static void download() throws IOException {
142
143                logger.info("Downloading {} to {}",DEFAULT_URL.toString(),DEFAULT_FILE);
144
145                InputStream in = null;
146                OutputStream out = null;
147
148                in = new GZIPInputStream(DEFAULT_URL.openStream());
149                out = new FileOutputStream(DEFAULT_FILE);
150                IOUtils.copy(in, out);
151
152        }
153
154        private Map<String, SiftsChainEntry> byChainId = new HashMap<String, SiftsChainEntry>();
155
156        private Map<String, SiftsChainEntry> byUniProtId = new HashMap<String, SiftsChainEntry>();
157
158        private SiftsChainToUniprotMapping() {
159
160        }
161
162        public Set<Entry<String, SiftsChainEntry>> chainEntrySet() {
163                return byChainId.entrySet();
164        }
165
166        public boolean containsChainId(String pdbId, String chainId) {
167                return byChainId.containsKey(pdbId + "." + chainId);
168        }
169
170        public boolean containsUniProtId(String uniProtId) {
171                return byUniProtId.containsKey(uniProtId);
172        }
173
174        public SiftsChainEntry getByChainId(String pdbId, String chainId) {
175                return byChainId.get(pdbId + "." + chainId);
176        }
177
178        public SiftsChainEntry getByUniProtId(String uniProtId) {
179                return byUniProtId.get(uniProtId);
180        }
181
182        public Set<String> keySet() {
183                return byChainId.keySet();
184        }
185
186        /**
187         * Returns the number of mapped entries.
188         */
189        public int size() {
190                return byChainId.size();
191        }
192
193        public Set<Entry<String, SiftsChainEntry>> uniProtEntrySet() {
194                return byChainId.entrySet();
195        }
196
197        public Collection<SiftsChainEntry> values() {
198                return byChainId.values();
199        }
200}