001/**
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Dec 7, 2013
021 * Created by Douglas Myers-Turnbull
022 *
023 */
024package org.biojava.nbio.structure.io.sifts;
025
026import org.biojava.nbio.structure.align.util.UserConfiguration;
027import org.biojava.nbio.core.sequence.io.util.IOUtils;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031import java.io.*;
032import java.net.MalformedURLException;
033import java.net.URL;
034import java.util.Collection;
035import java.util.HashMap;
036import java.util.Map;
037import java.util.Map.Entry;
038import java.util.Set;
039import java.util.zip.GZIPInputStream;
040
041/**
042 * A mapping between UniProt entries and PDB chains.
043 * For example
044 * <pre>
045 * SiftsChainToUniprot sifts = SiftsChainToUniprot.load();
046 * SiftsChainEntry entry1 = sifts.getByUniProtId("P04585");
047 * System.out.println(entry1.getPdbId() + "." + entry1.getChainName()); // 1hiv.A
048 * System.out.println(entry1.getPdbStart() + "-" + entry1.getPdbStop()); // 1-99
049 * SiftsChainEntry entry2 = sifts.getByChainId("1hiv", "A");
050 * System.out.println(entry1.equals(entry2)); // true
051 * </pre>
052 * See SIFTS project documentation: https://www.ebi.ac.uk/pdbe/docs/sifts/
053 * @author dmyersturnbull
054 * @see SiftsChainEntry
055 * @since 3.0.7
056 */
057public class SiftsChainToUniprotMapping {
058
059        private final static Logger logger = LoggerFactory.getLogger(SiftsChainToUniprotMapping.class);
060
061
062        protected static File DEFAULT_FILE;
063
064        private static final String DEFAULT_FILENAME = "pdb_chain_uniprot.tsv";
065        private static final URL DEFAULT_URL;
066
067        static {
068                try {
069                        DEFAULT_URL = new URL("ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/tsv/pdb_chain_uniprot.tsv.gz");
070                } catch (MalformedURLException e) {
071                        throw new RuntimeException(e);
072                }
073        }
074
075        /**
076         * Loads the SIFTS mapping.
077         * Attempts to load the mapping file in the PDB cache directory.
078         * If the file does not exist or could not be parsed, downloads and stores a GZ-compressed file.
079         * @return
080         * @throws IOException If the local file could not be read and could not be downloaded
081         */
082        public static SiftsChainToUniprotMapping load() throws IOException {
083                return load(false);
084        }
085
086        /**
087         * Loads the SIFTS mapping.
088         * Attempts to load the mapping file in the PDB cache directory.
089         * If the file does not exist or could not be parsed, downloads and stores a GZ-compressed file.
090         * @param useOnlyLocal If true, will throw an IOException if the file needs to be downloaded
091         * @return
092         * @throws IOException If the local file could not be read and could not be downloaded (including if onlyLocal is true)
093         */
094        public static SiftsChainToUniprotMapping load(boolean useOnlyLocal) throws IOException {
095
096                UserConfiguration config = new UserConfiguration();
097                File cacheDir = new File(config.getCacheFilePath());
098
099                DEFAULT_FILE = new File(cacheDir, DEFAULT_FILENAME);
100
101
102                if (!DEFAULT_FILE.exists() || DEFAULT_FILE.length() == 0) {
103                        if (useOnlyLocal) throw new IOException(DEFAULT_FILE + " does not exist, and did not download");
104                        download();
105                }
106                try {
107                        return build();
108                } catch (IOException e) {
109                        logger.info("Caught IOException while reading {}. Error: {}",DEFAULT_FILE,e.getMessage());
110                        if (useOnlyLocal) throw new IOException(DEFAULT_FILE + " could not be read, and did not redownload");
111                        download();
112                        return build();
113                }
114        }
115
116        /**
117         * Builds the mapping by reading SIFTS the tsv file set in {@link #DEFAULT_FILE} variable.
118         * @return
119         * @throws IOException
120         */
121        protected static SiftsChainToUniprotMapping build() throws IOException {
122                SiftsChainToUniprotMapping sifts = new SiftsChainToUniprotMapping();
123                BufferedReader br = new BufferedReader(new FileReader(DEFAULT_FILE));
124                String line = "";
125                while ((line = br.readLine()) != null) {
126                        if (line.isEmpty() || line.startsWith("#") || line.startsWith("PDB")) continue;
127                        String[] parts = line.split("\t");
128                        String pdbId = parts[0];
129                        String chainId = parts[1];
130                        String uniProtId = parts[2];
131                        String seqresStart = parts[3];
132                        String seqresEnd = parts[4];
133                        String pdbStart = parts[5];
134                        String pdbEnd = parts[6];
135                        String uniprotStart = parts[7];
136                        String uniprotEnd = parts[8];
137                        SiftsChainEntry entry = new SiftsChainEntry(pdbId, chainId, uniProtId, seqresStart, seqresEnd,
138                                        pdbStart, pdbEnd, uniprotStart, uniprotEnd);
139                        sifts.byChainId.put(pdbId + "." + chainId, entry);
140                        sifts.byUniProtId.put(uniProtId, entry);
141                }
142                br.close();
143                return sifts;
144        }
145
146        private static void download() throws IOException {
147
148                logger.info("Downloading {} to {}",DEFAULT_URL.toString(),DEFAULT_FILE);
149
150                InputStream in = null;
151                OutputStream out = null;
152
153                in = new GZIPInputStream(DEFAULT_URL.openStream());
154                out = new FileOutputStream(DEFAULT_FILE);
155                IOUtils.copy(in, out);
156
157        }
158
159        private Map<String, SiftsChainEntry> byChainId = new HashMap<String, SiftsChainEntry>();
160
161        private Map<String, SiftsChainEntry> byUniProtId = new HashMap<String, SiftsChainEntry>();
162
163        private SiftsChainToUniprotMapping() {
164
165        }
166
167        public Set<Entry<String, SiftsChainEntry>> chainEntrySet() {
168                return byChainId.entrySet();
169        }
170
171        public boolean containsChainId(String pdbId, String chainId) {
172                return byChainId.containsKey(pdbId + "." + chainId);
173        }
174
175        public boolean containsUniProtId(String uniProtId) {
176                return byUniProtId.containsKey(uniProtId);
177        }
178
179        public SiftsChainEntry getByChainId(String pdbId, String chainId) {
180                return byChainId.get(pdbId + "." + chainId);
181        }
182
183        public SiftsChainEntry getByUniProtId(String uniProtId) {
184                return byUniProtId.get(uniProtId);
185        }
186
187        public Set<String> keySet() {
188                return byChainId.keySet();
189        }
190
191        /**
192         * Returns the number of mapped entries.
193         */
194        public int size() {
195                return byChainId.size();
196        }
197
198        public Set<Entry<String, SiftsChainEntry>> uniProtEntrySet() {
199                return byChainId.entrySet();
200        }
201
202        public Collection<SiftsChainEntry> values() {
203                return byChainId.values();
204        }
205}