001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * @author Karl Nicholas <github:karlnicholas>
015 *
016 * For more information on the BioJava project and its aims,
017 * or to join the biojava-l mailing list, visit the home page
018 * at:
019 *
020 *      http://www.biojava.org/
021 *
022 * Created on 08-08-2013
023 *
024 */
025package org.biojava.nbio.core.sequence.loader;
026
027import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
028import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
029import org.biojava.nbio.core.sequence.features.*;
030import org.biojava.nbio.core.sequence.io.GenbankSequenceParser;
031import org.biojava.nbio.core.sequence.io.GenericGenbankHeaderParser;
032import org.biojava.nbio.core.sequence.template.AbstractSequence;
033import org.biojava.nbio.core.sequence.template.Compound;
034import org.biojava.nbio.core.sequence.template.CompoundSet;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import java.io.*;
039import java.net.URL;
040import java.net.URLConnection;
041import java.util.List;
042import java.util.Map;
043
044/**
045 * @author Karl Nicholas <github:karlnicholas>
046 * @author Jacek Grzebyta <github:jgrzebyta>
047 */
048public class GenbankProxySequenceReader<C extends Compound> extends StringProxySequenceReader<C> implements FeaturesKeyWordInterface, DatabaseReferenceInterface, FeatureRetriever {
049
050        private static final Logger logger = LoggerFactory.getLogger(GenbankProxySequenceReader.class);
051
052        private static final String eutilBaseURL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; //
053        private String genbankDirectoryCache = null;
054        private GenbankSequenceParser<AbstractSequence<C>, C> genbankParser;
055        private GenericGenbankHeaderParser<AbstractSequence<C>, C> headerParser;
056        private String header;
057        private Map<String, List<AbstractFeature<AbstractSequence<C>, C>>> features;
058
059
060        /**
061         *
062         * @throws InterruptedException
063         * @throws IOException
064         * @throws CompoundNotFoundException
065         */
066        public GenbankProxySequenceReader(
067                        String genbankDirectoryCache,
068                        String accessionID,
069                        CompoundSet<C> compoundSet ) throws IOException, InterruptedException, CompoundNotFoundException {
070
071                setGenbankDirectoryCache(genbankDirectoryCache);
072                setCompoundSet(compoundSet);
073
074                String db = compoundSet instanceof AminoAcidCompoundSet ? "protein" : "nuccore";
075
076                InputStream inStream = getBufferedInputStream(accessionID, db);
077                genbankParser = new GenbankSequenceParser<AbstractSequence<C>, C>();
078
079                setContents(genbankParser.getSequence(new BufferedReader(new InputStreamReader(inStream)), 0));
080                headerParser = genbankParser.getSequenceHeaderParser();
081                header = genbankParser.getHeader();
082                features = genbankParser.getFeatures();
083
084                if (compoundSet.getClass().equals(AminoAcidCompoundSet.class)) {
085                        if (!genbankParser.getCompoundType().equals(compoundSet)) {
086                                logger.error("Declared compount type {} does not mach the real: {}", genbankParser.getCompoundType().toString(), compoundSet.toString());
087                                throw new IOException("Wrong declared compound type for: " + accessionID);
088                        }
089                }
090
091                inStream.close();
092        }
093
094        private BufferedInputStream getBufferedInputStream(String accessionID, String db) throws IOException, InterruptedException {
095                BufferedInputStream inStream = null;
096                if (genbankDirectoryCache != null && genbankDirectoryCache.length() > 0) {
097                        File f = new File(genbankDirectoryCache + File.separatorChar + accessionID + ".gb");
098                        if (f.exists()) {
099                                logger.debug("Reading: {}", f.toString());
100                                inStream = new BufferedInputStream(new FileInputStream(f));
101                        } else {
102                                InputStream in = getEutilsInputStream(accessionID, db);
103                                copyInputStreamToFile(in, f);
104                                inStream = new BufferedInputStream(new FileInputStream(f));
105                        }
106                } else {
107                        inStream = new BufferedInputStream(getEutilsInputStream(accessionID, db));
108                }
109                return inStream;
110        }
111
112        private void copyInputStreamToFile(InputStream in, File f) throws IOException, InterruptedException {
113                try (FileOutputStream out = new FileOutputStream(f)) {
114                        byte[] buffer = new byte[1024];
115                        int len = in.read(buffer);
116                        while (len != -1) {
117                                out.write(buffer, 0, len);
118                                len = in.read(buffer);
119                                if (Thread.interrupted()) {
120                                        in.close();
121                                        out.close();
122                                        throw new InterruptedException();
123                                }
124                        }
125                        in.close();
126                }
127        }
128
129        private InputStream getEutilsInputStream(String accessionID, String db) throws IOException {
130                String genbankURL = eutilBaseURL + "efetch.fcgi?db=" + db + "&id=" + accessionID + "&rettype=gb&retmode=text";
131                logger.trace("Loading: {}", genbankURL);
132                URL genbank = new URL(genbankURL);
133                URLConnection genbankConnection = genbank.openConnection();
134                return genbankConnection.getInputStream();
135        }
136
137        /**
138         * Local directory cache of Genbank that can be downloaded
139         *
140         * @return the uniprotDirectoryCache
141         */
142        public String getGenbankDirectoryCache() {
143                return genbankDirectoryCache;
144        }
145
146        /**
147         * @param genbankDirectoryCache
148         */
149        public void setGenbankDirectoryCache(String genbankDirectoryCache) {
150                if (genbankDirectoryCache != null) {
151                        File f = new File(genbankDirectoryCache);
152                        if (!f.exists()) {
153                                f.mkdirs();
154                        }
155                }
156                this.genbankDirectoryCache = genbankDirectoryCache;
157        }
158
159        public String getHeader() {
160                return header;
161        }
162
163        public GenericGenbankHeaderParser<AbstractSequence<C>, C> getHeaderParser() {
164                return headerParser;
165        }
166        @Override
167        public Map<String, List<AbstractFeature<AbstractSequence<C>, C>>> getFeatures() {
168                return features;
169        }
170
171        @Override
172        public Map<String, List<DBReferenceInfo>> getDatabaseReferences() {
173                return genbankParser.getDatabaseReferences();
174        }
175
176        @Override
177        public List<String> getKeyWords() {
178                return genbankParser.getKeyWords();
179        }
180        
181}