001/**
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the terms of the GNU
005 * Lesser General Public Licence. This should be distributed with the code. If
006 * you do not have a copy, see:
007 *
008 * http://www.gnu.org/copyleft/lesser.html
009 *
010 * Copyright for this code is held jointly by the individual authors. These
011 * should be listed in @author doc comments.
012 *
013 * For more information on the BioJava project and its aims, or to join the
014 * biojava-l mailing list, visit the home page at:
015 *
016 * http://www.biojava.org/
017 *
018 * Created on Feb 23, 2012 Created by Andreas Prlic
019 *
020 * @since 3.0.2
021 */
022package org.biojava.nbio.core.util;
023
024import java.io.File;
025import java.io.FileNotFoundException;
026import java.io.FileOutputStream;
027import java.io.FilenameFilter;
028import java.io.IOException;
029import java.io.InputStream;
030import java.io.PrintStream;
031import java.net.HttpURLConnection;
032import java.net.SocketTimeoutException;
033import java.net.URL;
034import java.net.URLConnection;
035import java.nio.channels.Channels;
036import java.nio.channels.ReadableByteChannel;
037import java.nio.file.*;
038import java.nio.file.attribute.BasicFileAttributes;
039import java.util.Scanner;
040
041import org.slf4j.Logger;
042import org.slf4j.LoggerFactory;
043
044public class FileDownloadUtils {
045
046        private static final String SIZE_EXT = ".size";
047        private static final String HASH_EXT = ".hash";
048        private static final Logger logger = LoggerFactory.getLogger(FileDownloadUtils.class);
049
050        public enum Hash{
051                MD5, SHA1, SHA256, UNKNOWN
052        }
053
054        /**
055         * Gets the file extension of a file, excluding '.'.
056         * If the file name has no extension the file name is returned.
057         * @param f a File
058         * @return The extension
059         */
060        public static String getFileExtension(File f) {
061                String fileName = f.getName();
062                String ext = "";
063                int mid = fileName.lastIndexOf(".");
064                ext = fileName.substring(mid + 1);
065                return ext;
066        }
067
068        /**
069         * Gets the file name up to and excluding the first
070         * '.' character. If there is no extension, the full filename
071         * is returned.
072         * @param f A file
073         * @return A possibly empty but non-null String.
074         */
075        public static String getFilePrefix(File f) {
076                String fileName = f.getName();
077                int mid = fileName.indexOf(".");
078                if (mid < 0) {
079                        return fileName;
080                }
081                return fileName.substring(0, mid);
082        }
083
084        /**
085         * Download the content provided at URL url and store the result to a local
086         * file, using a temp file to cache the content in case something goes wrong
087         * in download. A timeout of 60 seconds is hard-coded and 10 retries are attempted.
088         *
089         * @param url
090         * @param destination
091         * @throws IOException
092         */
093        public static void downloadFile(URL url, File destination) throws IOException {
094                int count = 0;
095                int maxTries = 10;
096                int timeout = 60000; //60 sec
097
098                File tempFile = Files.createTempFile(getFilePrefix(destination), "." + getFileExtension(destination)).toFile();
099
100                // Took following recipe from stackoverflow:
101                // http://stackoverflow.com/questions/921262/how-to-download-and-save-a-file-from-internet-using-java
102                // It seems to be the most efficient way to transfer a file
103                // See: http://docs.oracle.com/javase/7/docs/api/java/nio/channels/FileChannel.html
104                ReadableByteChannel rbc = null;
105                FileOutputStream fos = null;
106                while (true) {
107                        try {
108                                URLConnection connection = prepareURLConnection(url.toString(), timeout);
109                                connection.connect();
110                                InputStream inputStream = connection.getInputStream();
111
112                                rbc = Channels.newChannel(inputStream);
113                                fos = new FileOutputStream(tempFile);
114                                fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
115                                break;
116                        } catch (SocketTimeoutException e) {
117                                if (++count == maxTries) throw e;
118                        } finally {
119                                if (rbc != null) {
120                                        rbc.close();
121                                }
122                                if (fos != null) {
123                                        fos.close();
124                                }
125                        }
126                }
127
128                logger.debug("Copying temp file [{}] to final location [{}]", tempFile, destination);
129                Files.copy(tempFile.toPath(), destination.toPath(), StandardCopyOption.REPLACE_EXISTING);
130
131                // delete the tmp file
132                tempFile.delete();
133
134        }
135        
136        /**
137         * Creates validation files beside a file to be downloaded.<br>
138         * Whenever possible, for a <code>file.ext</code> file, it creates 
139         * <code>file.ext.size</code> and <code>file.hash</code> for in the same 
140         * folder where <code>file.ext</code> exists.
141         * If the file connection size could not be deduced from the URL, no size file is created. 
142         * If <code>hashURL</code> is <code>null</code>, no hash file is created.
143         * @param url the remote file URL to download
144         * @param localDestination the local file to download into
145         * @param hashURL the URL of the hash file to download. Can be <code>null</code>.
146         * @param hash The Hashing algorithm. Ignored if <code>hashURL</code> is <code>null</code>.
147         */
148        public static void createValidationFiles(URL url, File localDestination, URL hashURL, Hash hash){
149                try {
150                        URLConnection resourceConnection = url.openConnection();
151                        createValidationFiles(resourceConnection, localDestination, hashURL, FileDownloadUtils.Hash.UNKNOWN);
152                } catch (IOException e) {
153                        logger.warn("could not open connection to resource file due to exception: {}", e.getMessage());
154                }
155        }
156        /**
157         * Creates validation files beside a file to be downloaded.<br>
158         * Whenever possible, for a <code>file.ext</code> file, it creates 
159         * <code>file.ext.size</code> and <code>file.hash_XXXX</code> in the same 
160         * folder where <code>file.ext</code> exists (XXXX may be DM5, SHA1, or SHA256).
161         * If the file connection size could not be deduced from the resourceUrlConnection 
162         * {@link URLConnection}, no size file is created. 
163         * If <code>hashURL</code> is <code>null</code>, no hash file is created.<br>
164         * <b>N.B.</b> None of the hashing algorithms is implemented (yet), because we did not need any of them yet.
165         * @param resourceUrlConnection the remote file URLConnection to download
166         * @param localDestination the local file to download into
167         * @param hashURL the URL of the hash file to download. Can be <code>null</code>.
168         * @param hash The Hashing algorithm. Ignored if <code>hashURL</code> is <code>null</code>.
169         * @since 7.0.0
170         */
171        public static void createValidationFiles(URLConnection resourceUrlConnection, File localDestination, URL hashURL, Hash hash){
172                long size = resourceUrlConnection.getContentLengthLong();
173                if(size == -1) {
174                        logger.debug("Could not find expected file size for resource {}. Size validation metadata file won't be available for this download.", resourceUrlConnection.getURL());
175                } else {
176                        logger.debug("Content-Length: {}", size);
177                        File sizeFile = new File(localDestination.getParentFile(), localDestination.getName() + SIZE_EXT);
178                        try (PrintStream sizePrintStream = new PrintStream(sizeFile)) {
179                                sizePrintStream.print(size);
180                        } catch (FileNotFoundException e) {
181                                logger.warn("Could not write size validation metadata file due to exception: {}", e.getMessage());
182                        }
183                }
184                
185                if(hashURL == null)
186                        return;
187
188                if(hash == Hash.UNKNOWN)
189                        throw new IllegalArgumentException("Hash URL given but algorithm is unknown");
190                try {
191                        File hashFile = new File(localDestination.getParentFile(), String.format("%s%s_%s", localDestination.getName(), HASH_EXT, hash));
192                        downloadFile(hashURL, hashFile);
193                } catch (IOException e) {
194                        logger.warn("Could not write validation hash file due to exception: {}", e.getMessage());
195                }
196        }
197        
198        /**
199         * Validate a local file based on pre-existing metadata files for size and hash.<br>
200         * If the passed in <code>localFile</code> parameter is a file named <code>file.ext</code>, the function searches in the same folder for:
201         * <ul>
202         * <li><code>file.ext.size</code>: If found, it compares the size stored in it to the length of <code>localFile</code> (in bytes).</li>
203         * <li><code>file.ext.hash_XXXX (where XXXX is DM5, SHA1, or SHA256)</code>: If found, it compares the size stored in it to the hash code of <code>localFile</code>.</li>
204         * </ul>
205         * If any of these comparisons fail, the function returns <code>false</code>. otherwise it returns true.
206         * <p>
207         * <b>N.B.</b> None of the 3 common verification hashing algorithms are implement yet.
208         * @param localFile The file to validate
209         * @return <code>false</code> if any of the size or hash code metadata files exists but its contents does not match the expected value in the file, <code>true</code> otherwise.
210         * @since 7.0.0
211         */
212        public static boolean validateFile(File localFile) {
213                File sizeFile = new File(localFile.getParentFile(), localFile.getName() + SIZE_EXT);
214                if(sizeFile.exists()) {
215            try (Scanner scanner = new Scanner(sizeFile)) {
216                long expectedSize = scanner.nextLong();
217                long actualSize = localFile.length();
218                if (expectedSize != actualSize) {
219                    logger.warn("File [{}] size ({}) does not match expected size ({}).", localFile, actualSize, expectedSize);
220                    return false;
221                }
222            } catch (FileNotFoundException e) {
223                logger.warn("could not validate size of file [{}] because no size metadata file exists.", localFile);
224            }
225                }
226
227                File[] hashFiles = localFile.getParentFile().listFiles(new FilenameFilter() {
228                        final String hashPattern = String.format("%s%s_(%s|%s|%s)", localFile.getName(), HASH_EXT, Hash.MD5, Hash.SHA1, Hash.SHA256);
229                        @Override
230                        public boolean accept(File dir, String name) {
231                                return name.matches(hashPattern);
232                        }
233                });
234                if(hashFiles.length > 0) {
235                        File hashFile = hashFiles[0];
236                        String name = hashFile.getName();
237                        String algo = name.substring(name.lastIndexOf('_') + 1);
238                        switch (Hash.valueOf(algo)) {
239                        case MD5:
240                        case SHA1:
241                        case SHA256:
242                                throw new UnsupportedOperationException("Not yet implemented");
243                        case UNKNOWN:
244                        default: // No need. Already checked above
245                                throw new IllegalArgumentException("Hashing algorithm not known: " + algo);
246                        }
247                }
248                
249                return true;
250        }
251
252        /**
253         * Converts path to Unix convention and adds a terminating slash if it was
254         * omitted. 
255         *
256         * @param path original platform dependent path
257         * @return path in Unix convention
258         * @author Peter Rose
259         * @since 3.2
260         */
261        public static String toUnixPath(String path) {
262                String uPath = path;
263                if (uPath.contains("\\")) {
264                        uPath = uPath.replaceAll("\\\\", "/");
265                }
266                // this should be removed, it's need since "\" is added AtomCache code
267                if (uPath.endsWith("//")) {
268                        uPath = uPath.substring(0, uPath.length() - 1);
269                }
270                if (!uPath.endsWith("/")) {
271                        uPath = uPath + "/";
272                }
273                return uPath;
274        }
275
276        /**
277         * Expands ~ in paths to the user's home directory.
278         *
279         * <p>
280         * This does not work for some special cases for paths: Other users' homes
281         * (~user/...), and Tilde expansion within the path (/.../~/...). In these cases
282         *  the original argument is returned.
283         *
284         * @param file A filepath starting with a tilde
285         * @return An absolute path
286         */
287        public static String expandUserHome(String file) {
288                // replace any / with the proper separator (/ or \ for Linux and Windows respectively).
289                file = file.replaceAll("/", "\\"+File.separator); //The "\\" is to escape the separator if needed.
290                if (file.startsWith("~") && (file.length() == 1 || File.separator.equals(file.substring(1, 2)))) {
291                        file = System.getProperty("user.home") + file.substring(1);
292                }
293                return file;
294        }
295
296        /**
297         * Pings a HTTP URL. This effectively sends a HEAD request and returns
298         * <code>true</code> if the response code is in the 200-399 range.
299         *
300         * @param url The HTTP URL to be pinged.
301         * @param timeout The timeout in millis for both the connection timeout and
302         * the response read timeout. Note that the total timeout is effectively two
303         * times the given timeout.
304         * @return <code>true</code> if the given HTTP URL has returned response
305         * code 200-399 on a HEAD request within the given timeout, otherwise
306         * <code>false</code>.
307         * @author BalusC,
308         * http://stackoverflow.com/questions/3584210/preferred-java-way-to-ping-a-http-url-for-availability
309         */
310        public static boolean ping(String url, int timeout) {
311                //url = url.replaceFirst("https", "http"); // Otherwise an exception may be thrown on invalid SSL certificates.
312
313                try {
314                        HttpURLConnection connection = (HttpURLConnection) prepareURLConnection(url, timeout);
315                        connection.setRequestMethod("HEAD");
316                        int responseCode = connection.getResponseCode();
317                        return (200 <= responseCode && responseCode <= 399);
318                } catch (IOException exception) {
319                        return false;
320                }
321        }
322
323        /**
324         * Prepare {@link URLConnection} with customised timeouts.
325         *
326         * @param url The URL
327         * @param timeout The timeout in millis for both the connection timeout and
328         * the response read timeout. Note that the total timeout is effectively two
329         * times the given timeout.
330         *
331         * <p>
332         * Example of code.      <code>
333                 * UrlConnection conn = prepareURLConnection("http://www.google.com/", 20000);
334         * conn.connect();
335         * conn.getInputStream();
336         * </code>
337         * <p>
338         *
339         * <strong>NB. User should execute connect() method before getting input
340         * stream.</strong>
341         * @return
342         * @throws IOException
343         * @author Jacek Grzebyta
344         */
345        public static URLConnection prepareURLConnection(String url, int timeout) throws IOException {
346                URLConnection connection = new URL(url).openConnection();
347                connection.setReadTimeout(timeout);
348                connection.setConnectTimeout(timeout);
349                return connection;
350        }
351
352        /**
353         * Recursively delete a folder &amp; contents
354         *
355         * @param dir directory to delete
356         */
357        public static void deleteDirectory(Path dir) throws IOException {
358                if(dir == null || !Files.exists(dir))
359                        return;
360                Files.walkFileTree(dir, new SimpleFileVisitor<>() {
361                @Override
362                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
363                    Files.delete(file);
364                    return FileVisitResult.CONTINUE;
365                }
366
367                @Override
368                public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException {
369                    if (e != null) {
370                        throw e;
371                    }
372                    Files.delete(dir);
373                    return FileVisitResult.CONTINUE;
374                }
375            });
376        }
377        /**
378         * Recursively delete a folder &amp; contents
379         *
380         * @param dir directory to delete
381         */
382        public static void deleteDirectory(String dir) throws IOException {
383                deleteDirectory(Paths.get(dir));
384        }
385
386}