001/** 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the terms of the GNU 005 * Lesser General Public Licence. This should be distributed with the code. If 006 * you do not have a copy, see: 007 * 008 * http://www.gnu.org/copyleft/lesser.html 009 * 010 * Copyright for this code is held jointly by the individual authors. These 011 * should be listed in @author doc comments. 012 * 013 * For more information on the BioJava project and its aims, or to join the 014 * biojava-l mailing list, visit the home page at: 015 * 016 * http://www.biojava.org/ 017 * 018 * Created on Feb 23, 2012 Created by Andreas Prlic 019 * 020 * @since 3.0.2 021 */ 022package org.biojava.nbio.core.util; 023 024import java.io.File; 025import java.io.FileNotFoundException; 026import java.io.FileOutputStream; 027import java.io.FilenameFilter; 028import java.io.IOException; 029import java.io.InputStream; 030import java.io.PrintStream; 031import java.net.HttpURLConnection; 032import java.net.SocketTimeoutException; 033import java.net.URL; 034import java.net.URLConnection; 035import java.nio.channels.Channels; 036import java.nio.channels.ReadableByteChannel; 037import java.nio.file.*; 038import java.nio.file.attribute.BasicFileAttributes; 039import java.util.Scanner; 040 041import org.slf4j.Logger; 042import org.slf4j.LoggerFactory; 043 044public class FileDownloadUtils { 045 046 private static final String SIZE_EXT = ".size"; 047 private static final String HASH_EXT = ".hash"; 048 private static final Logger logger = LoggerFactory.getLogger(FileDownloadUtils.class); 049 050 public enum Hash{ 051 MD5, SHA1, SHA256, UNKNOWN 052 } 053 054 /** 055 * Gets the file extension of a file, excluding '.'. 056 * If the file name has no extension the file name is returned. 057 * @param f a File 058 * @return The extension 059 */ 060 public static String getFileExtension(File f) { 061 String fileName = f.getName(); 062 String ext = ""; 063 int mid = fileName.lastIndexOf("."); 064 ext = fileName.substring(mid + 1); 065 return ext; 066 } 067 068 /** 069 * Gets the file name up to and excluding the first 070 * '.' character. If there is no extension, the full filename 071 * is returned. 072 * @param f A file 073 * @return A possibly empty but non-null String. 074 */ 075 public static String getFilePrefix(File f) { 076 String fileName = f.getName(); 077 int mid = fileName.indexOf("."); 078 if (mid < 0) { 079 return fileName; 080 } 081 return fileName.substring(0, mid); 082 } 083 084 /** 085 * Download the content provided at URL url and store the result to a local 086 * file, using a temp file to cache the content in case something goes wrong 087 * in download. A timeout of 60 seconds is hard-coded and 10 retries are attempted. 088 * 089 * @param url 090 * @param destination 091 * @throws IOException 092 */ 093 public static void downloadFile(URL url, File destination) throws IOException { 094 int count = 0; 095 int maxTries = 10; 096 int timeout = 60000; //60 sec 097 098 File tempFile = Files.createTempFile(getFilePrefix(destination), "." + getFileExtension(destination)).toFile(); 099 100 // Took following recipe from stackoverflow: 101 // http://stackoverflow.com/questions/921262/how-to-download-and-save-a-file-from-internet-using-java 102 // It seems to be the most efficient way to transfer a file 103 // See: http://docs.oracle.com/javase/7/docs/api/java/nio/channels/FileChannel.html 104 ReadableByteChannel rbc = null; 105 FileOutputStream fos = null; 106 while (true) { 107 try { 108 URLConnection connection = prepareURLConnection(url.toString(), timeout); 109 connection.connect(); 110 InputStream inputStream = connection.getInputStream(); 111 112 rbc = Channels.newChannel(inputStream); 113 fos = new FileOutputStream(tempFile); 114 fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); 115 break; 116 } catch (SocketTimeoutException e) { 117 if (++count == maxTries) throw e; 118 } finally { 119 if (rbc != null) { 120 rbc.close(); 121 } 122 if (fos != null) { 123 fos.close(); 124 } 125 } 126 } 127 128 logger.debug("Copying temp file [{}] to final location [{}]", tempFile, destination); 129 Files.copy(tempFile.toPath(), destination.toPath(), StandardCopyOption.REPLACE_EXISTING); 130 131 // delete the tmp file 132 tempFile.delete(); 133 134 } 135 136 /** 137 * Creates validation files beside a file to be downloaded.<br> 138 * Whenever possible, for a <code>file.ext</code> file, it creates 139 * <code>file.ext.size</code> and <code>file.hash</code> for in the same 140 * folder where <code>file.ext</code> exists. 141 * If the file connection size could not be deduced from the URL, no size file is created. 142 * If <code>hashURL</code> is <code>null</code>, no hash file is created. 143 * @param url the remote file URL to download 144 * @param localDestination the local file to download into 145 * @param hashURL the URL of the hash file to download. Can be <code>null</code>. 146 * @param hash The Hashing algorithm. Ignored if <code>hashURL</code> is <code>null</code>. 147 */ 148 public static void createValidationFiles(URL url, File localDestination, URL hashURL, Hash hash){ 149 try { 150 URLConnection resourceConnection = url.openConnection(); 151 createValidationFiles(resourceConnection, localDestination, hashURL, FileDownloadUtils.Hash.UNKNOWN); 152 } catch (IOException e) { 153 logger.warn("could not open connection to resource file due to exception: {}", e.getMessage()); 154 } 155 } 156 /** 157 * Creates validation files beside a file to be downloaded.<br> 158 * Whenever possible, for a <code>file.ext</code> file, it creates 159 * <code>file.ext.size</code> and <code>file.hash_XXXX</code> in the same 160 * folder where <code>file.ext</code> exists (XXXX may be DM5, SHA1, or SHA256). 161 * If the file connection size could not be deduced from the resourceUrlConnection 162 * {@link URLConnection}, no size file is created. 163 * If <code>hashURL</code> is <code>null</code>, no hash file is created.<br> 164 * <b>N.B.</b> None of the hashing algorithms is implemented (yet), because we did not need any of them yet. 165 * @param resourceUrlConnection the remote file URLConnection to download 166 * @param localDestination the local file to download into 167 * @param hashURL the URL of the hash file to download. Can be <code>null</code>. 168 * @param hash The Hashing algorithm. Ignored if <code>hashURL</code> is <code>null</code>. 169 * @since 7.0.0 170 */ 171 public static void createValidationFiles(URLConnection resourceUrlConnection, File localDestination, URL hashURL, Hash hash){ 172 long size = resourceUrlConnection.getContentLengthLong(); 173 if(size == -1) { 174 logger.debug("Could not find expected file size for resource {}. Size validation metadata file won't be available for this download.", resourceUrlConnection.getURL()); 175 } else { 176 logger.debug("Content-Length: {}", size); 177 File sizeFile = new File(localDestination.getParentFile(), localDestination.getName() + SIZE_EXT); 178 try (PrintStream sizePrintStream = new PrintStream(sizeFile)) { 179 sizePrintStream.print(size); 180 } catch (FileNotFoundException e) { 181 logger.warn("Could not write size validation metadata file due to exception: {}", e.getMessage()); 182 } 183 } 184 185 if(hashURL == null) 186 return; 187 188 if(hash == Hash.UNKNOWN) 189 throw new IllegalArgumentException("Hash URL given but algorithm is unknown"); 190 try { 191 File hashFile = new File(localDestination.getParentFile(), String.format("%s%s_%s", localDestination.getName(), HASH_EXT, hash)); 192 downloadFile(hashURL, hashFile); 193 } catch (IOException e) { 194 logger.warn("Could not write validation hash file due to exception: {}", e.getMessage()); 195 } 196 } 197 198 /** 199 * Validate a local file based on pre-existing metadata files for size and hash.<br> 200 * If the passed in <code>localFile</code> parameter is a file named <code>file.ext</code>, the function searches in the same folder for: 201 * <ul> 202 * <li><code>file.ext.size</code>: If found, it compares the size stored in it to the length of <code>localFile</code> (in bytes).</li> 203 * <li><code>file.ext.hash_XXXX (where XXXX is DM5, SHA1, or SHA256)</code>: If found, it compares the size stored in it to the hash code of <code>localFile</code>.</li> 204 * </ul> 205 * If any of these comparisons fail, the function returns <code>false</code>. otherwise it returns true. 206 * <p> 207 * <b>N.B.</b> None of the 3 common verification hashing algorithms are implement yet. 208 * @param localFile The file to validate 209 * @return <code>false</code> if any of the size or hash code metadata files exists but its contents does not match the expected value in the file, <code>true</code> otherwise. 210 * @since 7.0.0 211 */ 212 public static boolean validateFile(File localFile) { 213 File sizeFile = new File(localFile.getParentFile(), localFile.getName() + SIZE_EXT); 214 if(sizeFile.exists()) { 215 try (Scanner scanner = new Scanner(sizeFile)) { 216 long expectedSize = scanner.nextLong(); 217 long actualSize = localFile.length(); 218 if (expectedSize != actualSize) { 219 logger.warn("File [{}] size ({}) does not match expected size ({}).", localFile, actualSize, expectedSize); 220 return false; 221 } 222 } catch (FileNotFoundException e) { 223 logger.warn("could not validate size of file [{}] because no size metadata file exists.", localFile); 224 } 225 } 226 227 File[] hashFiles = localFile.getParentFile().listFiles(new FilenameFilter() { 228 final String hashPattern = String.format("%s%s_(%s|%s|%s)", localFile.getName(), HASH_EXT, Hash.MD5, Hash.SHA1, Hash.SHA256); 229 @Override 230 public boolean accept(File dir, String name) { 231 return name.matches(hashPattern); 232 } 233 }); 234 if(hashFiles.length > 0) { 235 File hashFile = hashFiles[0]; 236 String name = hashFile.getName(); 237 String algo = name.substring(name.lastIndexOf('_') + 1); 238 switch (Hash.valueOf(algo)) { 239 case MD5: 240 case SHA1: 241 case SHA256: 242 throw new UnsupportedOperationException("Not yet implemented"); 243 case UNKNOWN: 244 default: // No need. Already checked above 245 throw new IllegalArgumentException("Hashing algorithm not known: " + algo); 246 } 247 } 248 249 return true; 250 } 251 252 /** 253 * Converts path to Unix convention and adds a terminating slash if it was 254 * omitted. 255 * 256 * @param path original platform dependent path 257 * @return path in Unix convention 258 * @author Peter Rose 259 * @since 3.2 260 */ 261 public static String toUnixPath(String path) { 262 String uPath = path; 263 if (uPath.contains("\\")) { 264 uPath = uPath.replaceAll("\\\\", "/"); 265 } 266 // this should be removed, it's need since "\" is added AtomCache code 267 if (uPath.endsWith("//")) { 268 uPath = uPath.substring(0, uPath.length() - 1); 269 } 270 if (!uPath.endsWith("/")) { 271 uPath = uPath + "/"; 272 } 273 return uPath; 274 } 275 276 /** 277 * Expands ~ in paths to the user's home directory. 278 * 279 * <p> 280 * This does not work for some special cases for paths: Other users' homes 281 * (~user/...), and Tilde expansion within the path (/.../~/...). In these cases 282 * the original argument is returned. 283 * 284 * @param file A filepath starting with a tilde 285 * @return An absolute path 286 */ 287 public static String expandUserHome(String file) { 288 // replace any / with the proper separator (/ or \ for Linux and Windows respectively). 289 file = file.replaceAll("/", "\\"+File.separator); //The "\\" is to escape the separator if needed. 290 if (file.startsWith("~") && (file.length() == 1 || File.separator.equals(file.substring(1, 2)))) { 291 file = System.getProperty("user.home") + file.substring(1); 292 } 293 return file; 294 } 295 296 /** 297 * Pings a HTTP URL. This effectively sends a HEAD request and returns 298 * <code>true</code> if the response code is in the 200-399 range. 299 * 300 * @param url The HTTP URL to be pinged. 301 * @param timeout The timeout in millis for both the connection timeout and 302 * the response read timeout. Note that the total timeout is effectively two 303 * times the given timeout. 304 * @return <code>true</code> if the given HTTP URL has returned response 305 * code 200-399 on a HEAD request within the given timeout, otherwise 306 * <code>false</code>. 307 * @author BalusC, 308 * http://stackoverflow.com/questions/3584210/preferred-java-way-to-ping-a-http-url-for-availability 309 */ 310 public static boolean ping(String url, int timeout) { 311 //url = url.replaceFirst("https", "http"); // Otherwise an exception may be thrown on invalid SSL certificates. 312 313 try { 314 HttpURLConnection connection = (HttpURLConnection) prepareURLConnection(url, timeout); 315 connection.setRequestMethod("HEAD"); 316 int responseCode = connection.getResponseCode(); 317 return (200 <= responseCode && responseCode <= 399); 318 } catch (IOException exception) { 319 return false; 320 } 321 } 322 323 /** 324 * Prepare {@link URLConnection} with customised timeouts. 325 * 326 * @param url The URL 327 * @param timeout The timeout in millis for both the connection timeout and 328 * the response read timeout. Note that the total timeout is effectively two 329 * times the given timeout. 330 * 331 * <p> 332 * Example of code. <code> 333 * UrlConnection conn = prepareURLConnection("http://www.google.com/", 20000); 334 * conn.connect(); 335 * conn.getInputStream(); 336 * </code> 337 * <p> 338 * 339 * <strong>NB. User should execute connect() method before getting input 340 * stream.</strong> 341 * @return 342 * @throws IOException 343 * @author Jacek Grzebyta 344 */ 345 public static URLConnection prepareURLConnection(String url, int timeout) throws IOException { 346 URLConnection connection = new URL(url).openConnection(); 347 connection.setReadTimeout(timeout); 348 connection.setConnectTimeout(timeout); 349 return connection; 350 } 351 352 /** 353 * Recursively delete a folder & contents 354 * 355 * @param dir directory to delete 356 */ 357 public static void deleteDirectory(Path dir) throws IOException { 358 if(dir == null || !Files.exists(dir)) 359 return; 360 Files.walkFileTree(dir, new SimpleFileVisitor<>() { 361 @Override 362 public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { 363 Files.delete(file); 364 return FileVisitResult.CONTINUE; 365 } 366 367 @Override 368 public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException { 369 if (e != null) { 370 throw e; 371 } 372 Files.delete(dir); 373 return FileVisitResult.CONTINUE; 374 } 375 }); 376 } 377 /** 378 * Recursively delete a folder & contents 379 * 380 * @param dir directory to delete 381 */ 382 public static void deleteDirectory(String dir) throws IOException { 383 deleteDirectory(Paths.get(dir)); 384 } 385 386}