001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.search.io; 022 023import java.io.File; 024import java.io.IOException; 025import java.text.ParseException; 026import java.util.HashMap; 027import java.util.Iterator; 028import java.util.List; 029import java.util.ServiceLoader; 030 031/** 032 * Designed by Paolo Pavan. 033 * You may want to find my contacts on Github and LinkedIn for code info 034 * or discuss major changes. 035 * https://github.com/paolopavan 036 * 037 * @author Paolo Pavan 038 */ 039 040public class SearchIO implements Iterable<Result>{ 041 static private HashMap<String,ResultFactory> extensionFactoryAssociation; 042 043 final private ResultFactory factory; 044 final private File file; 045 046 /** 047 * this threshold applies in retrieving hsp. Those having e-value below this 048 * will not be loaded. 049 */ 050 private double evalueThreshold = Double.MAX_VALUE; 051 /** 052 * contains one object per query sequence describing search results. 053 * Sometime also referred as Iterations. 054 */ 055 private List<Result> results; 056 057 private final String NOT_SUPPORTED_FILE_EXCEPTION = 058 "This extension is not associated with any parser. You can try to specify a ResultFactory object."; 059 060 /** 061 * Build a SearchIO reader and tries to select the appropriate parser inspecting 062 * file extension. 063 * 064 * @param f 065 * @throws Exception 066 */ 067 public SearchIO (File f) throws IOException, ParseException{ 068 factory = guessFactory(f); 069 file = f; 070 if (file.exists()) readResults(); 071 } 072 073 /** 074 * Build a SearchIO reader and specify a ResultFactory object to be used 075 * for parsing 076 * 077 * @param f 078 * @param factory 079 * 080 * @throws java.io.IOException for file access related issues 081 * @throws java.text.ParseException for file format related issues 082 */ 083 public SearchIO (File f, ResultFactory factory) throws IOException, ParseException{ 084 file = f; 085 this.factory = factory; 086 if (file.exists()) readResults(); 087 } 088 /** 089 * Build a SearchIO reader, specify a ResultFactory object to be used for parsing 090 * and filter hsp retrieved by a e-value threshold. 091 * This usually increase parsing speed 092 * @param f 093 * @param factory 094 * @param maxEvalue 095 * 096 * @throws java.io.IOException for file access related issues 097 * @throws java.text.ParseException for file format related issues 098 */ 099 public SearchIO(File f, ResultFactory factory, double maxEvalue) throws IOException, ParseException{ 100 file = f; 101 this.factory = factory; 102 this.evalueThreshold = maxEvalue; 103 if (file.exists()) readResults(); 104 } 105 106 /** 107 * This method is declared private because it is the default action of constructor 108 * when file exists 109 * 110 * @throws java.io.IOException for file access related issues 111 * @throws java.text.ParseException for file format related issues 112 */ 113 private void readResults() throws IOException, ParseException { 114 factory.setFile(file); 115 results = factory.createObjects(evalueThreshold); 116 } 117 118 /** 119 * used to write a search report using the guessed or specified factory 120 * 121 * @throws java.io.IOException for file access related issues 122 * @throws java.text.ParseException for file format related issues 123 */ 124 public void writeResults() throws IOException, ParseException { 125 factory.setFile(file); 126 factory.createObjects(evalueThreshold); 127 } 128 129 /** 130 * Guess factory class to be used using file extension. 131 * It can be used both for read and for in write. 132 * To be ResultFactory classes automatically available to this subsystem 133 * they must be listed in the file org.biojava.nbio.core.search.io.ResultFactory 134 * located in src/main/resources 135 * 136 * @param f: file. Its last extension (text after last dot) will be compared 137 * to default extensions of known ResultFactory implementing classes 138 * @return the guessed factory 139 */ 140 private ResultFactory guessFactory(File f){ 141 if (extensionFactoryAssociation == null){ 142 extensionFactoryAssociation = new HashMap<String, ResultFactory>(); 143 ServiceLoader<ResultFactory> impl = ServiceLoader.load(ResultFactory.class); 144 for (ResultFactory loadedImpl : impl) { 145 List<String> fileExtensions = loadedImpl.getFileExtensions(); 146 for (String ext: fileExtensions) extensionFactoryAssociation.put(ext, loadedImpl); 147 } 148 } 149 150 String filename = f.getAbsolutePath(); 151 int extensionPos = filename.lastIndexOf("."); 152 String extension = filename.substring(extensionPos + 1); 153 if (extensionFactoryAssociation.get(extension) == null) 154 throw new UnsupportedOperationException(NOT_SUPPORTED_FILE_EXCEPTION 155 + "\nExtension:"+ extension); 156 157 return extensionFactoryAssociation.get(extension); 158 } 159 160 public double getEvalueThreshold() { 161 return evalueThreshold; 162 } 163 164 @Override 165 public Iterator<Result> iterator() { 166 return new Iterator<Result>() { 167 int currentResult = 0; 168 @Override 169 public boolean hasNext() { 170 return currentResult < results.size(); 171 } 172 173 @Override 174 public Result next() { 175 return results.get(currentResult++); 176 } 177 178 @Override 179 public void remove() { 180 throw new UnsupportedOperationException("The remove operation is not supported by this iterator"); 181 } 182 }; 183 } 184}