001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.core.search.io; 022 023import java.io.File; 024import java.io.IOException; 025import java.text.ParseException; 026import java.util.HashMap; 027import java.util.Iterator; 028import java.util.List; 029import java.util.ServiceLoader; 030import java.util.NoSuchElementException; 031 032/** 033 * Designed by Paolo Pavan. 034 * You may want to find my contacts on Github and LinkedIn for code info 035 * or discuss major changes. 036 * https://github.com/paolopavan 037 * 038 * @author Paolo Pavan 039 */ 040 041public class SearchIO implements Iterable<Result>{ 042 static private HashMap<String,ResultFactory> extensionFactoryAssociation; 043 044 final private ResultFactory factory; 045 final private File file; 046 047 /** 048 * this threshold applies in retrieving hsp. Those having e-value below this 049 * will not be loaded. 050 */ 051 private double evalueThreshold = Double.MAX_VALUE; 052 /** 053 * contains one object per query sequence describing search results. 054 * Sometime also referred as Iterations. 055 */ 056 private List<Result> results; 057 058 private final String NOT_SUPPORTED_FILE_EXCEPTION = 059 "This extension is not associated with any parser. You can try to specify a ResultFactory object."; 060 061 /** 062 * Build a SearchIO reader and tries to select the appropriate parser inspecting 063 * file extension. 064 * 065 * @param f 066 * @throws Exception 067 */ 068 public SearchIO (File f) throws IOException, ParseException{ 069 factory = guessFactory(f); 070 file = f; 071 if (file.exists()) readResults(); 072 } 073 074 /** 075 * Build a SearchIO reader and specify a ResultFactory object to be used 076 * for parsing 077 * 078 * @param f 079 * @param factory 080 * 081 * @throws java.io.IOException for file access related issues 082 * @throws java.text.ParseException for file format related issues 083 */ 084 public SearchIO (File f, ResultFactory factory) throws IOException, ParseException{ 085 file = f; 086 this.factory = factory; 087 if (file.exists()) readResults(); 088 } 089 /** 090 * Build a SearchIO reader, specify a ResultFactory object to be used for parsing 091 * and filter hsp retrieved by a e-value threshold. 092 * This usually increase parsing speed 093 * @param f 094 * @param factory 095 * @param maxEvalue 096 * 097 * @throws java.io.IOException for file access related issues 098 * @throws java.text.ParseException for file format related issues 099 */ 100 public SearchIO(File f, ResultFactory factory, double maxEvalue) throws IOException, ParseException{ 101 file = f; 102 this.factory = factory; 103 this.evalueThreshold = maxEvalue; 104 if (file.exists()) readResults(); 105 } 106 107 /** 108 * This method is declared private because it is the default action of constructor 109 * when file exists 110 * 111 * @throws java.io.IOException for file access related issues 112 * @throws java.text.ParseException for file format related issues 113 */ 114 private void readResults() throws IOException, ParseException { 115 factory.setFile(file); 116 results = factory.createObjects(evalueThreshold); 117 } 118 119 /** 120 * used to write a search report using the guessed or specified factory 121 * 122 * @throws java.io.IOException for file access related issues 123 * @throws java.text.ParseException for file format related issues 124 */ 125 public void writeResults() throws IOException, ParseException { 126 factory.setFile(file); 127 factory.createObjects(evalueThreshold); 128 } 129 130 /** 131 * Guess factory class to be used using file extension. 132 * It can be used both for read and for in write. 133 * To be ResultFactory classes automatically available to this subsystem 134 * they must be listed in the file org.biojava.nbio.core.search.io.ResultFactory 135 * located in src/main/resources 136 * 137 * @param f: file. Its last extension (text after last dot) will be compared 138 * to default extensions of known ResultFactory implementing classes 139 * @return the guessed factory 140 */ 141 private ResultFactory guessFactory(File f){ 142 if (extensionFactoryAssociation == null){ 143 extensionFactoryAssociation = new HashMap<String, ResultFactory>(); 144 ServiceLoader<ResultFactory> impl = ServiceLoader.load(ResultFactory.class); 145 for (ResultFactory loadedImpl : impl) { 146 List<String> fileExtensions = loadedImpl.getFileExtensions(); 147 for (String ext: fileExtensions) extensionFactoryAssociation.put(ext, loadedImpl); 148 } 149 } 150 151 String filename = f.getAbsolutePath(); 152 int extensionPos = filename.lastIndexOf("."); 153 String extension = filename.substring(extensionPos + 1); 154 if (extensionFactoryAssociation.get(extension) == null) 155 throw new UnsupportedOperationException(NOT_SUPPORTED_FILE_EXCEPTION 156 + "\nExtension:"+ extension); 157 158 return extensionFactoryAssociation.get(extension); 159 } 160 161 public double getEvalueThreshold() { 162 return evalueThreshold; 163 } 164 165 @Override 166 public Iterator<Result> iterator() { 167 return new Iterator<Result>() { 168 int currentResult = 0; 169 @Override 170 public boolean hasNext() { 171 return currentResult < results.size(); 172 } 173 174 @Override 175 public Result next() { 176 if(!hasNext()){ 177 throw new NoSuchElementException(); 178 } 179 return results.get(currentResult++); 180 } 181 182 @Override 183 public void remove() { 184 throw new UnsupportedOperationException("The remove operation is not supported by this iterator"); 185 } 186 }; 187 } 188}