/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.core.search.io;

import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ServiceLoader;
import java.util.NoSuchElementException;
import java.util.Map;

/**
 * Entry point for reading search reports: picks (or is given) a
 * {@link ResultFactory}, lets it parse the file, and exposes the parsed
 * {@link Result} objects through {@link Iterable}.
 * <p>
 * Designed by Paolo Pavan.
 * You may want to find my contacts on Github and LinkedIn for code info
 * or discuss major changes.
 * https://github.com/paolopavan
 *
 * @author Paolo Pavan
 */
public class SearchIO implements Iterable<Result> {

	/**
	 * Message prefix used when no registered factory declares the file's extension.
	 */
	private static final String NOT_SUPPORTED_FILE_EXCEPTION =
			"This extension is not associated with any parser. You can try to specify a ResultFactory object.";

	/**
	 * Lazily built lookup from file extension to the factory that declared it.
	 * NOTE(review): the lazy initialisation in {@link #guessFactory(File)} is not
	 * synchronized, and the same factory instance is handed to every SearchIO
	 * that resolves to it - confirm this is acceptable for concurrent callers.
	 */
	private static Map<String, ResultFactory> extensionFactoryAssociation;

	private final ResultFactory factory;
	private final File file;

	/**
	 * Threshold handed to the factory when results are loaded; it is the
	 * {@code maxEvalue} constructor argument, or {@link Double#MAX_VALUE} when
	 * none was given. Presumably hsp whose e-value is above it are not loaded -
	 * the filtering itself happens inside the factory, so confirm there.
	 */
	private final double evalueThreshold;

	/**
	 * Contains one object per query sequence describing search results.
	 * Sometimes also referred to as Iterations.
	 * Stays {@code null} when the file did not exist at construction time.
	 */
	private List<Result> results;

	/**
	 * Build a SearchIO reader and try to select the appropriate parser by
	 * inspecting the file extension.
	 *
	 * @param f the report file
	 *
	 * @throws java.io.IOException for file access related issues
	 * @throws java.text.ParseException for file format related issues
	 * @throws UnsupportedOperationException if no known factory declares the
	 *         extension of {@code f}
	 */
	public SearchIO(File f) throws IOException, ParseException {
		this(f, guessFactory(f));
	}

	/**
	 * Build a SearchIO reader and specify a ResultFactory object to be used
	 * for parsing.
	 *
	 * @param f the report file
	 * @param factory the factory used to parse {@code f}
	 *
	 * @throws java.io.IOException for file access related issues
	 * @throws java.text.ParseException for file format related issues
	 */
	public SearchIO(File f, ResultFactory factory) throws IOException, ParseException {
		this(f, factory, Double.MAX_VALUE);
	}

	/**
	 * Build a SearchIO reader, specify a ResultFactory object to be used for parsing
	 * and filter the hsp retrieved by an e-value threshold.
	 * This usually increases parsing speed.
	 *
	 * @param f the report file
	 * @param factory the factory used to parse {@code f}
	 * @param maxEvalue threshold passed to the factory when loading results
	 *
	 * @throws java.io.IOException for file access related issues
	 * @throws java.text.ParseException for file format related issues
	 */
	public SearchIO(File f, ResultFactory factory, double maxEvalue) throws IOException, ParseException {
		this.file = f;
		this.factory = factory;
		this.evalueThreshold = maxEvalue;
		// Parsing is only attempted for an existing file; otherwise results stays null.
		if (file.exists()) {
			readResults();
		}
	}

	/**
	 * This method is declared private because it is the default action of the
	 * constructors when the file exists.
	 *
	 * @throws java.io.IOException for file access related issues
	 * @throws java.text.ParseException for file format related issues
	 */
	private void readResults() throws IOException, ParseException {
		factory.setFile(file);
		results = factory.createObjects(evalueThreshold);
	}

	/**
	 * Intended to write a search report using the guessed or specified factory.
	 * <p>
	 * NOTE(review): as written this points the factory at the file and calls
	 * {@code createObjects} again, discarding its return value - nothing is
	 * written by this class. Confirm whether a write-side factory call was
	 * intended here before relying on this method.
	 *
	 * @throws java.io.IOException for file access related issues
	 * @throws java.text.ParseException for file format related issues
	 */
	public void writeResults() throws IOException, ParseException {
		factory.setFile(file);
		factory.createObjects(evalueThreshold);
	}

	/**
	 * Guess the factory class to be used from the file extension.
	 * It can be used both for reading and for writing.
	 * For ResultFactory classes to be automatically available to this subsystem
	 * they must be listed in the file org.biojava.nbio.core.search.io.ResultFactory
	 * located in src/main/resources
	 *
	 * @param f file. Its last extension (text after the last dot of the file
	 *          name) is compared to the extensions declared by the known
	 *          ResultFactory implementing classes
	 * @return the guessed factory
	 * @throws UnsupportedOperationException if no factory declares the extension
	 */
	private static ResultFactory guessFactory(File f) {
		if (extensionFactoryAssociation == null) {
			extensionFactoryAssociation = new HashMap<>();
			ServiceLoader<ResultFactory> impl = ServiceLoader.load(ResultFactory.class);
			for (ResultFactory loadedImpl : impl) {
				List<String> fileExtensions = loadedImpl.getFileExtensions();
				for (String ext : fileExtensions) {
					extensionFactoryAssociation.put(ext, loadedImpl);
				}
			}
		}

		// Only the last path element is inspected, so a dot in a parent
		// directory name is never mistaken for the start of an extension.
		String filename = f.getName();
		int extensionPos = filename.lastIndexOf('.');
		String extension = extensionPos < 0 ? "" : filename.substring(extensionPos + 1);

		ResultFactory guessed = extensionFactoryAssociation.get(extension);
		if (guessed == null) {
			throw new UnsupportedOperationException(NOT_SUPPORTED_FILE_EXCEPTION
					+ "\nExtension:" + extension);
		}
		return guessed;
	}

	/**
	 * @return the threshold given at construction time, or
	 *         {@link Double#MAX_VALUE} when none was specified
	 */
	public double getEvalueThreshold() {
		return evalueThreshold;
	}

	/**
	 * Iterates over the loaded results in list order. When nothing was loaded
	 * (the file did not exist at construction time) the iteration is empty.
	 * Removal is not supported.
	 */
	@Override
	public Iterator<Result> iterator() {
		return new Iterator<Result>() {
			int currentResult = 0;

			@Override
			public boolean hasNext() {
				// results is null when the constructor skipped parsing.
				return results != null && currentResult < results.size();
			}

			@Override
			public Result next() {
				if (!hasNext()) {
					throw new NoSuchElementException();
				}
				return results.get(currentResult++);
			}

			@Override
			public void remove() {
				throw new UnsupportedOperationException("The remove operation is not supported by this iterator");
			}
		};
	}
}