001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 01-21-2010 021 */ 022package org.biojava.nbio.core.sequence.io.util; 023 024import org.biojava.nbio.core.exceptions.ParserException; 025import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet; 026import org.biojava.nbio.core.sequence.compound.AmbiguityRNACompoundSet; 027import org.biojava.nbio.core.sequence.compound.DNACompoundSet; 028import org.biojava.nbio.core.sequence.compound.RNACompoundSet; 029import org.biojava.nbio.core.sequence.template.Compound; 030import org.biojava.nbio.core.sequence.template.CompoundSet; 031import org.biojava.nbio.core.sequence.template.Sequence; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import java.io.*; 036import java.util.ArrayList; 037import java.util.List; 038import java.util.zip.GZIPInputStream; 039 040public class IOUtils { 041 042 private static final Logger logger = LoggerFactory.getLogger(IOUtils.class); 043 044 private static final int BUFFER = 4096; 045 046 /** 047 * Closes any Object which implements the interface {@link Closeable} and 048 * sending any error to the logger but not forcing any explicit catching of 049 * stream errors. 050 * 051 * @param c The stream to close 052 */ 053 public static void close(Closeable c) { 054 try { 055 if (c != null) { 056 c.close(); 057 } 058 } catch (IOException e) { 059 logger.warn("Cannot close down the given Closeable object", e); 060 } 061 } 062 063 /** 064 * Moves the bytes from input to output using a 4KB byte array. 065 * 066 * @param input Input stream of bytes 067 * @param output Output stream of bytes 068 * @throws IOException If anything occurs in the case of the reads and writes 069 */ 070 public static void copy(InputStream input, OutputStream output) 071 throws IOException { 072 byte[] buffer = new byte[BUFFER]; 073 int n = 0; 074 while (-1 != (n = input.read(buffer))) { 075 output.write(buffer, 0, n); 076 } 077 } 078 079 /** 080 * Takes in a reader and a processor, reads every line from the given 081 * file and then invokes the processor. What you do with the lines is 082 * dependent on your processor. 083 * 084 * The code will automatically close the given BufferedReader. 085 * 086 * @param br The reader to process 087 * @param processor The processor to invoke on all lines 088 * @throws ParserException Can throw this if we cannot parse the given reader 089 */ 090 public static void processReader(BufferedReader br, ReaderProcessor processor) throws ParserException { 091 String line; 092 try { 093 while( (line = br.readLine()) != null ) { 094 processor.process(line); 095 } 096 } 097 catch(IOException e) { 098 throw new ParserException("Could not read from the given BufferedReader"); 099 } 100 finally { 101 close(br); 102 } 103 } 104 105 /** 106 * Returns the contents of a buffered reader as a list of strings 107 * 108 * @param br BufferedReader to read from; <strong>will be closed</strong> 109 * @return List of Strings 110 * @throws ParserException Can throw this if we cannot parse the given reader 111 */ 112 public static List<String> getList(BufferedReader br) throws ParserException { 113 final List<String> list = new ArrayList<String>(); 114 processReader(br, new ReaderProcessor() { 115 @Override 116 public void process(String line) { 117 list.add(line); 118 } 119 }); 120 return list; 121 } 122 123 /** 124 * Delegates to {@link #getList(BufferedReader)} by wrapping the InputStream 125 * in a valid reader. No encoding is mentioned so if you need anything 126 * more advanced then use the other version of this method. 127 * 128 * @param is InputStream which is a text file 129 * @return List of Strings representing the lines of the files 130 * @throws ParserException Can throw this if the file is not a file or we 131 * cannot parse it 132 */ 133 public static List<String> getList(InputStream is) throws ParserException { 134 return getList(new BufferedReader(new InputStreamReader(is))); 135 } 136 137 /** 138 * Delegates to {@link #getList(InputStream)} by wrapping the File 139 * in a valid stream. No encoding is mentioned so if you need anything 140 * more advanced then use the other version of this method. Since this 141 * uses {@link #openFile(File)} this code can support GZipped and plain 142 * files. 143 * 144 * @param file File which is a text file 145 * @return List of Strings representing the lines of the files 146 * @throws IOException 147 */ 148 public static List<String> getList(File file) throws IOException { 149 return getList(openFile(file)); 150 } 151 152 /** 153 * For a filename this code will check the extension of the file for a 154 * .gz extension. If it finds one then the InputStream given back 155 * is a {@link GZIPInputStream}. Otherwise we return a normal 156 * {@link FileInputStream}. 157 * 158 * @param file File which may or may not be GZipped 159 * @return The final stream 160 * @throws IOExceptio n 161 */ 162 public static InputStream openFile(File file) throws IOException { 163 final InputStream is; 164 if(!file.isFile()) { 165 throw new ParserException("The file "+file+" is not a file."); 166 } 167 String name = file.getName(); 168 169 if(name.endsWith(".gz")) { 170 is = new GZIPInputStream(new FileInputStream(file)); 171 } 172 else { 173 is = new FileInputStream(file); 174 } 175 176 return is; 177 } 178 179 /** 180 * Closure interface used when working with 181 * {@link IOUtils#processReader(String)}. Each time a line is encountered 182 * the object that implements this interface will be invoked. 183 * 184 * @author ayates 185 */ 186 public static interface ReaderProcessor { 187 void process(String line) throws IOException; 188 } 189 190 /** 191 * Calculates GCG checksum for entire list of sequences 192 * 193 * @param sequences list of sequences 194 * @return GCG checksum 195 */ 196 public static <S extends Sequence<C>, C extends Compound> int getGCGChecksum(List<S> sequences) { 197 int check = 0; 198 for (S as : sequences) { 199 check += getGCGChecksum(as); 200 } 201 return check % 10000; 202 } 203 204 /** 205 * Calculates GCG checksum for a given sequence 206 * 207 * @param sequence given sequence 208 * @return GCG checksum 209 */ 210 public static <S extends Sequence<C>, C extends Compound> int getGCGChecksum(S sequence) { 211 String s = sequence.toString().toUpperCase(); 212 int count = 0, check = 0; 213 for (int i = 0; i < s.length(); i++) { 214 count++; 215 check += count * s.charAt(i); 216 if (count == 57) { 217 count = 0; 218 } 219 } 220 return check % 10000; 221 } 222 223 /** 224 * Assembles a GCG file header 225 * 226 * @param sequences list of sequences 227 * @return GCG header 228 */ 229 public static <S extends Sequence<C>, C extends Compound> String getGCGHeader(List<S> sequences) { 230 StringBuilder header = new StringBuilder(); 231 S s1 = sequences.get(0); 232 header.append(String.format("MSA from BioJava%n%n MSF: %d Type: %s Check: %d ..%n%n", 233 s1.getLength(), getGCGType(s1.getCompoundSet()), getGCGChecksum(sequences))); 234 String format = " Name: " + getIDFormat(sequences) + " Len: " + s1.getLength() + " Check: %4d Weight: 1.0%n"; 235 for (S as : sequences) { 236 header.append(String.format(format, as.getAccession(), getGCGChecksum(as))); 237 // TODO show weights in MSF header 238 } 239 header.append(String.format("%n//%n%n")); 240 // TODO? convert gap characters to '.' 241 return header.toString(); 242 } 243 244 /** 245 * Determines GCG type 246 * @param cs compound set of sequences 247 * @return GCG type 248 */ 249 public static <C extends Compound> String getGCGType(CompoundSet<C> cs) { 250 return (cs == DNACompoundSet.getDNACompoundSet() || cs == AmbiguityDNACompoundSet.getDNACompoundSet()) ? "D" : 251 (cs == RNACompoundSet.getRNACompoundSet() || cs == AmbiguityRNACompoundSet.getRNACompoundSet()) ? "R" : "P"; 252 } 253 254 /** 255 * Creates format String for accession IDs 256 * 257 * @param sequences list of sequences 258 * @return format String for accession IDs 259 */ 260 public static <S extends Sequence<C>, C extends Compound> String getIDFormat(List<S> sequences) { 261 int length = 0; 262 for (S as : sequences) { 263 length = Math.max(length, (as.getAccession() == null) ? 0 : as.getAccession().toString().length()); 264 } 265 return (length == 0) ? null : "%-" + (length + 1) + "s"; 266 } 267 268 /** 269 * Creates formatted String for a single character of PDB output 270 * 271 * @param web true for HTML display 272 * @param c1 character in first sequence 273 * @param c2 character in second sequence 274 * @param similar true if c1 and c2 are considered similar compounds 275 * @param c character to display 276 * @return formatted String 277 */ 278 public static String getPDBCharacter(boolean web, char c1, char c2, boolean similar, char c) { 279 String s = String.valueOf(c); 280 return getPDBString(web, c1, c2, similar, s, s, s, s); 281 } 282 283 /** 284 * Creates formatted String for displaying conservation in PDB output 285 * 286 * @param web true for HTML display 287 * @param c1 character in first sequence 288 * @param c2 character in second sequence 289 * @param similar true if c1 and c2 are considered similar compounds 290 * @return formatted String 291 */ 292 public static String getPDBConservation(boolean web, char c1, char c2, boolean similar) { 293 return getPDBString(web, c1, c2, similar, "|", ".", " ", web ? " " : " "); 294 } 295 296 // helper method for getPDBCharacter and getPDBConservation 297 private static String getPDBString(boolean web, char c1, char c2, boolean similar, String m, String sm, String dm, 298 String qg) { 299 if (c1 == c2) 300 return web ? "<span class=\"m\">" + m + "</span>" : m; 301 else if (similar) 302 return web ? "<span class=\"sm\">" + sm + "</span>" : sm; 303 else if (c1 == '-' || c2 == '-') 304 return web ? "<span class=\"dm\">" + dm + "</span>" : dm; 305 else 306 return web ? "<span class=\"qg\">" + qg + "</span>" : qg; 307 } 308 309 /** 310 * Creates formatted String for displaying conservation legend in PDB output 311 * 312 * @return legend String 313 */ 314 public static String getPDBLegend() { 315 StringBuilder s = new StringBuilder(); 316 s.append("</pre></div>"); 317 s.append(" <div class=\"subText\">"); 318 s.append(" <b>Legend:</b>"); 319 s.append(" <span class=\"m\">Green</span> - identical residues |"); 320 s.append(" <span class=\"sm\">Pink</span> - similar residues | "); 321 s.append(" <span class=\"qg\">Blue</span> - sequence mismatch |"); 322 s.append(" <span class=\"dm\">Brown</span> - insertion/deletion |"); 323 s.append(" </div>"); 324 s.append(String.format("%n")); 325 return s.toString(); 326 } 327 328 /** 329 * Prints {@code string} to {@code file}. 330 * @throws IOException If any I/O exception occurs while printing; this method does not catch any exceptions 331 */ 332 public static void print(String string, File file) throws IOException { 333 PrintWriter out = null; 334 try { 335 out = new PrintWriter(new BufferedWriter(new FileWriter(file))); 336 out.print(string); 337 out.flush(); 338 out.close(); 339 } finally { 340 if (out != null) out.close(); 341 } 342 } 343 344}