001/* 002 * @(#)ORonn.java 1.0 June 2010 003 * 004 * Copyright (c) 2010 Peter Troshin 005 * 006 * BioJava development code 007 * 008 * This code may be freely distributed and modified under the 009 * terms of the GNU Lesser General Public Licence. This should 010 * be distributed with the code. If you do not have a copy, 011 * see: 012 * 013 * http://www.gnu.org/copyleft/lesser.html 014 * 015 * Copyright for this code is held jointly by the individual 016 * authors. These should be listed in @author doc comments. 017 * 018 * For more information on the BioJava project and its aims, 019 * or to join the biojava-l mailing list, visit the home page 020 * at: 021 * 022 * http://www.biojava.org/ 023 * 024 */ 025package org.biojava.nbio.ronn; 026 027import org.biojava.nbio.data.sequence.FastaSequence; 028import org.biojava.nbio.data.sequence.SequenceUtil; 029import org.biojava.nbio.ronn.ModelLoader.Model; 030import org.slf4j.Logger; 031import org.slf4j.LoggerFactory; 032 033import java.io.FileInputStream; 034import java.io.IOException; 035import java.io.PrintWriter; 036import java.text.DateFormat; 037import java.text.NumberFormat; 038import java.util.Date; 039import java.util.List; 040import java.util.Locale; 041import java.util.concurrent.*; 042 043 044/** 045 * Fully re-factored and enhanced version of RONN. 046 * 047 * This class does the calculation and contains the main for the command line client. 048 * 049 * @author Peter Troshin 050 * @version 1.0 051 * @since 3.0.2 052 053 * TODO refactor 054 */ 055public final class ORonn implements Callable<ORonn> { 056 057 private static final Logger logger = LoggerFactory.getLogger(ORonn.class); 058 059 private static final DateFormat DATE_FORMAT = DateFormat 060 .getDateTimeInstance(DateFormat.LONG, DateFormat.LONG, Locale.US); 061 062 private static final NumberFormat nformat = NumberFormat.getInstance(); 063 static { 064 ORonn.nformat.setMaximumFractionDigits(2); 065 } 066 067 068 static final byte NUMBER_OF_MODELS = 10; 069 private final FastaSequence sequence; 070 private final ModelLoader mloader; 071 private final PrintWriter out; 072 private final ResultLayout layout; 073 private final PrintWriter stat; 074 private final Timer timer; 075 private final float disorder; 076 077 // This gets initialized after calling a call method! 078 private float[] cummulativeScore; 079 080 081 ORonn(final FastaSequence sequence, final ModelLoader mloader, 082 final InputParameters params) throws NumberFormatException, 083 IOException { 084 this.sequence = sequence; 085 this.mloader = mloader; 086 out = params.getOutputWriter(); 087 assert out != null; 088 layout = params.getFormat(); 089 stat = params.getStatWriter(); 090 disorder = params.getDisorder(); 091 timer = new Timer(TimeUnit.MILLISECONDS); 092 } 093 //This constructor is for API calls where the caller collects the results directly 094 ORonn(final FastaSequence sequence, final ModelLoader mloader) throws NumberFormatException, 095 IOException { 096 this.sequence = sequence; 097 this.mloader = mloader; 098 out = new PrintWriter(new NullOutputStream()); 099 layout = ResultLayout.HORIZONTAL; 100 stat = new PrintWriter(new NullOutputStream()); 101 disorder = RonnConstraint.DEFAULT_DISORDER; 102 timer = new Timer(TimeUnit.MILLISECONDS); 103 } 104 105 void writeResults(final float[] meanScores, final char[] seqs) { 106 107 synchronized (out) 108 { 109 out.println(">" + sequence.getId()); 110 if (layout == ResultLayout.VERTICAL) { 111 for (int i = 0; i < meanScores.length; i++) { 112 out.printf("%c\t%.2f%n", seqs[i], meanScores[i]); 113 //out.printf("%c\t%f%n", seqs[i], meanScores[i]); 114 } 115 } else { 116 final StringBuilder seqLine = new StringBuilder(); 117 final StringBuilder resultLine = new StringBuilder(); 118 final String spacer = "\t"; 119 for (int i = 0; i < meanScores.length; i++) { 120 seqLine.append(seqs[i]); 121 seqLine.append(spacer); 122 resultLine.append(ORonn.nformat.format(meanScores[i])); 123 resultLine.append(spacer); 124 } 125 out.println(seqLine.toString()); 126 out.println(resultLine.toString()); 127 } 128 out.println(); 129 out.flush(); 130 } 131 } 132 133 static boolean isValidSequence(final FastaSequence fsequence) { 134 assert fsequence != null; 135 return fsequence.getLength() > RonnConstraint.MIN_SEQUENCE_LENGTH; 136 } 137 138 @Override 139 public ORonn call() throws NumberFormatException, IOException { 140 final String seq = sequence.getSequence(); 141 // Calculate for each model 142 for (int m = 0; m < ORonn.NUMBER_OF_MODELS; m++) { 143 final Model model = mloader.getModel(m); 144 final ORonnModel rmodel = new ORonnModel(seq, model, disorder); 145 final float[] scores = rmodel.detect(); 146 addScore(scores); 147 } 148 149 final char[] ch = seq.toCharArray(); 150 final float[] meanScores = getMeanScores(); 151 assert meanScores.length == seq.length() : "Scores are not calculated for " 152 + "all residues!"; 153 writeResults(meanScores, ch); 154 stat.println(timer.getTotalTime() + "ms prediction completed for " 155 + sequence.getId()); 156 return this; 157 } 158 159 private void addScore(final float[] scores) { 160 // For the first time just add all elements 161 if (cummulativeScore == null) { 162 cummulativeScore = scores; 163 return; 164 } 165 if (cummulativeScore.length != scores.length) { 166 throw new IllegalArgumentException("Expected " 167 + cummulativeScore.length + " but get " + scores.length); 168 } 169 for (int i = 0; i < scores.length; i++) { 170 cummulativeScore[i] += scores[i]; 171 } 172 } 173 174 float[] getMeanScores() { 175 final float[] meanScores = new float[cummulativeScore.length]; 176 for (int i = 0; i < cummulativeScore.length; i++) { 177 meanScores[i] = cummulativeScore[i] / ORonn.NUMBER_OF_MODELS; 178 } 179 return meanScores; 180 } 181 182 /** 183 * 184 * @author pvtroshin 185 * 186 * VERTICAL - where the letters of the sequence and corresponding disorder values are 187 * output in two column layout. 188 * 189 * HORIZONTAL where the disorder values are provided under the letters of the 190 * sequence. Letters and values separated by tabulation in this case. 191 * 192 */ 193 static enum ResultLayout { 194 VERTICAL, HORIZONTAL 195 } 196 197 static void printUsage() { 198 logger.error(RonnConstraint.HELP_MESSAGE); 199 } 200 201 static boolean isValidSequenceForRonn(final FastaSequence fsequence, 202 final PrintWriter stat) { 203 boolean valid = true; 204 String message = ""; 205 if (!ORonn.isValidSequence(fsequence)) { 206 message = "IGNORING sequence " 207 + fsequence.getId() 208 + " as its too short. Minimum sequence length for disorder prediction is " 209 + (RonnConstraint.MIN_SEQUENCE_LENGTH + 1) + " characters!"; 210 stat.println(message); 211 logger.warn(message); 212 valid = false; 213 } 214 final String sequence = fsequence.getSequence(); 215 if (!(SequenceUtil.isProteinSequence(sequence) || SequenceUtil 216 .isAmbiguosProtein(sequence))) { 217 message = "IGNORING sequence " + fsequence.getId() 218 + " as it is not a protein sequence!"; 219 stat.println(message); 220 logger.warn(message); 221 valid = false; 222 } 223 return valid; 224 } 225 226 static void validateSequenceForRonn(final FastaSequence fsequence) { 227 228 String message = ""; 229 if (!ORonn.isValidSequence(fsequence)) { 230 message = "IGNORING sequence " 231 + fsequence.getId() 232 + " as its too short. Minimum sequence length for disorder prediction is " 233 + (RonnConstraint.MIN_SEQUENCE_LENGTH + 1) + " characters!"; 234 throw new IllegalArgumentException(message); 235 } 236 final String sequence = fsequence.getSequence(); 237 238 if ( SequenceUtil.isAmbiguosProtein(sequence)){ 239 logger.warn("Sequence is ambiguous!"); 240 } 241 242 if (!(SequenceUtil.isProteinSequence(sequence) )){ 243 logger.warn("Does not look like a protein sequence!"); 244 } 245 246 if (!(SequenceUtil.isProteinSequence(sequence) || SequenceUtil 247 .isAmbiguosProtein(sequence))) { 248 message = "IGNORING sequence " + fsequence.getId() 249 + " as it is not a protein sequence!"; 250 throw new IllegalArgumentException(message); 251 } 252 } 253 254 private static InputParameters parseArguments(final String[] args) 255 throws IOException { 256 final InputParameters prms = new InputParameters(); 257 for (int i = 0; i < args.length; i++) { 258 final String prm = args[i].trim().toLowerCase(); 259 if (prm.startsWith(InputParameters.inputKey)) { 260 prms.setFilePrm(args[i], InputParameters.inputKey); 261 } 262 if (prm.startsWith(InputParameters.outputKey)) { 263 prms.setFilePrm(args[i], InputParameters.outputKey); 264 } 265 if (prm.startsWith(InputParameters.disorderKey)) { 266 prms.setDisorder(prm); 267 } 268 if (prm.startsWith(InputParameters.formatKey)) { 269 prms.setFormat(prm); 270 } 271 if (prm.startsWith(InputParameters.statKey)) { 272 prms.setFilePrm(args[i], InputParameters.statKey); 273 } 274 if (prm.startsWith(InputParameters.threadKey)) { 275 prms.setThreadNum(prm); 276 } 277 278 } 279 return prms; 280 } 281 282 public static void main(final String[] args) throws NumberFormatException, 283 IOException { 284 285 if ((args.length == 0) || (args.length > 5)) { 286 ORonn.printUsage(); 287 System.exit(1); 288 } 289 final InputParameters prms = ORonn.parseArguments(args); 290 291 final PrintWriter stat = prms.getStatWriter(); 292 stat.println("Using parameters: \n[" + prms + "]"); 293 294 if (prms.getInput() == null) { 295 logger.error("Input is not defined! "); 296 ORonn.printUsage(); 297 System.exit(1); 298 } 299 stat.println("Calculation started: " 300 + ORonn.DATE_FORMAT.format(new Date())); 301 302 final Timer timer = new Timer(); 303 // The stream is closed after reading inside readFasta 304 final List<FastaSequence> sequences = SequenceUtil 305 .readFasta(new FileInputStream(prms.getInput())); 306 stat.println(timer.getStepTime(TimeUnit.MILLISECONDS) 307 + "ms input file loaded"); 308 stat.println("Input file has " + sequences.size() + " sequences"); 309 310 final ModelLoader mloader = new ModelLoader(); 311 mloader.loadModels(); 312 313 final PrintWriter out = prms.getOutputWriter(); 314 assert out != null; 315 316 // do serial execution 317 if (prms.getThreadNum() == 1) { 318 stat.println("Running predictions serially"); 319 ORonn.predictSerial(sequences, prms, mloader); 320 } else { 321 // Run predictions in parallel 322 stat.print("Running preditions in parallel - "); 323 stat.println("Using " + prms.getThreadNum() + " threads"); 324 ORonn.predictParallel(sequences, prms, mloader); 325 } 326 327 stat.println("Total calculation time: " + timer.getTotalTime() + "s "); 328 stat.println("Calculation completed: " 329 + ORonn.DATE_FORMAT.format(new Date())); 330 stat.close(); 331 out.flush(); 332 out.close(); 333 } 334 335 static void predictSerial(final List<FastaSequence> fsequences, 336 final InputParameters prms, final ModelLoader mloader) 337 throws NumberFormatException, IOException { 338 for (final FastaSequence sequence : fsequences) { 339 if (!ORonn.isValidSequenceForRonn(sequence, prms.getStatWriter())) { 340 continue; 341 } 342 final ORonn ronn = new ORonn(sequence, mloader, prms); 343 ronn.call(); 344 } 345 } 346 347 348 static void predictParallel(final List<FastaSequence> fsequences, 349 final InputParameters prms, final ModelLoader mloader) 350 throws NumberFormatException, IOException { 351 final PrintWriter stat = prms.getStatWriter(); 352 353 // Do parallel execution 354 final ExecutorService executor = new ThreadPoolExecutor(prms 355 .getThreadNum(), prms.getThreadNum(), 0L, TimeUnit.SECONDS, 356 new SynchronousQueue<Runnable>(), 357 new ThreadPoolExecutor.CallerRunsPolicy()); 358 try { 359 for (final FastaSequence sequence : fsequences) { 360 if (!ORonn.isValidSequenceForRonn(sequence, stat)) { 361 continue; 362 } 363 final ORonn ronn = new ORonn(sequence, mloader, prms); 364 /* 365 * To get stack traces from tasks one need to obtain a Future 366 * from this method and call its get() method. Otherwise some 367 * task may end up with exception but unnoticed 368 */ 369 executor.submit(ronn); 370 } 371 executor.shutdown(); 372 final int timeOut = (fsequences.size() < 60) ? 60 : fsequences 373 .size(); 374 stat.println("All task submitted. Waiting for complition for " 375 + "maximum of " + timeOut + " minutes"); 376 executor.awaitTermination(timeOut, TimeUnit.MINUTES); 377 } catch (final InterruptedException e) { 378 logger.error("Execution is terminated! " 379 + "Terminated by either by the system or the timeout. " 380 + "Maximum of 1 minute is allowed for one sequence analisys! " 381 + "If it took longer to complite this analysis " 382 + "the program is terminated.", e); 383 } finally { 384 executor.shutdownNow(); 385 } 386 } 387 388} // class end