/*
 * @(#)ORonn.java 1.0 June 2010
 *
 * Copyright (c) 2010 Peter Troshin
 *
 * BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence. This should
 * be distributed with the code. If you do not have a copy,
 * see:
 *
 * http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 * http://www.biojava.org/
 *
 */
package org.biojava.nbio.ronn;

import org.biojava.nbio.data.sequence.FastaSequence;
import org.biojava.nbio.data.sequence.SequenceUtil;
import org.biojava.nbio.ronn.ModelLoader.Model;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.DateFormat;
import java.text.NumberFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.*;
import java.util.stream.IntStream;
import java.util.stream.Stream;


/**
 * Fully re-factored and enhanced version of RONN.
 *
 * This class does the calculation and contains the main for the command line client.
 *
 * <p>One instance handles one sequence: {@link #call()} runs every model on the
 * sequence, sums the per-residue scores, writes the averaged scores to the output
 * writer and records the elapsed time on the statistics writer.
 *
 * @author Peter Troshin
 * @version 1.0
 * @since 3.0.2
 *
 * TODO refactor
 */
public final class ORonn implements Callable<ORonn> {

    private static final Logger logger = LoggerFactory.getLogger(ORonn.class);

    // Only used from main(), i.e. from a single thread.
    private static final DateFormat DATE_FORMAT = DateFormat
            .getDateTimeInstance(DateFormat.LONG, DateFormat.LONG, Locale.US);

    /**
     * Largest accepted number of command line arguments: one for each of the six
     * keys recognised by {@link #parseArguments(String[])} (input, output,
     * disorder, format, statistics and thread number).
     */
    private static final int MAX_ARGUMENTS = 6;

    /** Logged when the parallel run is interrupted or does not finish in time. */
    private static final String TERMINATED_MESSAGE = "Execution is terminated! "
            + "Terminated by either by the system or the timeout. "
            + "Maximum of 1 minute is allowed for one sequence analisys! "
            + "If it took longer to complite this analysis "
            + "the program is terminated.";

    /** Number of models that are run per sequence and over which scores are averaged. */
    static final byte NUMBER_OF_MODELS = 10;

    private final FastaSequence sequence;
    private final ModelLoader mloader;
    private final PrintWriter out;
    private final ResultLayout layout;
    private final PrintWriter stat;
    private final Timer timer;
    private final float disorder;

    // This gets initialized after calling a call method!
    private float[] cummulativeScore;


    /**
     * Creates a prediction task whose output, layout, statistics writer and
     * disorder value are taken from the supplied parameters.
     *
     * @param sequence the sequence to analyse
     * @param mloader source of the models
     * @param params supplies the output writer, result layout, statistics writer
     *        and disorder value
     * @throws IOException declared for callers; presumably raised by the writer
     *         accessors of {@code params} - TODO confirm
     */
    ORonn(final FastaSequence sequence, final ModelLoader mloader,
            final InputParameters params) throws IOException {
        this.sequence = sequence;
        this.mloader = mloader;
        out = params.getOutputWriter();
        assert out != null;
        layout = params.getFormat();
        stat = params.getStatWriter();
        disorder = params.getDisorder();
        timer = new Timer(TimeUnit.MILLISECONDS);
    }

    /**
     * This constructor is for API calls where the caller collects the results
     * directly: output and statistics go to {@link NullOutputStream}-backed
     * writers, the layout is horizontal and the default disorder value is used.
     *
     * @param sequence the sequence to analyse
     * @param mloader source of the models
     * @throws IOException kept in the signature for compatibility with existing callers
     */
    ORonn(final FastaSequence sequence, final ModelLoader mloader) throws IOException {
        this.sequence = sequence;
        this.mloader = mloader;
        out = new PrintWriter(new NullOutputStream());
        layout = ResultLayout.HORIZONTAL;
        stat = new PrintWriter(new NullOutputStream());
        disorder = RonnConstraint.DEFAULT_DISORDER;
        timer = new Timer(TimeUnit.MILLISECONDS);
    }

    /**
     * Writes one result record: a {@code >id} header followed by the residues and
     * their scores in the configured layout, then an empty line.
     *
     * <p>The whole record is written while holding the monitor of the output
     * writer, so records produced by instances that share this writer do not
     * interleave.
     *
     * @param meanScores per-residue mean scores
     * @param seqs residues of the sequence, index-aligned with {@code meanScores}
     */
    void writeResults(final float[] meanScores, final char[] seqs) {
        synchronized (out) {
            out.println(">" + sequence.getId());
            if (layout == ResultLayout.VERTICAL) {
                for (int i = 0; i < meanScores.length; i++) {
                    out.printf(Locale.US, "%c\t%.2f%n", seqs[i], meanScores[i]);
                }
            } else {
                // NumberFormat is not safe for concurrent use, and instances created
                // through the API constructor do not share the 'out' lock, so every
                // invocation formats with its own instance.
                final NumberFormat scoreFormat = newScoreFormat();
                final StringBuilder seqLine = new StringBuilder();
                final StringBuilder resultLine = new StringBuilder();
                final String spacer = "\t";
                for (int i = 0; i < meanScores.length; i++) {
                    seqLine.append(seqs[i]).append(spacer);
                    resultLine.append(scoreFormat.format(meanScores[i])).append(spacer);
                }
                out.println(seqLine.toString());
                out.println(resultLine.toString());
            }
            out.println();
            out.flush();
        }
    }

    /** Creates the default-locale number format (at most two fraction digits) used for horizontal output. */
    private static NumberFormat newScoreFormat() {
        final NumberFormat format = NumberFormat.getInstance();
        format.setMaximumFractionDigits(2);
        return format;
    }

    /**
     * Checks the sequence length only.
     *
     * @param fsequence the sequence to check, must not be null
     * @return true if the sequence is strictly longer than
     *         {@code RonnConstraint.MIN_SEQUENCE_LENGTH}
     */
    static boolean isValidSequence(final FastaSequence fsequence) {
        assert fsequence != null;
        return fsequence.getLength() > RonnConstraint.MIN_SEQUENCE_LENGTH;
    }

    /**
     * Runs all {@link #NUMBER_OF_MODELS} models on the sequence, writes the mean
     * scores to the output writer and the elapsed time to the statistics writer.
     *
     * @return this instance, so that the caller can read {@link #getMeanScores()}
     * @throws IOException declared by the original signature
     */
    @Override
    public ORonn call() throws IOException {
        final String seq = sequence.getSequence();
        // Start from a clean accumulator so that a repeated call() does not add
        // a second set of scores on top of the first one.
        cummulativeScore = null;
        // Calculate for each model
        for (int modelNumber = 0; modelNumber < NUMBER_OF_MODELS; modelNumber++) {
            addScore(new ORonnModel(seq, mloader.getModel(modelNumber), disorder).detect());
        }
        final char[] ch = seq.toCharArray();
        final float[] meanScores = getMeanScores();
        assert meanScores.length == seq.length() : "Scores are not calculated for "
                + "all residues!";
        writeResults(meanScores, ch);
        stat.println(timer.getTotalTime() + "ms prediction completed for "
                + sequence.getId());
        return this;
    }

    /**
     * Adds the scores of one model to the running per-residue totals.
     *
     * @param scores per-residue scores of a single model
     * @throws IllegalArgumentException if the length differs from the scores
     *         accumulated so far
     */
    private void addScore(final float[] scores) {
        // For the first time just add all elements; the array is adopted as the accumulator
        if (cummulativeScore == null) {
            cummulativeScore = scores;
            return;
        }
        if (cummulativeScore.length != scores.length) {
            throw new IllegalArgumentException("Expected "
                    + cummulativeScore.length + " but get " + scores.length);
        }
        for (int i = 0; i < scores.length; i++) {
            cummulativeScore[i] += scores[i];
        }
    }

    /**
     * Returns the accumulated scores divided by {@link #NUMBER_OF_MODELS}.
     *
     * @return a new array with one mean score per residue
     * @throws IllegalStateException if {@link #call()} has not produced any scores yet
     */
    float[] getMeanScores() {
        if (cummulativeScore == null) {
            throw new IllegalStateException(
                    "No scores available: call() must be executed before getMeanScores()");
        }
        final float[] meanScores = new float[cummulativeScore.length];
        for (int i = 0; i < cummulativeScore.length; i++) {
            meanScores[i] = cummulativeScore[i] / ORonn.NUMBER_OF_MODELS;
        }
        return meanScores;
    }

    /**
     * Layout of the result records.
     *
     * VERTICAL - where the letters of the sequence and corresponding disorder values are
     * output in two column layout.
     *
     * HORIZONTAL where the disorder values are provided under the letters of the
     * sequence. Letters and values separated by tabulation in this case.
     *
     * @author pvtroshin
     */
    enum ResultLayout {
        VERTICAL, HORIZONTAL
    }

    /** Logs the help message at error level. */
    static void printUsage() {
        logger.error(RonnConstraint.HELP_MESSAGE);
    }

    /**
     * Checks that the sequence is long enough and is a (possibly ambiguous) protein
     * sequence. Every failed check is reported on {@code stat} and logged as a warning.
     *
     * @param fsequence the sequence to check
     * @param stat writer that receives the reason for each failed check
     * @return true if both checks pass
     */
    static boolean isValidSequenceForRonn(final FastaSequence fsequence,
            final PrintWriter stat) {
        boolean valid = true;
        String message = "";
        if (!ORonn.isValidSequence(fsequence)) {
            message = "IGNORING sequence "
                    + fsequence.getId()
                    + " as its too short. Minimum sequence length for disorder prediction is "
                    + (RonnConstraint.MIN_SEQUENCE_LENGTH + 1) + " characters!";
            stat.println(message);
            logger.warn(message);
            valid = false;
        }
        final String sequence = fsequence.getSequence();
        if (!(SequenceUtil.isProteinSequence(sequence) || SequenceUtil
                .isAmbiguosProtein(sequence))) {
            message = "IGNORING sequence " + fsequence.getId()
                    + " as it is not a protein sequence!";
            stat.println(message);
            logger.warn(message);
            valid = false;
        }
        return valid;
    }

    /**
     * Same checks as {@link #isValidSequenceForRonn(FastaSequence, PrintWriter)},
     * but failures are raised as exceptions instead of being reported.
     *
     * @param fsequence the sequence to check
     * @throws IllegalArgumentException if the sequence is too short or is neither
     *         a protein nor an ambiguous protein sequence
     */
    static void validateSequenceForRonn(final FastaSequence fsequence) {
        if (!ORonn.isValidSequence(fsequence)) {
            throw new IllegalArgumentException("IGNORING sequence "
                    + fsequence.getId()
                    + " as its too short. Minimum sequence length for disorder prediction is "
                    + (RonnConstraint.MIN_SEQUENCE_LENGTH + 1) + " characters!");
        }
        final String sequence = fsequence.getSequence();
        // Evaluate each predicate once; the results are used for both the
        // warnings and the final decision.
        final boolean ambiguous = SequenceUtil.isAmbiguosProtein(sequence);
        final boolean protein = SequenceUtil.isProteinSequence(sequence);

        if (ambiguous) {
            logger.warn("Sequence is ambiguous!");
        }
        if (!protein) {
            logger.warn("Does not look like a protein sequence!");
        }
        if (!(protein || ambiguous)) {
            throw new IllegalArgumentException("IGNORING sequence " + fsequence.getId()
                    + " as it is not a protein sequence!");
        }
    }

    /**
     * Builds the input parameters from the command line. Keys are matched against
     * the trimmed, lower-cased argument; file parameters receive the argument
     * as typed.
     *
     * @param args the command line arguments
     * @return the populated parameters
     * @throws IOException declared by the original signature
     */
    private static InputParameters parseArguments(final String[] args)
            throws IOException {
        final InputParameters prms = new InputParameters();
        for (int i = 0; i < args.length; i++) {
            // Locale.ROOT keeps key matching independent of the user's locale
            // (e.g. the Turkish dotless i would otherwise break "-I=" style input).
            final String prm = args[i].trim().toLowerCase(Locale.ROOT);
            if (prm.startsWith(InputParameters.inputKey)) {
                prms.setFilePrm(args[i], InputParameters.inputKey);
            }
            if (prm.startsWith(InputParameters.outputKey)) {
                prms.setFilePrm(args[i], InputParameters.outputKey);
            }
            if (prm.startsWith(InputParameters.disorderKey)) {
                prms.setDisorder(prm);
            }
            if (prm.startsWith(InputParameters.formatKey)) {
                prms.setFormat(prm);
            }
            if (prm.startsWith(InputParameters.statKey)) {
                prms.setFilePrm(args[i], InputParameters.statKey);
            }
            if (prm.startsWith(InputParameters.threadKey)) {
                prms.setThreadNum(prm);
            }
        }
        return prms;
    }

    /**
     * Command line entry point: parses the arguments, reads the FASTA input, loads
     * the models and runs the predictions serially or in parallel depending on the
     * requested thread number. Exits with status 1 when the argument count is out
     * of range or no input is defined.
     *
     * @param args the command line arguments
     * @throws IOException if parsing the parameters, reading the input or running
     *         the predictions fails
     */
    public static void main(final String[] args) throws IOException {

        if ((args.length == 0) || (args.length > MAX_ARGUMENTS)) {
            ORonn.printUsage();
            System.exit(1);
        }
        final InputParameters prms = ORonn.parseArguments(args);

        final PrintWriter stat = prms.getStatWriter();
        PrintWriter out = null;
        try {
            stat.println("Using parameters: \n[" + prms + "]");

            if (prms.getInput() == null) {
                logger.error("Input is not defined! ");
                ORonn.printUsage();
                // System.exit() skips the finally block, so flush explicitly.
                stat.flush();
                System.exit(1);
            }
            stat.println("Calculation started: "
                    + ORonn.DATE_FORMAT.format(new Date()));

            final Timer timer = new Timer();
            // The stream is closed after reading inside readFasta
            final List<FastaSequence> sequences = SequenceUtil
                    .readFasta(new FileInputStream(prms.getInput()));
            stat.println(timer.getStepTime(TimeUnit.MILLISECONDS)
                    + "ms input file loaded");
            stat.println("Input file has " + sequences.size() + " sequences");

            final ModelLoader mloader = new ModelLoader();
            mloader.loadModels();

            out = prms.getOutputWriter();
            assert out != null;

            // do serial execution
            if (prms.getThreadNum() == 1) {
                stat.println("Running predictions serially");
                ORonn.predictSerial(sequences, prms, mloader);
            } else {
                // Run predictions in parallel
                stat.print("Running preditions in parallel - ");
                stat.println("Using " + prms.getThreadNum() + " threads");
                ORonn.predictParallel(sequences, prms, mloader);
            }

            // NOTE(review): assumes the default Timer unit is seconds - confirm against Timer
            stat.println("Total calculation time: " + timer.getTotalTime() + "s ");
            stat.println("Calculation completed: "
                    + ORonn.DATE_FORMAT.format(new Date()));
        } finally {
            // Release the writers on failure as well, so buffered output is not lost.
            stat.close();
            if (out != null) {
                out.flush();
                out.close();
            }
        }
    }

    /**
     * Runs the prediction for every valid sequence one after another on the
     * calling thread. Invalid sequences are reported and skipped.
     *
     * @param fsequences the sequences to analyse
     * @param prms the run parameters
     * @param mloader source of the models
     * @throws IOException if a prediction fails with an I/O error
     */
    static void predictSerial(final List<FastaSequence> fsequences,
            final InputParameters prms, final ModelLoader mloader)
            throws IOException {
        for (final FastaSequence sequence : fsequences) {
            if (!ORonn.isValidSequenceForRonn(sequence, prms.getStatWriter())) {
                continue;
            }
            final ORonn ronn = new ORonn(sequence, mloader, prms);
            ronn.call();
        }
    }


    /**
     * Runs the prediction for every valid sequence on a fixed-size thread pool.
     * When all workers are busy the submitting thread runs the task itself
     * (caller-runs policy). Waits for completion for as many minutes as there are
     * sequences, but at least 60; on timeout or interruption an error is logged
     * and the remaining workers are interrupted.
     *
     * @param fsequences the sequences to analyse
     * @param prms the run parameters, including the thread number
     * @param mloader source of the models
     * @throws IOException if a task cannot be created
     */
    static void predictParallel(final List<FastaSequence> fsequences,
            final InputParameters prms, final ModelLoader mloader)
            throws IOException {
        final PrintWriter stat = prms.getStatWriter();
        final int threadNum = prms.getThreadNum();

        // Do parallel execution
        final ExecutorService executor = new ThreadPoolExecutor(threadNum,
                threadNum, 0L, TimeUnit.SECONDS,
                new SynchronousQueue<Runnable>(),
                new ThreadPoolExecutor.CallerRunsPolicy());
        try {
            for (final FastaSequence sequence : fsequences) {
                if (!ORonn.isValidSequenceForRonn(sequence, stat)) {
                    continue;
                }
                final ORonn ronn = new ORonn(sequence, mloader, prms);
                // The returned Future is not retained, therefore the task itself
                // reports its failure; otherwise it would go unnoticed.
                executor.submit(reportingFailures(ronn));
            }
            executor.shutdown();
            final int timeOut = Math.max(60, fsequences.size());
            stat.println("All task submitted. Waiting for complition for "
                    + "maximum of " + timeOut + " minutes");
            if (!executor.awaitTermination(timeOut, TimeUnit.MINUTES)) {
                // A timeout is signalled by the return value, not by an exception.
                logger.error(TERMINATED_MESSAGE);
            }
        } catch (final InterruptedException e) {
            logger.error(TERMINATED_MESSAGE, e);
            // Restore the flag so that callers can observe the interruption.
            Thread.currentThread().interrupt();
        } finally {
            executor.shutdownNow();
        }
    }

    /** Wraps a prediction task so that a failure is logged before it is stored in the discarded Future. */
    private static Callable<ORonn> reportingFailures(final ORonn ronn) {
        return () -> {
            try {
                return ronn.call();
            } catch (final IOException | RuntimeException e) {
                logger.error("Prediction failed for sequence {}", ronn.sequence.getId(), e);
                throw e;
            }
        };
    }

} // class end