001/* 002 * @(#)ORonnModel.java 1.0 June 2010 003 * 004 * Copyright (c) 2010 Peter Troshin 005 * 006 * BioJava development code 007 * 008 * This code may be freely distributed and modified under the 009 * terms of the GNU Lesser General Public Licence. This should 010 * be distributed with the code. If you do not have a copy, 011 * see: 012 * 013 * http://www.gnu.org/copyleft/lesser.html 014 * 015 * Copyright for this code is held jointly by the individual 016 * authors. These should be listed in @author doc comments. 017 * 018 * For more information on the BioJava project and its aims, 019 * or to join the biojava-l mailing list, visit the home page 020 * at: 021 * 022 * http://www.biojava.org/ 023 * 024 */ 025package org.biojava.nbio.ronn; 026 027import org.biojava.nbio.ronn.ModelLoader.Model; 028import org.biojava.nbio.ronn.ModelLoader.Threshold; 029import org.slf4j.Logger; 030import org.slf4j.LoggerFactory; 031 032import java.io.File; 033import java.io.FileNotFoundException; 034import java.io.PrintWriter; 035 036 037 038/** 039 * Fully re-factored version of RONN model. Based on the code in C version of 040 * RONN. 041 * 042 * @author Peter Troshin 043 * @version 1.0 044 * @since 3.0.2 045 */ 046public final class ORonnModel { 047 048 private static final Logger logger = LoggerFactory.getLogger(ORonnModel.class); 049 050 /** 051 * Order probability, corresponds to disorder as 1-order 052 */ 053 private final float disorder_weight; 054 055 private final static int AA_ALPHABET = 19; 056 private final static int maxR = 110; 057 //private final static float coef = 1.0f; 058 /** 059 * Holds encoded query sequence 060 */ 061 private final short[] seqAA; 062 /** 063 * Holds query sequence 064 */ 065 private final char[] query; 066 067 private final Model model; 068 069 /** 070 * Disorder scores for all residues 071 */ 072 private float[] scores = null; 073 074 final float[] detect() { 075 076 scores = new float[query.length]; 077 int sResidue; 078 int dIndex; 079 int r; 080 float est, fOrder, pDisor, fDisor; 081 final float[][] Z = new float[seqAA.length][ORonnModel.maxR]; 082 final int[] Q = new int[seqAA.length]; 083 final Threshold thold = new ModelLoader.Threshold(model.modelNum); 084 085 /* 086 * 19 looks like a size of the sliding window. So for any sequences 087 * shorted than 19 AA the score will be NaN. Original RONN segfault in 088 * such condition 089 */ 090 for (sResidue = 0; sResidue <= query.length - ORonnModel.AA_ALPHABET; sResidue++) { 091 est = 0.0f; 092 093 for (dIndex = 0; dIndex < model.numOfDBAAseq; dIndex++) { 094 final float[] rho = align(sResidue, dIndex);// search for the 095 // maximum alignment between ith peptide from the 096 // query and the dIndex-th database sequence 097 est += model.W[dIndex] * Math.exp((rho[1] - rho[0]) / rho[0]); 098 } 099 100 fOrder = (float) (Math.exp(-0.5 * Math.pow(est - thold.mu0, 2.0) 101 / thold.sigma0) / (Math.sqrt(6.28) * thold.sigma0)); 102 103 fDisor = (float) (Math.exp(-0.5 * Math.pow(est - thold.mu1, 2.0) 104 / thold.sigma1) / (Math.sqrt(6.28) * thold.sigma1)); 105 106 pDisor = (float) (disorder_weight * fDisor / ((1.0 - disorder_weight) 107 * fOrder + disorder_weight * fDisor)); 108 for (r = sResidue; r < sResidue + ORonnModel.AA_ALPHABET; r++) { 109 Z[r][Q[r]] = pDisor; 110 Q[r]++; 111 } 112 } 113 114 for (sResidue = 0; sResidue < query.length; sResidue++) { 115 est = 0.0f; 116 float[] zRow = Z[sResidue]; 117 int numOfIterations = Q[sResidue]; 118 for (r = 0; r < numOfIterations; r++) { 119 est += zRow[r]; 120 } 121 scores[sResidue] = est / numOfIterations; 122 } 123 return scores; 124 } 125 126 public void getScores(final File outfile) throws FileNotFoundException { 127 final PrintWriter output = new PrintWriter(outfile); 128 if (scores == null) { 129 synchronized (this) { 130 if (scores == null) { 131 detect(); 132 } 133 } 134 } 135 for (int i = 0; i < scores.length; i++) { 136 output.printf("%c\t%f\n", query[i], scores[i]); 137 } 138 output.close(); 139 } 140 141 // sResidue query sequence index and dIndex database sequence index 142 private final float[] align(final int sResidue, final int dIndex) { 143 int dResidue, r; 144 float maxScore = -1000000; 145 float rho1 = 0; 146 int maxIdx = 0; 147 float rho0 = 0; 148 short[] dbAARow = model.dbAA[dIndex]; 149 int numOfIterations = model.Length[dIndex] - ORonnModel.AA_ALPHABET; 150 for (dResidue = 0; dResidue <= numOfIterations; dResidue++) { 151 // go though the database sequence for maximised alignment 152 rho1 = 0.0f; 153 for (r = 0; r < ORonnModel.AA_ALPHABET; r++) { 154 // go through the query sequence for one alignment 155 rho1 += RonnConstraint.Blosum62[seqAA[sResidue + r]][dbAARow[dResidue 156 + r]]; 157 } 158 if (rho1 > maxScore) { 159 maxScore = rho1; 160 maxIdx = dResidue; 161 } 162 } 163 for (r = 0; r < ORonnModel.AA_ALPHABET; r++) { 164 rho0 += RonnConstraint.Blosum62[dbAARow[maxIdx + r]][dbAARow[maxIdx 165 + r]]; 166 } 167 return new float[] { rho0, maxScore }; 168 } 169 170 public ORonnModel(final String sequence, final Model model, 171 final float disorder) throws NumberFormatException { 172 this.disorder_weight = disorder; 173 this.model = model; 174 query = sequence.toCharArray(); 175 seqAA = new short[query.length]; 176 assert model != null; 177 assert model.numOfDBAAseq > 0; 178 for (int sResidue = 0; sResidue < sequence.length(); sResidue++) { 179 seqAA[sResidue] = RonnConstraint.INDEX[query[sResidue] - 'A']; 180 if ((seqAA[sResidue] < 0) || (seqAA[sResidue] > 19)) { 181 logger.error("seqAA[sResidue]={}({})", seqAA[sResidue], query[sResidue]); 182 System.exit(1); 183 } 184 } 185 } 186}