/*
 * @(#)ORonnModel.java 1.0 June 2010
 *
 * Copyright (c) 2010 Peter Troshin
 *
 *        BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.ronn;

import org.biojava.nbio.ronn.ModelLoader.Model;
import org.biojava.nbio.ronn.ModelLoader.Threshold;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.Locale;

/**
 * Fully re-factored version of RONN model. Based on the code in C version of
 * RONN.
 * <p>
 * An instance binds one query sequence to one {@link Model}. Per-residue
 * disorder scores are computed by {@link #detect()} and cached, so
 * {@link #getScores(File)} computes them at most once.
 *
 * @author Peter Troshin
 * @version 1.0
 * @since 3.0.2
 */
public final class ORonnModel {

    private static final Logger logger = LoggerFactory.getLogger(ORonnModel.class);

    /**
     * Order probability, corresponds to disorder as 1-order
     */
    private final float disorder_weight;

    /**
     * Despite the name, this is used throughout as the length of the sliding
     * window that is moved along the query and the database sequences.
     */
    private static final int AA_ALPHABET = 19;

    /**
     * Capacity of the per-residue list of window scores. A residue is covered
     * by at most {@link #AA_ALPHABET} windows, so this is never exceeded.
     */
    private static final int maxR = 110;

    /**
     * Holds encoded query sequence
     */
    private final short[] seqAA;

    /**
     * Holds query sequence
     */
    private final char[] query;

    private final Model model;

    /**
     * Disorder scores for all residues, {@code null} until computed.
     * Volatile, and only ever assigned a fully populated array, so the
     * unsynchronised null check in {@link #cachedScores()} cannot observe a
     * half-filled result.
     */
    private volatile float[] scores = null;

    /**
     * Creates a model run for one query sequence.
     *
     * @param sequence the query sequence; every character must map, through
     *                 {@code RonnConstraint.INDEX[c - 'A']}, to a code in the
     *                 range 0..19
     * @param model    the model to score against
     * @param disorder the disorder weight used when combining the order and
     *                 disorder densities
     * @throws NumberFormatException if the sequence contains a character that
     *                               cannot be encoded
     */
    public ORonnModel(final String sequence, final Model model,
            final float disorder) throws NumberFormatException {
        this.disorder_weight = disorder;
        this.model = model;
        query = sequence.toCharArray();
        seqAA = new short[query.length];
        assert model != null;
        assert model.numOfDBAAseq > 0;
        for (int sResidue = 0; sResidue < query.length; sResidue++) {
            seqAA[sResidue] = encode(query[sResidue], sResidue);
        }
    }

    /**
     * Translates one residue letter into its numeric code.
     * <p>
     * Rejects the residue with an exception instead of terminating the JVM,
     * and checks the table bounds first so that characters outside the lookup
     * table are reported the same way as letters the table marks as invalid.
     *
     * @param residue  the residue letter
     * @param position zero-based position of the residue in the query, used
     *                 only for the error message
     * @return the residue code, in the range 0..19
     * @throws NumberFormatException if the residue cannot be encoded
     */
    private static short encode(final char residue, final int position) {
        final int letter = residue - 'A';
        short code = -1;
        if (letter >= 0 && letter < RonnConstraint.INDEX.length) {
            code = RonnConstraint.INDEX[letter];
        }
        if ((code < 0) || (code > 19)) {
            logger.error("seqAA[sResidue]={}({})", code, residue);
            throw new NumberFormatException("Unsupported residue '" + residue
                    + "' at position " + position + " of the query sequence");
        }
        return code;
    }

    /**
     * Computes the disorder score of every residue of the query and caches
     * the result.
     * <p>
     * Every window of {@link #AA_ALPHABET} consecutive residues gets one
     * disorder probability; the score of a residue is the mean over all
     * windows that cover it. For a query shorter than the window no window is
     * evaluated, so every score is {@code NaN} (0/0). The original RONN
     * segfaults in that condition.
     *
     * @return the per-residue scores, one per query character
     */
    final float[] detect() {
        final float[] result = new float[query.length];
        // Z[r] lists the probabilities of the windows covering residue r,
        // Q[r] counts how many of them have been stored so far.
        final float[][] Z = new float[seqAA.length][ORonnModel.maxR];
        final int[] Q = new int[seqAA.length];
        final Threshold thold = new ModelLoader.Threshold(model.modelNum);

        for (int sResidue = 0; sResidue <= query.length - ORonnModel.AA_ALPHABET; sResidue++) {
            float est = 0.0f;

            for (int dIndex = 0; dIndex < model.numOfDBAAseq; dIndex++) {
                // search for the maximum alignment between the window
                // starting at sResidue and the dIndex-th database sequence
                final float[] rho = align(sResidue, dIndex);
                est += model.W[dIndex] * Math.exp((rho[1] - rho[0]) / rho[0]);
            }

            // NOTE(review): 6.28 looks like an approximation of 2*pi; the
            // expressions are kept exactly as they were so scores do not change.
            final float fOrder = (float) (Math.exp(-0.5 * Math.pow(est - thold.mu0, 2.0)
                    / thold.sigma0) / (Math.sqrt(6.28) * thold.sigma0));

            final float fDisor = (float) (Math.exp(-0.5 * Math.pow(est - thold.mu1, 2.0)
                    / thold.sigma1) / (Math.sqrt(6.28) * thold.sigma1));

            final float pDisor = (float) (disorder_weight * fDisor / ((1.0 - disorder_weight)
                    * fOrder + disorder_weight * fDisor));

            for (int r = sResidue; r < sResidue + ORonnModel.AA_ALPHABET; r++) {
                Z[r][Q[r]] = pDisor;
                Q[r]++;
            }
        }

        for (int sResidue = 0; sResidue < query.length; sResidue++) {
            float est = 0.0f;
            final float[] zRow = Z[sResidue];
            final int numOfIterations = Q[sResidue];
            for (int r = 0; r < numOfIterations; r++) {
                est += zRow[r];
            }
            result[sResidue] = est / numOfIterations;
        }

        // Publish only the finished array.
        scores = result;
        return result;
    }

    /**
     * Writes one line per residue to the given file, in the form
     * {@code <residue>\t<score>}, computing the scores first if necessary.
     *
     * @param outfile the file to write
     * @throws FileNotFoundException if the file cannot be opened for writing
     */
    public void getScores(final File outfile) throws FileNotFoundException {
        // Compute before opening, so a failing computation does not leave a
        // truncated output file behind.
        final float[] result = cachedScores();
        final PrintWriter output = new PrintWriter(outfile);
        try {
            for (int i = 0; i < result.length; i++) {
                output.printf(Locale.US, "%c\t%f\n", query[i], result[i]);
            }
        } finally {
            // release the file handle even if formatting fails
            output.close();
        }
    }

    /**
     * Returns the cached scores, running {@link #detect()} under the instance
     * lock if they have not been computed yet.
     */
    private float[] cachedScores() {
        float[] result = scores;
        if (result == null) {
            synchronized (this) {
                result = scores;
                if (result == null) {
                    result = detect();
                }
            }
        }
        return result;
    }

    /**
     * Finds the best ungapped alignment of one query window against one
     * database sequence.
     *
     * @param sResidue index in the query where the window starts
     * @param dIndex   index of the database sequence
     * @return a two element array: {@code [0]} is the score of the best
     *         matching database window aligned with itself, {@code [1]} is the
     *         score of the query window against that database window
     */
    private float[] align(final int sResidue, final int dIndex) {
        final short[] dbAARow = model.dbAA[dIndex];
        final int numOfIterations = model.Length[dIndex] - ORonnModel.AA_ALPHABET;
        float maxScore = -1000000;
        int maxIdx = 0;

        for (int dResidue = 0; dResidue <= numOfIterations; dResidue++) {
            // go through the database sequence for maximised alignment
            float rho1 = 0.0f;
            for (int r = 0; r < ORonnModel.AA_ALPHABET; r++) {
                // go through the query sequence for one alignment
                rho1 += RonnConstraint.Blosum62[seqAA[sResidue + r]][dbAARow[dResidue + r]];
            }
            if (rho1 > maxScore) {
                maxScore = rho1;
                maxIdx = dResidue;
            }
        }

        float rho0 = 0;
        for (int r = 0; r < ORonnModel.AA_ALPHABET; r++) {
            rho0 += RonnConstraint.Blosum62[dbAARow[maxIdx + r]][dbAARow[maxIdx + r]];
        }
        return new float[] { rho0, maxScore };
    }
}