001/*
002 * @(#)ORonnModel.java  1.0 June 2010
003 *
004 * Copyright (c) 2010 Peter Troshin
005 *
006 *        BioJava development code
007 *
008 * This code may be freely distributed and modified under the
009 * terms of the GNU Lesser General Public Licence.  This should
010 * be distributed with the code.  If you do not have a copy,
011 * see:
012 *
013 *      http://www.gnu.org/copyleft/lesser.html
014 *
015 * Copyright for this code is held jointly by the individual
016 * authors.  These should be listed in @author doc comments.
017 *
018 * For more information on the BioJava project and its aims,
019 * or to join the biojava-l mailing list, visit the home page
020 * at:
021 *
022 *      http://www.biojava.org/
023 *
024 */
025package org.biojava.nbio.ronn;
026
027import org.biojava.nbio.ronn.ModelLoader.Model;
028import org.biojava.nbio.ronn.ModelLoader.Threshold;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032import java.io.File;
033import java.io.FileNotFoundException;
034import java.io.PrintWriter;
035
036
037
038/**
039 * Fully re-factored version of RONN model. Based on the code in C version of
040 * RONN.
041 *
042 * @author Peter Troshin
043 * @version 1.0
044 * @since 3.0.2
045 */
046public final class ORonnModel {
047
048        private static final Logger logger = LoggerFactory.getLogger(ORonnModel.class);
049
050        /**
051         * Order probability, corresponds to disorder as 1-order
052         */
053        private final float disorder_weight;
054
055        private final static int AA_ALPHABET = 19;
056        private final static int maxR = 110;
057        //private final static float coef = 1.0f;
058        /**
059         * Holds encoded query sequence
060         */
061        private final short[] seqAA;
062        /**
063         * Holds query sequence
064         */
065        private final char[] query;
066
067        private final Model model;
068
069        /**
070         * Disorder scores for all residues
071         */
072        private float[] scores = null;
073
074        final float[] detect() {
075
076        scores = new float[query.length];
077        int sResidue;
078        int dIndex;
079        int r;
080        float est, fOrder, pDisor, fDisor;
081        final float[][] Z = new float[seqAA.length][ORonnModel.maxR];
082        final int[] Q = new int[seqAA.length];
083        final Threshold thold = new ModelLoader.Threshold(model.modelNum);
084
085        /*
086         * 19 looks like a size of the sliding window. So for any sequences
087         * shorted than 19 AA the score will be NaN. Original RONN segfault in
088         * such condition
089         */
090        for (sResidue = 0; sResidue <= query.length - ORonnModel.AA_ALPHABET; sResidue++) {
091                est = 0.0f;
092
093                for (dIndex = 0; dIndex < model.numOfDBAAseq; dIndex++) {
094                final float[] rho = align(sResidue, dIndex);// search for the
095                // maximum alignment between ith peptide from the
096                // query and the dIndex-th database sequence
097                est += model.W[dIndex] * Math.exp((rho[1] - rho[0]) / rho[0]);
098                }
099
100                fOrder = (float) (Math.exp(-0.5 * Math.pow(est - thold.mu0, 2.0)
101                        / thold.sigma0) / (Math.sqrt(6.28) * thold.sigma0));
102
103                fDisor = (float) (Math.exp(-0.5 * Math.pow(est - thold.mu1, 2.0)
104                        / thold.sigma1) / (Math.sqrt(6.28) * thold.sigma1));
105
106                pDisor = (float) (disorder_weight * fDisor / ((1.0 - disorder_weight)
107                        * fOrder + disorder_weight * fDisor));
108                for (r = sResidue; r < sResidue + ORonnModel.AA_ALPHABET; r++) {
109                Z[r][Q[r]] = pDisor;
110                Q[r]++;
111                }
112        }
113
114        for (sResidue = 0; sResidue < query.length; sResidue++) {
115                est = 0.0f;
116                float[] zRow = Z[sResidue];
117                int numOfIterations = Q[sResidue];
118                for (r = 0; r < numOfIterations; r++) {
119                est += zRow[r];
120                }
121                scores[sResidue] = est / numOfIterations;
122        }
123        return scores;
124        }
125
126        public void getScores(final File outfile) throws FileNotFoundException {
127        final PrintWriter output = new PrintWriter(outfile);
128        if (scores == null) {
129                synchronized (this) {
130                if (scores == null) {
131                        detect();
132                }
133                }
134        }
135        for (int i = 0; i < scores.length; i++) {
136                output.printf("%c\t%f\n", query[i], scores[i]);
137        }
138        output.close();
139        }
140
141        // sResidue query sequence index and dIndex database sequence index
142        private final float[] align(final int sResidue, final int dIndex) {
143        int dResidue, r;
144        float maxScore = -1000000;
145        float rho1 = 0;
146        int maxIdx = 0;
147        float rho0 = 0;
148        short[] dbAARow = model.dbAA[dIndex];
149        int numOfIterations = model.Length[dIndex] - ORonnModel.AA_ALPHABET;
150        for (dResidue = 0; dResidue <= numOfIterations; dResidue++) {
151                // go though the database sequence for maximised alignment
152                rho1 = 0.0f;
153                for (r = 0; r < ORonnModel.AA_ALPHABET; r++) {
154                // go through the query sequence for one alignment
155                rho1 += RonnConstraint.Blosum62[seqAA[sResidue + r]][dbAARow[dResidue
156                        + r]];
157                }
158                if (rho1 > maxScore) {
159                maxScore = rho1;
160                maxIdx = dResidue;
161                }
162        }
163        for (r = 0; r < ORonnModel.AA_ALPHABET; r++) {
164                rho0 += RonnConstraint.Blosum62[dbAARow[maxIdx + r]][dbAARow[maxIdx
165                        + r]];
166        }
167        return new float[] { rho0, maxScore };
168        }
169
170        public ORonnModel(final String sequence, final Model model,
171                final float disorder) throws NumberFormatException {
172        this.disorder_weight = disorder;
173        this.model = model;
174        query = sequence.toCharArray();
175        seqAA = new short[query.length];
176        assert model != null;
177        assert model.numOfDBAAseq > 0;
178        for (int sResidue = 0; sResidue < sequence.length(); sResidue++) {
179                seqAA[sResidue] = RonnConstraint.INDEX[query[sResidue] - 'A'];
180                if ((seqAA[sResidue] < 0) || (seqAA[sResidue] > 19)) {
181                        logger.error("seqAA[sResidue]={}({})", seqAA[sResidue], query[sResidue]);
182                        System.exit(1);
183                }
184        }
185        }
186}