001/*
002 * @(#)ORonnModel.java  1.0 June 2010
003 *
004 * Copyright (c) 2010 Peter Troshin
005 *
006 *        BioJava development code
007 *
008 * This code may be freely distributed and modified under the
009 * terms of the GNU Lesser General Public Licence.  This should
010 * be distributed with the code.  If you do not have a copy,
011 * see:
012 *
013 *      http://www.gnu.org/copyleft/lesser.html
014 *
015 * Copyright for this code is held jointly by the individual
016 * authors.  These should be listed in @author doc comments.
017 *
018 * For more information on the BioJava project and its aims,
019 * or to join the biojava-l mailing list, visit the home page
020 * at:
021 *
022 *      http://www.biojava.org/
023 *
024 */
025package org.biojava.nbio.ronn;
026
027import org.biojava.nbio.ronn.ModelLoader.Model;
028import org.biojava.nbio.ronn.ModelLoader.Threshold;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032import java.io.File;
033import java.io.FileNotFoundException;
034import java.io.PrintWriter;
035import java.util.Locale;
036
037
038
039/**
040 * Fully re-factored version of RONN model. Based on the code in C version of
041 * RONN.
042 *
043 * @author Peter Troshin
044 * @version 1.0
045 * @since 3.0.2
046 */
047public final class ORonnModel {
048
049        private static final Logger logger = LoggerFactory.getLogger(ORonnModel.class);
050
051        /**
052         * Order probability, corresponds to disorder as 1-order
053         */
054        private final float disorder_weight;
055
056        private final static int AA_ALPHABET = 19;
057        private final static int maxR = 110;
058        //private final static float coef = 1.0f;
059        /**
060         * Holds encoded query sequence
061         */
062        private final short[] seqAA;
063        /**
064         * Holds query sequence
065         */
066        private final char[] query;
067
068        private final Model model;
069
070        /**
071         * Disorder scores for all residues
072         */
073        private float[] scores = null;
074
075        final float[] detect() {
076
077        scores = new float[query.length];
078        int sResidue;
079        int dIndex;
080        int r;
081        float est, fOrder, pDisor, fDisor;
082        final float[][] Z = new float[seqAA.length][ORonnModel.maxR];
083        final int[] Q = new int[seqAA.length];
084        final Threshold thold = new ModelLoader.Threshold(model.modelNum);
085
086        /*
087         * 19 looks like a size of the sliding window. So for any sequences
088         * shorted than 19 AA the score will be NaN. Original RONN segfault in
089         * such condition
090         */
091        for (sResidue = 0; sResidue <= query.length - ORonnModel.AA_ALPHABET; sResidue++) {
092                est = 0.0f;
093
094                for (dIndex = 0; dIndex < model.numOfDBAAseq; dIndex++) {
095                final float[] rho = align(sResidue, dIndex);// search for the
096                // maximum alignment between ith peptide from the
097                // query and the dIndex-th database sequence
098                est += model.W[dIndex] * Math.exp((rho[1] - rho[0]) / rho[0]);
099                }
100
101                fOrder = (float) (Math.exp(-0.5 * Math.pow(est - thold.mu0, 2.0)
102                        / thold.sigma0) / (Math.sqrt(6.28) * thold.sigma0));
103
104                fDisor = (float) (Math.exp(-0.5 * Math.pow(est - thold.mu1, 2.0)
105                        / thold.sigma1) / (Math.sqrt(6.28) * thold.sigma1));
106
107                pDisor = (float) (disorder_weight * fDisor / ((1.0 - disorder_weight)
108                        * fOrder + disorder_weight * fDisor));
109                for (r = sResidue; r < sResidue + ORonnModel.AA_ALPHABET; r++) {
110                Z[r][Q[r]] = pDisor;
111                Q[r]++;
112                }
113        }
114
115        for (sResidue = 0; sResidue < query.length; sResidue++) {
116                est = 0.0f;
117                float[] zRow = Z[sResidue];
118                int numOfIterations = Q[sResidue];
119                for (r = 0; r < numOfIterations; r++) {
120                est += zRow[r];
121                }
122                scores[sResidue] = est / numOfIterations;
123        }
124        return scores;
125        }
126
127        public void getScores(final File outfile) throws FileNotFoundException {
128        final PrintWriter output = new PrintWriter(outfile);
129        if (scores == null) {
130                synchronized (this) {
131                if (scores == null) {
132                        detect();
133                }
134                }
135        }
136        for (int i = 0; i < scores.length; i++) {
137                output.printf(Locale.US, "%c\t%f\n", query[i], scores[i]);
138        }
139        output.close();
140        }
141
142        // sResidue query sequence index and dIndex database sequence index
143        private final float[] align(final int sResidue, final int dIndex) {
144        int dResidue, r;
145        float maxScore = -1000000;
146        float rho1 = 0;
147        int maxIdx = 0;
148        float rho0 = 0;
149        short[] dbAARow = model.dbAA[dIndex];
150        int numOfIterations = model.Length[dIndex] - ORonnModel.AA_ALPHABET;
151        for (dResidue = 0; dResidue <= numOfIterations; dResidue++) {
152                // go though the database sequence for maximised alignment
153                rho1 = 0.0f;
154                for (r = 0; r < ORonnModel.AA_ALPHABET; r++) {
155                // go through the query sequence for one alignment
156                rho1 += RonnConstraint.Blosum62[seqAA[sResidue + r]][dbAARow[dResidue
157                        + r]];
158                }
159                if (rho1 > maxScore) {
160                maxScore = rho1;
161                maxIdx = dResidue;
162                }
163        }
164        for (r = 0; r < ORonnModel.AA_ALPHABET; r++) {
165                rho0 += RonnConstraint.Blosum62[dbAARow[maxIdx + r]][dbAARow[maxIdx
166                        + r]];
167        }
168        return new float[] { rho0, maxScore };
169        }
170
171        public ORonnModel(final String sequence, final Model model,
172                final float disorder) throws NumberFormatException {
173        this.disorder_weight = disorder;
174        this.model = model;
175        query = sequence.toCharArray();
176        seqAA = new short[query.length];
177        assert model != null;
178        assert model.numOfDBAAseq > 0;
179        for (int sResidue = 0; sResidue < sequence.length(); sResidue++) {
180                seqAA[sResidue] = RonnConstraint.INDEX[query[sResidue] - 'A'];
181                if ((seqAA[sResidue] < 0) || (seqAA[sResidue] > 19)) {
182                        logger.error("seqAA[sResidue]={}({})", seqAA[sResidue], query[sResidue]);
183                        System.exit(1);
184                }
185        }
186        }
187}