001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.survival.cox;
022
023
024import org.biojava.nbio.survival.data.WorkSheet;
025
026import java.util.ArrayList;
027
028/**
029 * The CoxHelper class is provided to start with a tab delimited file in a similar process in R and return the results as a CoxInfo class.
030 * Given the number of options for adjusting the calculations using weighting, strata, clustering etc the helper class can be used to hide
031 * the complexity for typical use case.
032 *
033 * @author Scooter Willis <willishf at gmail dot com>
034 */
035public class CoxHelper {
036
037        /**
038         *
039         * @param datafile The tab delimited file containing survival data and variables. The first column needs to be unique index
040         * @param timeColumn The column representing the event/censor time
041         * @param statusColumn The column representing an event=1 and censor=0
042         * @param weightColumn For case-cohort data sets may require weighting to reflect the entire cohort
043         * @param strataColumn A column representing strata data
044         * @param clusterColumn If robost variation calculation is required the cluster column will group samples by the value in this column
045         * @param variables The variables to be used in the cox regression analysis. For Interactions using variable1:variable2
046         * @param useStrata Boolean to indicate if strata column should be used
047         * @param useWeights Boolean to indicate if weight column should be used
048         * @return
049         * @throws Exception
050         */
051
052
053        public static CoxInfo process(String datafile, String timeColumn, String statusColumn, String weightColumn, String strataColumn, String clusterColumn, ArrayList<String> variables, boolean useStrata, boolean useWeights) throws Exception {
054                WorkSheet worksheet = WorkSheet.readCSV(datafile, '\t');
055                return process(worksheet, timeColumn, statusColumn, weightColumn, strataColumn, clusterColumn, variables, useStrata, useWeights);
056        }
057
058        /**
059         *
060         * @param worksheet
061         * @param timeColumn The column representing the event/censor time
062         * @param statusColumn The column representing an event=1 and censor=0
063         * @param weightColumn For case-cohort data sets may require weighting to reflect the entire cohort
064         * @param strataColumn A column representing strata data
065         * @param clusterColumn If robost variation calculation is required the cluster column will group samples by the value in this column
066         * @param variables The variables to be used in the cox regression analysis. For Interactions using variable1:variable2
067         * @param useStrata Boolean to indicate if strata column should be used
068         * @param useWeights Boolean to indicate if weight column should be used
069         * @return
070         */
071        public static CoxInfo process(WorkSheet worksheet, String timeColumn, String statusColumn, String weightColumn, String strataColumn, String clusterColumn, ArrayList<String> variables, boolean useStrata, boolean useWeights) {
072
073                try {
074                        ArrayList<SurvivalInfo> survivalInfoList = new ArrayList<SurvivalInfo>();
075
076                        int i = 1;
077                        for (String row : worksheet.getRows()) {
078                                double time = worksheet.getCellDouble(row, timeColumn);
079
080                                double c = worksheet.getCellDouble(row, statusColumn);
081                                double weight = 1.0;
082                                if (weightColumn != null && weightColumn.length() > 0) {
083                                        weight = worksheet.getCellDouble(row, weightColumn);
084
085                                }
086                                int strata = 0;
087                                if (strataColumn != null && strataColumn.length() > 0) {
088                                        strata = worksheet.getCellDouble(row, strataColumn).intValue();
089                                }
090                                int censor = (int) c;
091
092                                if (weight <= 0) {
093                                        //   System.out.println("Weight <= 0 Sample=" + row + " weight=" + weight);
094                                        i++;
095                                        continue;
096                                }
097
098
099
100                                SurvivalInfo si = new SurvivalInfo(time, censor);
101                                si.setOrder(i);
102                                si.setWeight(weight);
103                                si.setStrata(strata);
104                                for (String column : variables) {
105                                        if (column.contains(":")) {
106                                                continue;
107                                        }
108                                        String value = worksheet.getCell(row, column);
109                                        si.addUnknownDataTypeVariable(column, value);
110                                }
111                                if (clusterColumn != null && clusterColumn.length() > 0) {
112                                        String v = worksheet.getCell(row, clusterColumn);
113                                        si.setClusterValue(v);
114                                }
115
116                                survivalInfoList.add(si);
117                                i++;
118                        }
119
120
121
122                        boolean cluster = false;
123                        boolean robust = false;
124                        if (clusterColumn != null && clusterColumn.length() > 0) {
125                                cluster = true;
126                                robust = true;
127                        }
128                        //       variables.add("TREAT:AGE");
129                        CoxR cox = new CoxR();
130                        CoxInfo ci = cox.process(variables, survivalInfoList, useStrata, useWeights, robust, cluster);
131                        // System.out.println(ci);
132
133                        //applying Bob Gray's correction for weighted strata wtexamples.docx
134                        //           CoxCC.process(ci, survivalInfoList);
135                        //           ci.dump();
136                        //           ci.calcSummaryValues();
137
138                        return ci;
139                } catch (Exception e) {
140                        e.printStackTrace();
141                }
142                return null;
143        }
144
145
146
147        /**
148         * @param args the command line arguments
149         */
150        public static void main(String[] args) {
151                // TODO code application logic here
152                try {
153                        if (true) {
154                                String datafile = "/Users/Scooter/scripps/ngs/DataSets/E2197/misc/ecoglabtransfer/500790/2013.05.10.12.28.58.313/clindasl0228.txt";
155                                ArrayList<String> variables = new ArrayList<String>();
156                                variables.add("nndpos");
157                                variables.add("meno");
158//              variables.add("er1");
159//              variables.add("meno:er1");
160
161                                CoxInfo ci = CoxHelper.process(datafile, "ttr", "recind", "wt", "sstrat", "Seq", variables, false, true);
162
163                          //  ci.dump();
164
165                                System.out.println(ci);
166                                System.out.println();
167
168                                CoxCC.process(ci);
169
170                                ci.dump();
171
172                        }
173
174
175                } catch (Exception e) {
176                        e.printStackTrace();
177                }
178        }
179}