001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.survival.cox; 022 023 024import org.biojava.nbio.survival.data.WorkSheet; 025 026import java.util.ArrayList; 027 028/** 029 * The CoxHelper class is provided to start with a tab delimited file in a similar process in R and return the results as a CoxInfo class. 030 * Given the number of options for adjusting the calculations using weighting, strata, clustering etc the helper class can be used to hide 031 * the complexity for typical use case. 032 * 033 * @author Scooter Willis <willishf at gmail dot com> 034 */ 035public class CoxHelper { 036 037 /** 038 * 039 * @param datafile The tab delimited file containing survival data and variables. The first column needs to be unique index 040 * @param timeColumn The column representing the event/censor time 041 * @param statusColumn The column representing an event=1 and censor=0 042 * @param weightColumn For case-cohort data sets may require weighting to reflect the entire cohort 043 * @param strataColumn A column representing strata data 044 * @param clusterColumn If robost variation calculation is required the cluster column will group samples by the value in this column 045 * @param variables The variables to be used in the cox regression analysis. For Interactions using variable1:variable2 046 * @param useStrata Boolean to indicate if strata column should be used 047 * @param useWeights Boolean to indicate if weight column should be used 048 * @return 049 * @throws Exception 050 */ 051 052 053 public static CoxInfo process(String datafile, String timeColumn, String statusColumn, String weightColumn, String strataColumn, String clusterColumn, ArrayList<String> variables, boolean useStrata, boolean useWeights) throws Exception { 054 WorkSheet worksheet = WorkSheet.readCSV(datafile, '\t'); 055 return process(worksheet, timeColumn, statusColumn, weightColumn, strataColumn, clusterColumn, variables, useStrata, useWeights); 056 } 057 058 /** 059 * 060 * @param worksheet 061 * @param timeColumn The column representing the event/censor time 062 * @param statusColumn The column representing an event=1 and censor=0 063 * @param weightColumn For case-cohort data sets may require weighting to reflect the entire cohort 064 * @param strataColumn A column representing strata data 065 * @param clusterColumn If robost variation calculation is required the cluster column will group samples by the value in this column 066 * @param variables The variables to be used in the cox regression analysis. For Interactions using variable1:variable2 067 * @param useStrata Boolean to indicate if strata column should be used 068 * @param useWeights Boolean to indicate if weight column should be used 069 * @return 070 */ 071 public static CoxInfo process(WorkSheet worksheet, String timeColumn, String statusColumn, String weightColumn, String strataColumn, String clusterColumn, ArrayList<String> variables, boolean useStrata, boolean useWeights) { 072 073 try { 074 ArrayList<SurvivalInfo> survivalInfoList = new ArrayList<SurvivalInfo>(); 075 076 int i = 1; 077 for (String row : worksheet.getRows()) { 078 double time = worksheet.getCellDouble(row, timeColumn); 079 080 double c = worksheet.getCellDouble(row, statusColumn); 081 double weight = 1.0; 082 if (weightColumn != null && weightColumn.length() > 0) { 083 weight = worksheet.getCellDouble(row, weightColumn); 084 085 } 086 int strata = 0; 087 if (strataColumn != null && strataColumn.length() > 0) { 088 strata = worksheet.getCellDouble(row, strataColumn).intValue(); 089 } 090 int censor = (int) c; 091 092 if (weight <= 0) { 093 // System.out.println("Weight <= 0 Sample=" + row + " weight=" + weight); 094 i++; 095 continue; 096 } 097 098 099 100 SurvivalInfo si = new SurvivalInfo(time, censor); 101 si.setOrder(i); 102 si.setWeight(weight); 103 si.setStrata(strata); 104 for (String column : variables) { 105 if (column.contains(":")) { 106 continue; 107 } 108 String value = worksheet.getCell(row, column); 109 si.addUnknownDataTypeVariable(column, value); 110 } 111 if (clusterColumn != null && clusterColumn.length() > 0) { 112 String v = worksheet.getCell(row, clusterColumn); 113 si.setClusterValue(v); 114 } 115 116 survivalInfoList.add(si); 117 i++; 118 } 119 120 121 122 boolean cluster = false; 123 boolean robust = false; 124 if (clusterColumn != null && clusterColumn.length() > 0) { 125 cluster = true; 126 robust = true; 127 } 128 // variables.add("TREAT:AGE"); 129 CoxR cox = new CoxR(); 130 CoxInfo ci = cox.process(variables, survivalInfoList, useStrata, useWeights, robust, cluster); 131 // System.out.println(ci); 132 133 //applying Bob Gray's correction for weighted strata wtexamples.docx 134 // CoxCC.process(ci, survivalInfoList); 135 // ci.dump(); 136 // ci.calcSummaryValues(); 137 138 return ci; 139 } catch (Exception e) { 140 e.printStackTrace(); 141 } 142 return null; 143 } 144 145 146 147 /** 148 * @param args the command line arguments 149 */ 150 public static void main(String[] args) { 151 // TODO code application logic here 152 try { 153 if (true) { 154 String datafile = "/Users/Scooter/scripps/ngs/DataSets/E2197/misc/ecoglabtransfer/500790/2013.05.10.12.28.58.313/clindasl0228.txt"; 155 ArrayList<String> variables = new ArrayList<String>(); 156 variables.add("nndpos"); 157 variables.add("meno"); 158// variables.add("er1"); 159// variables.add("meno:er1"); 160 161 CoxInfo ci = CoxHelper.process(datafile, "ttr", "recind", "wt", "sstrat", "Seq", variables, false, true); 162 163 // ci.dump(); 164 165 System.out.println(ci); 166 System.out.println(); 167 168 CoxCC.process(ci); 169 170 ci.dump(); 171 172 } 173 174 175 } catch (Exception e) { 176 e.printStackTrace(); 177 } 178 } 179}