001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.survival.kaplanmeier.figure; 022 023import org.biojava.nbio.survival.cox.StrataInfo; 024import org.biojava.nbio.survival.cox.SurvFitInfo; 025import org.biojava.nbio.survival.cox.SurvivalInfo; 026import org.biojava.nbio.survival.data.WorkSheet; 027 028import javax.swing.*; 029import java.util.ArrayList; 030import java.util.Collections; 031import java.util.LinkedHashMap; 032 033/** 034 * Ported from survfitKM.S When combining multiple entries with same time not 035 * sure how the weighting adds up 036 * 037 * @author Scooter Willis <willishf at gmail dot com> 038 */ 039public class SurvFitKM { 040 041 /** 042 * 043 */ 044 public enum Method { 045 046 /** 047 * 048 */ 049 kaplanMeier, 050 /** 051 * 052 */ 053 flemingHarrington, 054 /** 055 * 056 */ 057 fh2; 058 } 059 060 /** 061 * 062 */ 063 public enum Error { 064 065 /** 066 * 067 */ 068 greenwood, 069 /** 070 * 071 */ 072 tsiatis; 073 } 074 075 /** 076 * 077 */ 078 public enum ConfType { 079 080 /** 081 * 082 */ 083 log, 084 /** 085 * 086 */ 087 log_log, 088 /** 089 * 090 */ 091 plain, 092 /** 093 * 094 */ 095 none; 096 } 097 098 /** 099 * 100 */ 101 public enum ConfLower { 102 103 /** 104 * 105 */ 106 usual, 107 /** 108 * 109 */ 110 peto, 111 /** 112 * 113 */ 114 modified; 115 } 116 117 /** 118 * 119 * @param survivalData 120 * @param useWeights 121 * @return 122 * @throws Exception 123 */ 124 public SurvFitInfo process(LinkedHashMap<String, ArrayList<CensorStatus>> survivalData, boolean useWeights) throws Exception { 125 ArrayList<SurvivalInfo> survivalInfoList = new ArrayList<SurvivalInfo>(); 126 int i = 0; 127 for (String strata : survivalData.keySet()) { 128 ArrayList<CensorStatus> csList = survivalData.get(strata); 129 for (CensorStatus cs : csList) { 130 SurvivalInfo si = new SurvivalInfo(cs.time, Integer.parseInt(cs.censored)); 131 si.setOrder(i); 132 i++; 133 si.setWeight(cs.weight); 134 si.addUnknownDataTypeVariable("STRATA", strata); 135 si.addUnknownDataTypeVariable("VALUE", cs.value + ""); 136 survivalInfoList.add(si); 137 } 138 } 139 140 return process("STRATA", survivalInfoList, useWeights); 141 142 } 143 144 /** 145 * 146 * @param datafile 147 * @param timeColumn 148 * @param statusColumn 149 * @param weightColumn 150 * @param variableColumn 151 * @param useWeights 152 * @return 153 * @throws Exception 154 */ 155 public SurvFitInfo process(String datafile, String timeColumn, String statusColumn, String weightColumn, String variableColumn, boolean useWeights) throws Exception { 156 WorkSheet worksheet = WorkSheet.readCSV(datafile, '\t'); 157 ArrayList<SurvivalInfo> survivalInfoList = new ArrayList<SurvivalInfo>(); 158 159 int i = 1; 160 for (String row : worksheet.getRows()) { 161 double time = worksheet.getCellDouble(row, timeColumn); 162 163 double c = worksheet.getCellDouble(row, statusColumn); 164 double weight = 1.0; 165 if (weightColumn != null && weightColumn.length() > 0) { 166 weight = worksheet.getCellDouble(row, weightColumn); 167 168 } 169 int strata = 0; 170 171 int censor = (int) c; 172 173 if (weight <= 0) { 174 // System.out.println("Weight <= 0 Sample=" + row + " weight=" + weight); 175 i++; 176 continue; 177 } 178 179 180 181 SurvivalInfo si = new SurvivalInfo(time, censor); 182 si.setOrder(i); 183 si.setWeight(weight); 184 si.setStrata(strata); 185 186 187 String value = worksheet.getCell(row, variableColumn); 188 si.addUnknownDataTypeVariable(variableColumn, value); 189 190 191 192 survivalInfoList.add(si); 193 i++; 194 } 195 196 197 198 return process(variableColumn, survivalInfoList, useWeights); 199 } 200 201 /** 202 * 203 * @param variable 204 * @param dataT 205 * @param useWeighted 206 * @return 207 * @throws Exception 208 */ 209 public SurvFitInfo process(String variable, ArrayList<SurvivalInfo> dataT, boolean useWeighted) throws Exception { 210 return this.process(variable, dataT, Method.kaplanMeier, Error.greenwood, true, .95, ConfType.log, ConfLower.usual, null, null, useWeighted); 211 } 212 213 214 public LinkedHashMap<String, StrataInfo> processStrataInfo(String variable, ArrayList<SurvivalInfo> dataT, SurvFitKM.Method method, SurvFitKM.Error error, boolean seFit, double confInt, ConfType confType, ConfLower confLower, Double startTime, Double newTime, boolean useWeighted) throws Exception{ 215 Collections.sort(dataT); 216 if (startTime == null && newTime != null) { 217 startTime = newTime; 218 } 219 //int ny = 2; // setup for right censored versus counting 220 if (startTime != null) { 221 throw new Exception("Filter on startTime not implemented"); 222 } 223 int n = dataT.size(); 224 225 226 LinkedHashMap<String, Integer> levels = new LinkedHashMap<String, Integer>(); 227 LinkedHashMap<String, ArrayList<SurvivalInfo>> strataHashMap = new LinkedHashMap<String, ArrayList<SurvivalInfo>>(); 228 229 for (int i = 0; i < n; i++) { 230 SurvivalInfo si = dataT.get(i); 231 String value = si.getUnknownDataTypeVariable(variable); 232 Integer count = levels.get(value); 233 if (count == null) { 234 count = 0; 235 } 236 count++; 237 levels.put(value, count); 238 ArrayList<SurvivalInfo> strataList = strataHashMap.get(value); 239 if (strataList == null) { 240 strataList = new ArrayList<SurvivalInfo>(); 241 strataHashMap.put(value, strataList); 242 } 243 strataList.add(si); 244 245 } 246 247 //int nstrat = levels.size(); 248 249 LinkedHashMap<String, StrataInfo> strataInfoHashMap = new LinkedHashMap<String, StrataInfo>(); 250 251 for (String strata : strataHashMap.keySet()) { 252 253 ArrayList<SurvivalInfo> strataList = strataHashMap.get(strata); 254 StrataInfo strataInfo = new StrataInfo(); 255 strataInfoHashMap.put(strata, strataInfo); 256 257 258 Double previousTime = null; 259 for (SurvivalInfo si : strataList) { 260 double w = 1.0; 261 if (useWeighted) { 262 w = si.getWeight(); 263 } 264 265 if (previousTime == null || si.getTime() != previousTime) { 266 strataInfo.getTime().add(si.getTime()); 267 if (si.getStatus() == 0) { 268 strataInfo.getStatus().add(0); 269 strataInfo.getNcens().add(w); 270 strataInfo.getNevent().add(0.0); 271 } else { 272 strataInfo.getNcens().add(0.0); 273 strataInfo.getNevent().add(w); 274 strataInfo.getStatus().add(1); 275 } 276 strataInfo.getNrisk().add(0.0); 277 strataInfo.getStderr().add(0.0); 278 strataInfo.getWeight().add(w); 279 } else { 280 //we have the same time so add to previous entry 281 int index = strataInfo.getTime().size() - 1; 282 if (si.getStatus() == 0) { 283 double nw = strataInfo.getNcens().get(index) + w; 284 strataInfo.getNcens().remove(index); 285 strataInfo.getNcens().add(nw); 286 // strataInfo.nevent.add(0.0); 287 } else { 288 // strataInfo.ncens.add(0.0); 289 double nw = strataInfo.getNevent().get(index) + w; 290 strataInfo.getNevent().remove(index); 291 strataInfo.getNevent().add(nw); 292 293 } 294 double nw = strataInfo.getWeight().get(index) + w; 295 strataInfo.getWeight().remove(index); 296 strataInfo.getWeight().add(nw); 297 } 298 previousTime = si.getTime(); 299 // strataInfo.status.add(si.status); 300 301 302 303 Integer ndead = strataInfo.getNdead().get(si.getTime()); 304 if (ndead == null) { 305 ndead = 0; 306 } 307 if (si.getStatus() == 1) { 308 ndead++; 309 } 310 strataInfo.getNdead().put(si.getTime(), ndead); 311 312 } 313 314 315 316 int j = strataInfo.getWeight().size() - 1; 317 double cw = 0.0; 318 for (int i = strataInfo.getWeight().size() - 1; i >= 0; i--) { 319 double c = strataInfo.getWeight().get(i); 320 cw = cw + c; 321 strataInfo.getNrisk().set(j, cw); 322 j--; 323 } 324 if (method == Method.kaplanMeier) { 325 326 for (int i = 0; i < strataInfo.getNrisk().size(); i++) { 327 double t = (strataInfo.getNrisk().get(i) - strataInfo.getNevent().get(i)) / strataInfo.getNrisk().get(i); 328 if (i == 0) { 329 strataInfo.getSurv().add(t); 330 } else { 331 strataInfo.getSurv().add(t * strataInfo.getSurv().get(i - 1)); 332 } 333 } 334 } else if (method == Method.flemingHarrington) { 335 for (int i = 0; i < strataInfo.getNrisk().size(); i++) { 336 double hazard = (strataInfo.getNevent().get(i)) / strataInfo.getNrisk().get(i); 337 if (i == 0) { 338 strataInfo.getSurv().add(Math.exp(-1.0 * hazard)); 339 } else { 340 strataInfo.getSurv().add(Math.exp(-1.0 * (hazard + strataInfo.getSurv().get(i - 1)))); 341 } 342 } 343 } else if (method == Method.fh2) { 344 throw new Exception("Method.fh2 not supported. Need to implement survfit4.c "); 345 } 346 347 if (seFit) { 348 if (error == Error.greenwood) { 349 for (int i = 0; i < strataInfo.getNrisk().size(); i++) { 350 double t = (strataInfo.getNevent().get(i)) / (strataInfo.getNrisk().get(i) * (strataInfo.getNrisk().get(i) - strataInfo.getNevent().get(i))); 351 if (i == 0) { 352 strataInfo.getVarhaz().add(t); 353 } else { 354 strataInfo.getVarhaz().add(t + strataInfo.getVarhaz().get(i - 1)); 355 } 356 } 357 } else if (method == Method.kaplanMeier || method == Method.flemingHarrington) { 358 for (int i = 0; i < strataInfo.getNrisk().size(); i++) { 359 double t = (strataInfo.getNevent().get(i)) / (strataInfo.getNrisk().get(i) * strataInfo.getNrisk().get(i)); 360 if (i == 0) { 361 strataInfo.getVarhaz().add(t); 362 } else { 363 strataInfo.getVarhaz().add(t + strataInfo.getVarhaz().get(i - 1)); 364 } 365 } 366 367 } else { 368 //varhaz[[i]] <- cumsum(nevent* tsum$sum2) 369 throw new Exception("Method.fh2 not supported. Need to implement survfit4.c "); 370 } 371 strataInfo.getStderr().clear(); 372 for (int i = 0; i < strataInfo.getNrisk().size(); i++) { 373 374 strataInfo.getStderr().add(Math.sqrt(strataInfo.getVarhaz().get(i))); 375 } 376 377 378 } 379 380 } 381 382 ArrayList<Boolean> events = new ArrayList<Boolean>(); 383 ArrayList<Double> nrisk = new ArrayList<Double>(); 384 for (StrataInfo strataInfo : strataInfoHashMap.values()) { 385 boolean firsttime = true; 386 for (int j = 0; j < strataInfo.getNevent().size(); j++) { 387 Double d = strataInfo.getNevent().get(j); 388 if (d > 0 || firsttime) { 389 firsttime = false; 390 events.add(true); 391 nrisk.add(strataInfo.getNrisk().get(j)); 392 } else { 393 events.add(false); 394 } 395 } 396 397 } 398 ArrayList<Integer> zz = new ArrayList<Integer>(); 399 for (int i = 0; i < events.size(); i++) { 400 if (events.get(i)) { 401 zz.add(i + 1); 402 } 403 } 404 zz.add(events.size() + 1); 405 ArrayList<Integer> diffzz = new ArrayList<Integer>(); 406 for (int i = 0; i < zz.size() - 1; i++) { 407 diffzz.add(zz.get(i + 1) - zz.get(i)); 408 } 409 //System.out.println(diffzz); 410 ArrayList<Double> nlag = new ArrayList<Double>(); 411 for (int j = 0; j < nrisk.size(); j++) { 412 int count = diffzz.get(j); 413 for (int c = 0; c < count; c++) { 414 nlag.add(nrisk.get(j)); 415 } 416 } 417 418 // System.out.println(nlag); 419 // System.out.println("nlag.size=" + nlag.size()); 420 if (confLower == ConfLower.usual) { 421 for (StrataInfo strataInfo : strataInfoHashMap.values()) { 422 strataInfo.setStdlow(strataInfo.getStderr()); 423 } 424 } else if (confLower == ConfLower.peto) { 425 for (StrataInfo strataInfo : strataInfoHashMap.values()) { 426 for (int j = 0; j < strataInfo.getSurv().size(); j++) { 427 double v = Math.sqrt((1.0 - strataInfo.getSurv().get(j)) / strataInfo.getNrisk().get(j)); 428 strataInfo.getStdlow().add(v); 429 } 430 } 431 } else if (confLower == ConfLower.modified) { 432 int i = 0; 433 for (StrataInfo strataInfo : strataInfoHashMap.values()) { 434 for (int j = 0; j < strataInfo.getSurv().size(); j++) { 435 double v = strataInfo.getStderr().get(j) * Math.sqrt(nlag.get(i) / strataInfo.getNrisk().get(j)); 436 strataInfo.getStdlow().add(v); 437 i++; 438 } 439 } 440 } 441 442 //zval <- qnorm(1- (1-conf.int)/2, 0,1) 443 double zvalue = 1.959964; 444 445 if (confType == ConfType.plain) { 446 for (StrataInfo strataInfo : strataInfoHashMap.values()) { 447 for (int j = 0; j < strataInfo.getSurv().size(); j++) { 448 double upper = strataInfo.getSurv().get(j) + zvalue * strataInfo.getStderr().get(j) * strataInfo.getSurv().get(j); 449 double lower = strataInfo.getSurv().get(j) - zvalue * strataInfo.getStdlow().get(j) * strataInfo.getSurv().get(j); 450 strataInfo.getLower().add(lower); 451 strataInfo.getUpper().add(upper); 452 } 453 } 454 455 } else if (confType == ConfType.log) { 456 for (StrataInfo strataInfo : strataInfoHashMap.values()) { 457 for (int j = 0; j < strataInfo.getSurv().size(); j++) { 458 double surv = strataInfo.getSurv().get(j); 459 if (surv == 0) { 460 strataInfo.getLower().add(Double.NaN); 461 strataInfo.getUpper().add(Double.NaN); 462 } else { 463 double upper = Math.exp(Math.log(surv) + zvalue * strataInfo.getStderr().get(j)); 464 double lower = Math.exp(Math.log(surv) - zvalue * strataInfo.getStdlow().get(j)); 465 strataInfo.getLower().add(lower); 466 strataInfo.getUpper().add(upper); 467 } 468 } 469 } 470 471 } else if (confType == ConfType.log_log) { 472 throw new Exception("ConfType log-log currently not supported"); 473 } 474 475 476// if (false) { 477// for (String strata : strataInfoHashMap.keySet()) { 478// StrataInfo strataInfo = strataInfoHashMap.get(strata); 479// System.out.println(strataInfo.toString()); 480// System.out.println(); 481// } 482// System.out.println(); 483// } 484 485 return strataInfoHashMap; 486 } 487 488 /** 489 * 490 * @param variable 491 * @param dataT 492 * @param method 493 * @param error 494 * @param seFit 495 * @param confInt 496 * @param confType 497 * @param confLower 498 * @param startTime 499 * @param newTime 500 * @param useWeighted 501 * @return 502 * @throws Exception 503 */ 504 public SurvFitInfo process(String variable, ArrayList<SurvivalInfo> dataT, SurvFitKM.Method method, SurvFitKM.Error error, boolean seFit, double confInt, ConfType confType, ConfLower confLower, Double startTime, Double newTime, boolean useWeighted) throws Exception { 505 SurvFitInfo si = new SurvFitInfo(); 506 507 LinkedHashMap<String, StrataInfo> strataInfoHashMap = this.processStrataInfo(variable, dataT, method, error, seFit, confInt, confType, confLower, startTime, newTime, useWeighted); 508 si.setStrataInfoHashMap(strataInfoHashMap); 509 LinkedHashMap<String, StrataInfo> unweightedStrataInfoHashMap = this.processStrataInfo(variable, dataT, method, error, seFit, confInt, confType, confLower, startTime, newTime, false); 510 si.setUnweightedStrataInfoHashMap(unweightedStrataInfoHashMap); 511 si.setWeighted(useWeighted); 512 return si; 513 } 514 515 /** 516 * @param args the command line arguments 517 */ 518 public static void main(String[] args) { 519 // TODO code application logic here 520 try { 521 String datafile = "/Users/Scooter/scripps/ngs/BLJ/E2197/Predictive Signatures/V$HSF_Q6-E2197 TTR.txt"; 522 523 SurvFitKM survFitKM = new SurvFitKM(); 524 525 SurvFitInfo si = survFitKM.process(datafile, "TIME", "STATUS", "WEIGHT", "MEAN", true); 526 527 528 529 if (true) { 530 531 KaplanMeierFigure kaplanMeierFigure = new KaplanMeierFigure(); 532 533 534 535 536 537 JFrame application = new JFrame(); 538 application.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); 539 application.add(kaplanMeierFigure); 540 kaplanMeierFigure.setSize(500, 400); 541 542 application.setSize(500, 400); // window is 500 pixels wide, 400 high 543 application.setVisible(true); 544 545 ArrayList<String> titles = new ArrayList<String>(); 546 titles.add("Line 1"); 547 titles.add("line 2"); 548 kaplanMeierFigure.setSurvivalData(titles, si, null); 549 550 ArrayList<String> figureInfo = new ArrayList<String>(); 551 // figureInfo.add("HR=2.1 95% CI(1.8-2.5)"); 552 // figureInfo.add("p-value=.001"); 553 kaplanMeierFigure.setFigureLineInfo(figureInfo); 554 555 kaplanMeierFigure.savePNG("/Users/Scooter/Downloads/test.png"); 556 557 558 559 } 560 561 562 563 } catch (Exception e) { 564 e.printStackTrace(); 565 } 566 } 567}