/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.phosphosite;

import org.biojava.nbio.structure.align.util.AtomCache;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;

/**
 * Locates and downloads the PhosphoSitePlus® site dataset files into a
 * {@code phosphosite} sub-directory of the {@link AtomCache} cache path.
 *
 * <p>PhosphoSitePlus® is licensed under the Creative Commons
 * Attribution-NonCommercial-ShareAlike 3.0 Unported License and is freely
 * available for non-commercial purposes from
 *
 * <p>http://www.phosphosite.org/staticDownloads.do
 *
 * <p>Please acknowledge "PhosphoSitePlus®, www.phosphosite.org" at appropriate locations.
 *
 * <p>Please cite: "Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E,
 * Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive
 * resource for investigating the structure and function of experimentally
 * determined post-translational modifications in man and mouse.
 * Nucleic Acids Res. 40(Database issue), D261–70."
 *
 * <p>Created by ap3 on 31/10/2014.
 */
public class Dataset {

	private static final Logger logger = LoggerFactory.getLogger(Dataset.class);

	public static final String ACETYLATION = "https://www.phosphosite.org/downloads/Acetylation_site_dataset.gz";

	public static final String DISEASE_ASSOC = "https://www.phosphosite.org/downloads/Disease-associated_sites.gz";

	public static final String METHYLATION = "https://www.phosphosite.org/downloads/Methylation_site_dataset.gz";

	public static final String PHOSPHORYLATION = "https://www.phosphosite.org/downloads/Phosphorylation_site_dataset.gz";

	public static final String REGULATORY = "https://www.phosphosite.org/downloads/Regulatory_sites.gz";

	public static final String SUMOYLATION = "https://www.phosphosite.org/downloads/Sumoylation_site_dataset.gz";

	public static final String UBIQUITINATION = "https://www.phosphosite.org/downloads/Ubiquitination_site_dataset.gz";


	public Dataset() {
	}

	/**
	 * @return the URLs of all remote dataset files known to this class
	 */
	private String[] getRemoteFiles() {
		return new String[] {
				ACETYLATION, DISEASE_ASSOC, METHYLATION, PHOSPHORYLATION,
				REGULATORY, SUMOYLATION, UBIQUITINATION};
	}

	/**
	 * Maps a remote dataset URL to the file it is stored as inside {@code dir}.
	 * The local name is everything after the last {@code '/'} of the URL.
	 *
	 * @param dir the local dataset directory
	 * @param remoteUrl the URL of the remote dataset file
	 * @return the local file (which may or may not exist yet)
	 */
	private static File toLocalFile(File dir, String remoteUrl) {
		// +1 drops the slash itself; a URL without any slash is used whole.
		String fileName = remoteUrl.substring(remoteUrl.lastIndexOf('/') + 1);
		return new File(dir, fileName);
	}

	/**
	 * Lists the dataset files that are already present in the local directory.
	 *
	 * @return the existing local files, in the order of the remote file list;
	 *         empty if none has been downloaded yet
	 */
	public File[] getLocalFiles() {

		File dir = getLocalDir();

		List<File> files = new ArrayList<>();
		for (String remote : getRemoteFiles()) {
			File localFile = toLocalFile(dir, remote);
			if (localFile.exists()) {
				files.add(localFile);
			}
		}

		return files.toArray(new File[0]);
	}

	/**
	 * @return the {@code phosphosite} directory below the cache path reported
	 *         by a new {@link AtomCache}; the directory is not created here
	 */
	public File getLocalDir() {
		AtomCache cache = new AtomCache();

		return new File(cache.getCachePath(), "phosphosite");
	}

	/**
	 * Downloads every remote dataset file that is not yet present in the local
	 * directory, creating that directory first if necessary.
	 *
	 * <p>This is best effort: a failure for one file is logged and the
	 * remaining files are still attempted. Nothing is thrown to the caller.
	 */
	public void download() {

		logger.warn("Downloading data from www.phosphosite.org. Data is under CC-BY-NC-SA license. Please link to site and cite: ");
		logger.warn("Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. Nucleic Acids Res. 40(Database issue), D261–70.");

		File dir = getLocalDir();

		// mkdirs() also creates missing parents; the second isDirectory() check
		// covers the case where somebody else created it in the meantime.
		if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
			logger.error("Could not create local directory {}", dir);
			return;
		}

		for (String remote : getRemoteFiles()) {

			File localFile = toLocalFile(dir, remote);
			if (localFile.exists()) {
				continue;
			}

			try {
				downloadFile(new URL(remote), localFile);
			} catch (Exception e) {
				// Deliberately broad: one broken download must not stop the others.
				logger.error("Could not download {} to {}", remote, localFile, e);
			}
		}
	}

	/**
	 * Downloads {@code u} into a temporary file and then copies it to
	 * {@code localFile}, so that an interrupted transfer does not leave a
	 * partial file at the destination.
	 *
	 * @param u the URL to read from
	 * @param localFile the destination; replaced if it already exists
	 * @throws IOException if the temporary file cannot be created, or reading
	 *         the URL or writing either file fails
	 */
	public void downloadFile(URL u, File localFile) throws IOException {

		logger.info("Downloading {}", u);

		File tmp = File.createTempFile("tmp", "phosphosite");

		try {
			// createTempFile already created the file, hence REPLACE_EXISTING.
			try (InputStream in = u.openStream()) {
				Files.copy(in, tmp.toPath(), StandardCopyOption.REPLACE_EXISTING);
			}

			// now copy tmp file to localFile
			copyFile(tmp, localFile);

		} finally {
			// The temporary file is of no use after the copy (or after a failure).
			if (!tmp.delete()) {
				logger.warn("Could not delete temporary file {}", tmp);
			}
		}
	}

	/**
	 * Copies {@code src} to {@code dst}, replacing {@code dst} if it exists.
	 *
	 * @param src the file to copy
	 * @param dst the destination file
	 * @throws IOException if the copy fails
	 */
	public static void copyFile(File src, File dst) throws IOException {
		Files.copy(src.toPath(), dst.toPath(), StandardCopyOption.REPLACE_EXISTING);
	}

	/**
	 * Demo entry point: downloads the datasets, parses every local file and
	 * logs the sites of two example UniProt accessions.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {

		Dataset ds = new Dataset();

		ds.download();

		try {

			for (File f : ds.getLocalFiles()) {

				logger.info(f.getAbsolutePath());

				List<Site> sites = Site.parseSites(f);

				logger.info("Got {} sites", sites.size());
				for (Site s : sites) {
					// Constant-first comparison so a site without accession is skipped
					// instead of raising a NullPointerException.
					if ("P50225".equals(s.getUniprot()) || "P48025".equals(s.getUniprot())) {
						logger.info(s.toString());
					}
				}
			}

		} catch (Exception e) {
			logger.error("Could not parse the downloaded PhosphoSite files", e);
		}
	}

}