001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.phosphosite;
022
023import org.biojava.nbio.structure.align.util.AtomCache;
024import org.slf4j.Logger;
025import org.slf4j.LoggerFactory;
026
027import java.io.*;
028import java.net.URL;
029import java.nio.file.Files;
030import java.nio.file.StandardCopyOption;
031import java.util.ArrayList;
032import java.util.List;
033
/**
 * PhosphoSitePlus® is licensed under the Creative Commons
 * Attribution-NonCommercial-ShareAlike 3.0 Unported License and is freely
 * available for non-commercial purposes from
 *
 * http://www.phosphosite.org/staticDownloads.do
 *
 * Please acknowledge "PhosphoSitePlus®, www.phosphosite.org" at appropriate locations.
 *
 * Please cite: “Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. Nucleic Acids Res. 40(Database issue), D261–70.”
 *
 * Created by ap3 on 31/10/2014.
 */
047public class Dataset {
048
049        private static final Logger logger = LoggerFactory.getLogger(Dataset.class);
050
051        public static final String ACETYLATION = "https://www.phosphosite.org/downloads/Acetylation_site_dataset.gz";
052
053        public static final String DISEASE_ASSOC = "https://www.phosphosite.org/downloads/Disease-associated_sites.gz";
054
055        public static final String METHYLATION = "https://www.phosphosite.org/downloads/Methylation_site_dataset.gz";
056
057        public static final String PHOSPHORYLATION = "https://www.phosphosite.org/downloads/Phosphorylation_site_dataset.gz";
058
059        public static final String REGULATORY = "https://www.phosphosite.org/downloads/Regulatory_sites.gz";
060
061        public static final String SUMOYLATION = "https://www.phosphosite.org/downloads/Sumoylation_site_dataset.gz";
062
063        public static final String UBIQUITINATION = "https://www.phosphosite.org/downloads/Ubiquitination_site_dataset.gz";
064
065
066        public Dataset(){
067
068
069        }
070
071        private String[] getRemoteFiles(){
072                String[] files = new String[]{ACETYLATION,DISEASE_ASSOC,METHYLATION,PHOSPHORYLATION,REGULATORY,SUMOYLATION,UBIQUITINATION};
073
074
075                return files;
076        }
077
078        public File[] getLocalFiles(){
079
080                String[] rfiles = getRemoteFiles();
081
082
083                File dir = getLocalDir();
084
085                List<File> files = new ArrayList<File>();
086                for ( String f : rfiles) {
087
088
089                        int slashIndex = f.lastIndexOf("/");
090
091                        String fileName = f.substring(slashIndex);
092
093                        File localFile = new File(dir+"/" + fileName);
094
095                        if (  localFile.exists()){
096                                files.add(localFile);
097                        }
098
099                }
100
101                return files.toArray(new File[files.size()]);
102        }
103
104
105        public File getLocalDir(){
106                AtomCache cache = new AtomCache();
107
108                String path = cache.getCachePath();
109
110                File dir = new File(path+"/phosphosite");
111
112                return dir;
113        }
114
115        public void download(){
116
117                logger.warn("Downloading data from www.phosposite.org. Data is under CC-BY-NC-SA license. Please link to site and cite: ");
118                logger.warn("Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. Nucleic Acids Res. 40(Database issue), D261–70.");
119
120                File dir = getLocalDir();
121
122                if ( ! dir.exists()) {
123
124                        // need to download all...
125
126                        dir.mkdir();
127
128
129                }
130
131                String[] files = getRemoteFiles();
132
133                for ( String f : files){
134
135                        try {
136
137
138                                int slashIndex = f.lastIndexOf("/");
139
140                                String fileName = f.substring(slashIndex);
141
142                                File localFile = new File(dir+"/" + fileName);
143
144                                if ( ! localFile.exists()){
145
146                                        URL u = new URL(f);
147                                        downloadFile(u, localFile);
148                                }
149
150
151                        } catch (Exception e){
152
153                                e.printStackTrace();
154                        }
155
156
157                }
158
159        }
160
161        public void downloadFile(URL u, File localFile) throws IOException {
162
163                logger.info("Downloading " + u);
164
165                File tmp = File.createTempFile("tmp","phosphosite");
166
167                InputStream is = u.openStream();
168
169                BufferedInputStream in = new BufferedInputStream(is);
170
171                FileOutputStream w = new FileOutputStream(tmp);
172
173                int i= 0;
174                byte[] bytesIn = new byte[300000];
175                while ((i = in.read(bytesIn)) >= 0) {
176                        w.write(bytesIn,0,i);
177                }
178                in.close();
179                w.close();
180
181
182                // now copy  tmp file to localFile
183                copyFile(tmp, localFile);
184
185        }
186
187
188
189        public static void copyFile(File src, File dst) throws IOException
190        {
191
192                Files.copy(src.toPath(), dst.toPath(), StandardCopyOption.REPLACE_EXISTING);
193
194        }
195
196
197        public static void main(String[] args) {
198
199                Dataset ds = new Dataset();
200
201                ds.download();
202
203                try {
204
205                        for (File f : ds.getLocalFiles()) {
206
207                                logger.info(f.getAbsolutePath());
208
209                                List<Site> sites = Site.parseSites(f);
210
211                                logger.info("Got " + sites.size() + " sites");
212                                for (Site s : sites) {
213                                        if (s.getUniprot().equals("P50225") || s.getUniprot().equals("P48025")) {
214                                                logger.info(s.toString());
215                                        }
216                                }
217
218                        }
219
220
221                } catch (Exception e) {
222                        e.printStackTrace();
223                }
224        }
225
226}