001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.phosphosite;
022
023import org.biojava.nbio.structure.align.util.AtomCache;
024import org.slf4j.Logger;
025import org.slf4j.LoggerFactory;
026
027import java.io.*;
028import java.net.URL;
029import java.nio.channels.FileChannel;
030import java.nio.file.Files;
031import java.nio.file.StandardCopyOption;
032import java.util.ArrayList;
033import java.util.List;
034
035/**
036 * Phosphosite is available under the PhosphoSitePlus® is licensed under Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License and is freely available for non-commercial purposes from
037 *
038 * http://www.phosphosite.org/staticDownloads.do
039 *
040 * Please acknowledge PhosphoSitePlus®, www.phosphosite.org" at appropriate locations.
041 *
042 * Please cite : “Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. Nucleic Acids Res. 40(Database issue), D261–70.”.
043 *
044 *
045 *
046 * Created by ap3 on 31/10/2014.
047 */
048public class Dataset {
049
050        private static final Logger logger = LoggerFactory.getLogger(Dataset.class);
051
052        public static final String ACETYLATION = "https://www.phosphosite.org/downloads/Acetylation_site_dataset.gz";
053
054        public static final String DISEASE_ASSOC = "https://www.phosphosite.org/downloads/Disease-associated_sites.gz";
055
056        public static final String METHYLATION = "https://www.phosphosite.org/downloads/Methylation_site_dataset.gz";
057
058        public static final String PHOSPHORYLATION = "https://www.phosphosite.org/downloads/Phosphorylation_site_dataset.gz";
059
060        public static final String REGULATORY = "https://www.phosphosite.org/downloads/Regulatory_sites.gz";
061
062        public static final String SUMOYLATION = "https://www.phosphosite.org/downloads/Sumoylation_site_dataset.gz";
063
064        public static final String UBIQUITINATION = "https://www.phosphosite.org/downloads/Ubiquitination_site_dataset.gz";
065
066
067        public Dataset(){
068
069
070        }
071
072        private String[] getRemoteFiles(){
073                String[] files = new String[]{ACETYLATION,DISEASE_ASSOC,METHYLATION,PHOSPHORYLATION,REGULATORY,SUMOYLATION,UBIQUITINATION};
074
075
076                return files;
077        }
078
079        public File[] getLocalFiles(){
080
081                String[] rfiles = getRemoteFiles();
082
083
084                File dir = getLocalDir();
085
086                List<File> files = new ArrayList<File>();
087                for ( String f : rfiles) {
088
089
090                        int slashIndex = f.lastIndexOf("/");
091
092                        String fileName = f.substring(slashIndex);
093
094                        File localFile = new File(dir+"/" + fileName);
095
096                        if (  localFile.exists()){
097                                files.add(localFile);
098                        }
099
100                }
101
102                return files.toArray(new File[files.size()]);
103        }
104
105
106        public File getLocalDir(){
107                AtomCache cache = new AtomCache();
108
109                String path = cache.getCachePath();
110
111                File dir = new File(path+"/phosphosite");
112
113                return dir;
114        }
115
116        public void download(){
117
118                logger.warn("Downloading data from www.phosposite.org. Data is under CC-BY-NC-SA license. Please link to site and cite: ");
119                logger.warn("Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. Nucleic Acids Res. 40(Database issue), D261–70.");
120
121                File dir = getLocalDir();
122
123                if ( ! dir.exists()) {
124
125                        // need to download all...
126
127                        dir.mkdir();
128
129
130                }
131
132                String[] files = getRemoteFiles();
133
134                for ( String f : files){
135
136                        try {
137
138
139                                int slashIndex = f.lastIndexOf("/");
140
141                                String fileName = f.substring(slashIndex);
142
143                                File localFile = new File(dir+"/" + fileName);
144
145                                if ( ! localFile.exists()){
146
147                                        URL u = new URL(f);
148                                        downloadFile(u, localFile);
149                                }
150
151
152                        } catch (Exception e){
153
154                                e.printStackTrace();
155                        }
156
157
158                }
159
160        }
161
162        public void downloadFile(URL u, File localFile) throws IOException {
163
164                logger.info("Downloading " + u);
165
166                File tmp = File.createTempFile("tmp","phosphosite");
167
168                InputStream is = u.openStream();
169
170                BufferedInputStream in = new BufferedInputStream(is);
171
172                FileOutputStream w = new FileOutputStream(tmp);
173
174                int i= 0;
175                byte[] bytesIn = new byte[300000];
176                while ((i = in.read(bytesIn)) >= 0) {
177                        w.write(bytesIn,0,i);
178                }
179                in.close();
180                w.close();
181
182
183                // now copy  tmp file to localFile
184                copyFile(tmp, localFile);
185
186        }
187
188
189
190        public static void copyFile(File src, File dst) throws IOException
191        {
192
193                Files.copy(src.toPath(), dst.toPath(), StandardCopyOption.REPLACE_EXISTING);
194
195        }
196
197
198        public static void main(String[] args) {
199
200                Dataset ds = new Dataset();
201
202                ds.download();
203
204                try {
205
206                        for (File f : ds.getLocalFiles()) {
207
208                                logger.info(f.getAbsolutePath());
209
210                                List<Site> sites = Site.parseSites(f);
211
212                                logger.info("Got " + sites.size() + " sites");
213                                for (Site s : sites) {
214                                        if (s.getUniprot().equals("P50225") || s.getUniprot().equals("P48025")) {
215                                                logger.info(s.toString());
216                                        }
217                                }
218
219                        }
220
221
222                } catch (Exception e) {
223                        e.printStackTrace();
224                }
225        }
226
227}