001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.phosphosite;
022
023import org.biojava.nbio.structure.align.util.AtomCache;
024import org.slf4j.Logger;
025import org.slf4j.LoggerFactory;
026
027import java.io.*;
028import java.net.URL;
029import java.nio.file.Files;
030import java.nio.file.StandardCopyOption;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.List;
034import java.util.stream.Collectors;
035import java.util.stream.Stream;
036
037/**
038 * Phosphosite is available under the PhosphoSitePlus® is licensed under Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License and is freely available for non-commercial purposes from
039 *
040 * http://www.phosphosite.org/staticDownloads.do
041 *
042 * Please acknowledge PhosphoSitePlus®, www.phosphosite.org" at appropriate locations.
043 *
044 * Please cite : “Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. Nucleic Acids Res. 40(Database issue), D261–70.”.
045 *
046 *
047 *
048 * Created by ap3 on 31/10/2014.
049 */
050public class Dataset {
051
052        private static final Logger logger = LoggerFactory.getLogger(Dataset.class);
053
054        public static final String ACETYLATION = "https://www.phosphosite.org/downloads/Acetylation_site_dataset.gz";
055
056        public static final String DISEASE_ASSOC = "https://www.phosphosite.org/downloads/Disease-associated_sites.gz";
057
058        public static final String METHYLATION = "https://www.phosphosite.org/downloads/Methylation_site_dataset.gz";
059
060        public static final String PHOSPHORYLATION = "https://www.phosphosite.org/downloads/Phosphorylation_site_dataset.gz";
061
062        public static final String REGULATORY = "https://www.phosphosite.org/downloads/Regulatory_sites.gz";
063
064        public static final String SUMOYLATION = "https://www.phosphosite.org/downloads/Sumoylation_site_dataset.gz";
065
066        public static final String UBIQUITINATION = "https://www.phosphosite.org/downloads/Ubiquitination_site_dataset.gz";
067
068
069        public Dataset(){
070
071
072        }
073
074        private String[] getRemoteFiles(){
075                String[] files = new String[]{ACETYLATION,DISEASE_ASSOC,METHYLATION,PHOSPHORYLATION,REGULATORY,SUMOYLATION,UBIQUITINATION};
076
077
078                return files;
079        }
080
081        public File[] getLocalFiles(){
082                String[] rfiles = getRemoteFiles();
083                File dir = getLocalDir();
084                List<File> files =  Arrays.stream(rfiles).map(remoteFileName -> remoteFileName.substring(remoteFileName.lastIndexOf("/")))
085                                                                                                                                                        .map(localFile -> new File(dir+"/"+localFile))
086                                                                                                                                                        .filter(file -> file.exists())
087                                                                                                                                                        .collect(Collectors.toList());
088
089                return files.toArray(new File[files.size()]);
090        }
091
092
093        public File getLocalDir(){
094                AtomCache cache = new AtomCache();
095
096                String path = cache.getCachePath();
097
098                File dir = new File(path+"/phosphosite");
099
100                return dir;
101        }
102
103        public void download(){
104
105                logger.warn("Downloading data from www.phosposite.org. Data is under CC-BY-NC-SA license. Please link to site and cite: ");
106                logger.warn("Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. Nucleic Acids Res. 40(Database issue), D261–70.");
107
108                File dir = getLocalDir();
109
110                if ( ! dir.exists()) {
111
112                        // need to download all...
113
114                        dir.mkdir();
115
116
117                }
118
119                String[] files = getRemoteFiles();
120
121                for ( String f : files){
122
123                        try {
124
125
126                                int slashIndex = f.lastIndexOf("/");
127
128                                String fileName = f.substring(slashIndex);
129
130                                File localFile = new File(dir+"/" + fileName);
131
132                                if ( ! localFile.exists()){
133
134                                        URL u = new URL(f);
135                                        downloadFile(u, localFile);
136                                }
137
138
139                        } catch (Exception e){
140
141                                e.printStackTrace();
142                        }
143
144
145                }
146
147        }
148
149        public void downloadFile(URL u, File localFile) throws IOException {
150
151                logger.info("Downloading " + u);
152
153                File tmp = File.createTempFile("tmp","phosphosite");
154
155                InputStream is = u.openStream();
156
157                BufferedInputStream in = new BufferedInputStream(is);
158
159                FileOutputStream w = new FileOutputStream(tmp);
160
161                int i= 0;
162                byte[] bytesIn = new byte[300000];
163                while ((i = in.read(bytesIn)) >= 0) {
164                        w.write(bytesIn,0,i);
165                }
166                in.close();
167                w.close();
168
169
170                // now copy  tmp file to localFile
171                copyFile(tmp, localFile);
172
173        }
174
175
176
177        public static void copyFile(File src, File dst) throws IOException
178        {
179
180                Files.copy(src.toPath(), dst.toPath(), StandardCopyOption.REPLACE_EXISTING);
181
182        }
183
184
185        public static void main(String[] args) {
186
187                Dataset ds = new Dataset();
188
189                ds.download();
190
191                try {
192
193                        for (File f : ds.getLocalFiles()) {
194
195                                logger.info(f.getAbsolutePath());
196
197                                List<Site> sites = Site.parseSites(f);
198
199                                logger.info("Got " + sites.size() + " sites");
200                                for (Site s : sites) {
201                                        if (s.getUniprot().equals("P50225") || s.getUniprot().equals("P48025")) {
202                                                logger.info(s.toString());
203                                        }
204                                }
205
206                        }
207
208
209                } catch (Exception e) {
210                        e.printStackTrace();
211                }
212        }
213
214}