001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.phosphosite;
022
023import java.io.BufferedInputStream;
024import java.io.File;
025import java.io.FileOutputStream;
026import java.io.IOException;
027import java.io.InputStream;
028import java.net.URL;
029import java.nio.file.Files;
030import java.nio.file.StandardCopyOption;
031import java.util.Arrays;
032import java.util.List;
033import java.util.stream.Collectors;
034
035import org.biojava.nbio.structure.align.util.AtomCache;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039/**
 * PhosphoSitePlus® is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License and is freely available for non-commercial purposes from
041 *
042 * http://www.phosphosite.org/staticDownloads.do
043 *
 * Please acknowledge "PhosphoSitePlus®, www.phosphosite.org" at appropriate locations.
045 *
046 * Please cite : “Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. Nucleic Acids Res. 40(Database issue), D261–70.”.
047 *
048 *
049 *
050 * Created by ap3 on 31/10/2014.
051 */
052public class Dataset {
053
054        private static final Logger logger = LoggerFactory.getLogger(Dataset.class);
055
056        public static final String ACETYLATION = "https://www.phosphosite.org/downloads/Acetylation_site_dataset.gz";
057
058        public static final String DISEASE_ASSOC = "https://www.phosphosite.org/downloads/Disease-associated_sites.gz";
059
060        public static final String METHYLATION = "https://www.phosphosite.org/downloads/Methylation_site_dataset.gz";
061
062        public static final String PHOSPHORYLATION = "https://www.phosphosite.org/downloads/Phosphorylation_site_dataset.gz";
063
064        public static final String REGULATORY = "https://www.phosphosite.org/downloads/Regulatory_sites.gz";
065
066        public static final String SUMOYLATION = "https://www.phosphosite.org/downloads/Sumoylation_site_dataset.gz";
067
068        public static final String UBIQUITINATION = "https://www.phosphosite.org/downloads/Ubiquitination_site_dataset.gz";
069
070
071        public Dataset(){
072
073
074        }
075
076        private String[] getRemoteFiles(){
077                String[] files = new String[]{ACETYLATION,DISEASE_ASSOC,METHYLATION,PHOSPHORYLATION,REGULATORY,SUMOYLATION,UBIQUITINATION};
078
079
080                return files;
081        }
082
083        public File[] getLocalFiles(){
084                String[] rfiles = getRemoteFiles();
085                File dir = getLocalDir();
086                List<File> files =  Arrays.stream(rfiles).map(remoteFileName -> remoteFileName.substring(remoteFileName.lastIndexOf("/")))
087                                                                                                                                                        .map(localFile -> new File(dir+"/"+localFile))
088                                                                                                                                                        .filter(file -> file.exists())
089                                                                                                                                                        .collect(Collectors.toList());
090
091                return files.toArray(new File[files.size()]);
092        }
093
094
095        public File getLocalDir(){
096                AtomCache cache = new AtomCache();
097
098                String path = cache.getCachePath();
099
100                File dir = new File(path+"/phosphosite");
101
102                return dir;
103        }
104
105        public void download(){
106
107                logger.warn("Downloading data from www.phosposite.org. Data is under CC-BY-NC-SA license. Please link to site and cite: ");
108                logger.warn("Hornbeck PV, Kornhauser JM, Tkachev S, Zhang B, Skrzypek E, Murray B, Latham V, Sullivan M (2012) PhosphoSitePlus: a comprehensive resource for investigating the structure and function of experimentally determined post-translational modifications in man and mouse. Nucleic Acids Res. 40(Database issue), D261–70.");
109
110                File dir = getLocalDir();
111
112                if ( ! dir.exists()) {
113
114                        // need to download all...
115
116                        dir.mkdir();
117
118
119                }
120
121                String[] files = getRemoteFiles();
122
123                for ( String f : files){
124
125                        try {
126
127
128                                int slashIndex = f.lastIndexOf("/");
129
130                                String fileName = f.substring(slashIndex);
131
132                                File localFile = new File(dir+"/" + fileName);
133
134                                if ( ! localFile.exists()){
135
136                                        URL u = new URL(f);
137                                        downloadFile(u, localFile);
138                                }
139
140
141                        } catch (Exception e){
142
143                                e.printStackTrace();
144                        }
145
146
147                }
148
149        }
150
151        public void downloadFile(URL u, File localFile) throws IOException {
152
153                logger.info("Downloading " + u);
154
155                File tmp = Files.createTempFile("tmp","phosphosite").toFile();
156
157                InputStream is = u.openStream();
158
159                BufferedInputStream in = new BufferedInputStream(is);
160
161                FileOutputStream w = new FileOutputStream(tmp);
162
163                int i= 0;
164                byte[] bytesIn = new byte[300000];
165                while ((i = in.read(bytesIn)) >= 0) {
166                        w.write(bytesIn,0,i);
167                }
168                in.close();
169                w.close();
170
171
172                // now copy  tmp file to localFile
173                copyFile(tmp, localFile);
174
175        }
176
177
178
179        public static void copyFile(File src, File dst) throws IOException
180        {
181
182                Files.copy(src.toPath(), dst.toPath(), StandardCopyOption.REPLACE_EXISTING);
183
184        }
185
186
187        public static void main(String[] args) {
188
189                Dataset ds = new Dataset();
190
191                ds.download();
192
193                try {
194
195                        for (File f : ds.getLocalFiles()) {
196
197                                logger.info(f.getAbsolutePath());
198
199                                List<Site> sites = Site.parseSites(f);
200
201                                logger.info("Got " + sites.size() + " sites");
202                                for (Site s : sites) {
203                                        if ("P50225".equals(s.getUniprot()) || "P48025".equals(s.getUniprot())) {
204                                                logger.info(s.toString());
205                                        }
206                                }
207
208                        }
209
210
211                } catch (Exception e) {
212                        e.printStackTrace();
213                }
214        }
215
216}