001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.phosphosite;
022
023import org.slf4j.Logger;
024import org.slf4j.LoggerFactory;
025
026import java.io.*;
027import java.util.ArrayList;
028import java.util.List;
029import java.util.zip.GZIPInputStream;
030
031/**
032 * Created by ap3 on 31/10/2014.
033 */
034public class Site {
035
036        private final static Logger logger = LoggerFactory.getLogger(Site.class);
037
038        public Site(){
039
040
041        }
042
043        public static List<Site> parseSites(File f) throws IOException {
044
045                InputStream gzipStream;
046                try (InputStream inStream = new FileInputStream(f)) {
047                        gzipStream = new GZIPInputStream(inStream);
048                }
049
050                Reader decoder = new InputStreamReader(gzipStream);
051                BufferedReader buf = new BufferedReader(decoder);
052
053                String line = null;
054
055                List<Site > data = new ArrayList<>();
056
057                List<String> headerFields = null;
058
059                int proteinIndex = -1;
060                int uniprotIndex = -1;
061                int residueIndex = -1;
062                int orgIndex     = -1;
063                int groupIndex   = -1;
064                int geneIndex    = -1;
065
066                boolean inHeader = true;
067
068
069                while ((line = buf.readLine()) != null){
070                        if ( line.startsWith("GENE") ||
071                                        line.startsWith("PROTEIN")) {
072
073                                headerFields = parseHeaderFields(line);
074
075                                proteinIndex = headerFields.indexOf("PROTEIN");
076                                uniprotIndex = headerFields.indexOf("ACC_ID");
077                                residueIndex = headerFields.indexOf("MOD_RSD");
078                                orgIndex     = headerFields.indexOf("ORGANISM");
079                                groupIndex   = headerFields.indexOf("SITE_GRP_ID");
080                                geneIndex        = headerFields.indexOf("GENE");
081
082                                inHeader = false;
083                                continue;
084                        }
085                        if ( inHeader)
086                                continue;
087
088                        if ( line.trim().length() == 0)
089                                continue;
090
091                        // fields are:
092                        String[] spl = line.split("\t");
093                        if ( spl.length  < 5){
094                                logger.info("Found wrong line length: " + line);
095                                continue;
096
097                        }
098
099                        String protein = spl[proteinIndex];
100                        String uniprot = spl[uniprotIndex];
101
102                        String residue = spl[residueIndex];
103
104                        String[] resSpl = residue.split("-");
105                        String modType = null;
106                        if ( resSpl.length == 2) {
107
108                                 modType = resSpl[1];
109                        }
110                        String group    = spl[groupIndex];
111
112                        String organism = spl[orgIndex];
113
114                        String geneSymb = spl[geneIndex];
115
116                        Site s = new Site();
117                        s.setProtein(protein);
118                        s.setUniprot(uniprot);
119                        s.setGeneSymb(geneSymb);
120                        s.setModType(modType);
121                        s.setResidue(residue);
122                        s.setGroup(group);
123                        s.setOrganism(organism);
124                        data.add(s);
125
126                }
127                buf.close();
128
129                return data;
130
131        }
132
133        private static List<String> parseHeaderFields(String line) {
134                String[] spl = line.split("\t");
135
136                List<String> h = new ArrayList<>();
137                for (String s: spl){
138                        h.add(s);
139
140                }
141
142                return h;
143        }
144
145        String protein;
146        String uniprot;
147        String geneSymb;
148        String chrLoc;
149        String modType;
150        String residue ;
151        String group;
152        String organism;
153
154        public String getProtein() {
155                return protein;
156        }
157
158        public void setProtein(String protein) {
159                this.protein = protein;
160        }
161
162        public String getUniprot() {
163                return uniprot;
164        }
165
166        public void setUniprot(String uniprot) {
167                this.uniprot = uniprot;
168        }
169
170        public String getGeneSymb() {
171                return geneSymb;
172        }
173
174        public void setGeneSymb(String geneSymb) {
175                this.geneSymb = geneSymb;
176        }
177
178        public String getChrLoc() {
179                return chrLoc;
180        }
181
182        public void setChrLoc(String chrLoc) {
183                this.chrLoc = chrLoc;
184        }
185
186        public String getModType() {
187                return modType;
188        }
189
190        public void setModType(String modType) {
191                this.modType = modType;
192        }
193
194        public String getResidue() {
195                return residue;
196        }
197
198        public void setResidue(String residue) {
199                this.residue = residue;
200        }
201
202        public String getGroup() {
203                return group;
204        }
205
206        public void setGroup(String group) {
207                this.group = group;
208        }
209
210        public String getOrganism() {
211                return organism;
212        }
213
214        public void setOrganism(String organism) {
215                this.organism = organism;
216        }
217
218        @Override
219        public String toString() {
220                StringBuffer s = new StringBuffer();
221
222                s.append("Site{" +
223                                "protein='" + protein + '\'');
224                if ( uniprot != null)
225                                s.append(", uniprot='" + uniprot + '\'' );
226                if ( geneSymb != null)
227                        s.append(
228                                ", geneSymb='" + geneSymb + '\'' );
229                if (chrLoc != null)
230                                s.append(", chrLoc='" + chrLoc + '\'' );
231                if (modType != null)
232                        s.append(", modType='" + modType + '\'' );
233
234                if (residue != null)
235                        s.append(        ", residue='" + residue + '\'' );
236                if ( group != null)
237                                s.append(", group='" + group + '\'' );
238                if (organism != null)
239                        s.append(", organism='" + organism + '\'' );
240
241                  s.append(      '}');
242
243                return s.toString();
244        }
245}
246
247