001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.phosphosite;
022
023import org.slf4j.Logger;
024import org.slf4j.LoggerFactory;
025
026import java.io.*;
027import java.util.ArrayList;
028import java.util.List;
029import java.util.zip.GZIPInputStream;
030
031/**
032 * Created by ap3 on 31/10/2014.
033 */
034public class Site {
035
036        private final static Logger logger = LoggerFactory.getLogger(Site.class);
037
038        public Site(){
039
040
041        }
042
043        public static List<Site> parseSites(File f) throws IOException {
044
045                InputStream inStream = new FileInputStream(f);
046                InputStream gzipStream = new GZIPInputStream(inStream);
047
048                Reader decoder = new InputStreamReader(gzipStream);
049                BufferedReader buf = new BufferedReader(decoder);
050
051                String line = null;
052
053                List<Site > data = new ArrayList<Site>();
054
055                List<String> headerFields = null;
056
057                int proteinIndex = -1;
058                int uniprotIndex = -1;
059                int residueIndex = -1;
060                int orgIndex     = -1;
061                int groupIndex   = -1;
062                int geneIndex    = -1;
063
064                boolean inHeader = true;
065
066
067                while ((line = buf.readLine()) != null){
068                        if ( line.startsWith("GENE") ||
069                                        line.startsWith("PROTEIN")) {
070
071                                headerFields = parseHeaderFields(line);
072
073                                proteinIndex = headerFields.indexOf("PROTEIN");
074                                uniprotIndex = headerFields.indexOf("ACC_ID");
075                                residueIndex = headerFields.indexOf("MOD_RSD");
076                                orgIndex     = headerFields.indexOf("ORGANISM");
077                                groupIndex   = headerFields.indexOf("SITE_GRP_ID");
078                                geneIndex        = headerFields.indexOf("GENE");
079
080                                inHeader = false;
081                                continue;
082                        }
083                        if ( inHeader)
084                                continue;
085
086                        if ( line.trim().length() == 0)
087                                continue;
088
089                        // fields are:
090                        String[] spl = line.split("\t");
091                        if ( spl.length  < 5){
092                                logger.info("Found wrong line length: " + line);
093                                continue;
094
095                        }
096
097                        String protein = spl[proteinIndex];
098                        String uniprot = spl[uniprotIndex];
099
100                        String residue = spl[residueIndex];
101
102                        String[] resSpl = residue.split("-");
103                        String modType = null;
104                        if ( resSpl.length == 2) {
105
106                                 modType = resSpl[1];
107                        }
108                        String group    = spl[groupIndex];
109
110                        String organism = spl[orgIndex];
111
112                        String geneSymb = spl[geneIndex];
113
114                        Site s = new Site();
115                        s.setProtein(protein);
116                        s.setUniprot(uniprot);
117                        s.setGeneSymb(geneSymb);
118                        s.setModType(modType);
119                        s.setResidue(residue);
120                        s.setGroup(group);
121                        s.setOrganism(organism);
122                        data.add(s);
123
124                }
125                buf.close();
126
127                return data;
128
129        }
130
131        private static List<String> parseHeaderFields(String line) {
132                String[] spl = line.split("\t");
133
134                List<String> h = new ArrayList<String>();
135                for (String s: spl){
136                        h.add(s);
137
138                }
139
140                return h;
141        }
142
143        String protein;
144        String uniprot;
145        String geneSymb;
146        String chrLoc;
147        String modType;
148        String residue ;
149        String group;
150        String organism;
151
152        public String getProtein() {
153                return protein;
154        }
155
156        public void setProtein(String protein) {
157                this.protein = protein;
158        }
159
160        public String getUniprot() {
161                return uniprot;
162        }
163
164        public void setUniprot(String uniprot) {
165                this.uniprot = uniprot;
166        }
167
168        public String getGeneSymb() {
169                return geneSymb;
170        }
171
172        public void setGeneSymb(String geneSymb) {
173                this.geneSymb = geneSymb;
174        }
175
176        public String getChrLoc() {
177                return chrLoc;
178        }
179
180        public void setChrLoc(String chrLoc) {
181                this.chrLoc = chrLoc;
182        }
183
184        public String getModType() {
185                return modType;
186        }
187
188        public void setModType(String modType) {
189                this.modType = modType;
190        }
191
192        public String getResidue() {
193                return residue;
194        }
195
196        public void setResidue(String residue) {
197                this.residue = residue;
198        }
199
200        public String getGroup() {
201                return group;
202        }
203
204        public void setGroup(String group) {
205                this.group = group;
206        }
207
208        public String getOrganism() {
209                return organism;
210        }
211
212        public void setOrganism(String organism) {
213                this.organism = organism;
214        }
215
216        @Override
217        public String toString() {
218                StringBuffer s = new StringBuffer();
219
220                s.append("Site{" +
221                                "protein='" + protein + '\'');
222                if ( uniprot != null)
223                                s.append(", uniprot='" + uniprot + '\'' );
224                if ( geneSymb != null)
225                        s.append(
226                                ", geneSymb='" + geneSymb + '\'' );
227                if (chrLoc != null)
228                                s.append(", chrLoc='" + chrLoc + '\'' );
229                if (modType != null)
230                        s.append(", modType='" + modType + '\'' );
231
232                if (residue != null)
233                        s.append(        ", residue='" + residue + '\'' );
234                if ( group != null)
235                                s.append(", group='" + group + '\'' );
236                if (organism != null)
237                        s.append(", organism='" + organism + '\'' );
238
239                  s.append(      '}');
240
241                return s.toString();
242        }
243}
244
245