/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.phosphosite;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;

/**
 * One row of a gzip-compressed, tab-separated table of modification sites,
 * together with the static parser ({@link #parseSites(File)}) that reads such
 * a table.
 * <p>
 * Presumably the table is a PhosphoSitePlus export, judging by the package
 * name; the code itself only relies on the column names {@code PROTEIN},
 * {@code ACC_ID}, {@code MOD_RSD}, {@code ORGANISM}, {@code SITE_GRP_ID} and
 * {@code GENE}.
 * <p>
 * Instances are plain mutable beans; every property may be {@code null}.
 *
 * Created by ap3 on 31/10/2014.
 */
public class Site {

    private final static Logger logger = LoggerFactory.getLogger(Site.class);

    /** Data lines with fewer tab-separated fields than this are logged and skipped. */
    private static final int MIN_FIELDS = 5;

    /** Creates an empty site; all properties start out {@code null}. */
    public Site() {
    }

    /**
     * Parses a gzip-compressed, tab-separated site table.
     * <p>
     * Everything before the header line is ignored. The header line is any
     * line starting with {@code GENE} or {@code PROTEIN}; it determines the
     * column positions used for all following lines (a later line with the
     * same prefix is treated as a new header). Blank lines and lines with
     * fewer than five fields are skipped.
     * <p>
     * A column that is missing from the header, or a field that is missing
     * from a particular data line, yields {@code null} for the corresponding
     * property instead of an {@link ArrayIndexOutOfBoundsException}.
     *
     * @param f the gzip-compressed file to read
     * @return the parsed sites in file order; never {@code null}, possibly empty
     * @throws IOException if the file cannot be opened, is not valid gzip
     *                     data, or cannot be read
     */
    public static List<Site> parseSites(File f) throws IOException {

        // try/finally rather than try-with-resources: this file otherwise
        // uses only pre-Java-7 syntax.
        InputStream inStream = new FileInputStream(f);
        try {
            // NOTE(review): the reader uses the platform default charset, as
            // before; confirm the expected file encoding and make it explicit.
            BufferedReader buf = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(inStream)));
            try {
                return readSites(buf);
            } finally {
                // Closes the whole reader/gzip/file chain.
                buf.close();
            }
        } finally {
            // Covers the case where building the gzip/reader chain itself
            // failed; a second close of an already closed stream is a no-op.
            inStream.close();
        }
    }

    /** Reads all site records from an already decompressed character stream. */
    private static List<Site> readSites(BufferedReader buf) throws IOException {

        List<Site> data = new ArrayList<Site>();

        int proteinIndex = -1;
        int uniprotIndex = -1;
        int residueIndex = -1;
        int orgIndex = -1;
        int groupIndex = -1;
        int geneIndex = -1;

        boolean inHeader = true;

        String line;
        while ((line = buf.readLine()) != null) {

            if (line.startsWith("GENE") || line.startsWith("PROTEIN")) {

                List<String> headerFields = parseHeaderFields(line);

                proteinIndex = columnIndex(headerFields, "PROTEIN");
                uniprotIndex = columnIndex(headerFields, "ACC_ID");
                residueIndex = columnIndex(headerFields, "MOD_RSD");
                orgIndex = columnIndex(headerFields, "ORGANISM");
                groupIndex = columnIndex(headerFields, "SITE_GRP_ID");
                geneIndex = columnIndex(headerFields, "GENE");

                inHeader = false;
                continue;
            }

            // Nothing before the header line is data.
            if (inHeader) {
                continue;
            }

            if (line.trim().length() == 0) {
                continue;
            }

            String[] spl = line.split("\t");
            if (spl.length < MIN_FIELDS) {
                logger.info("Found wrong line length: {}", line);
                continue;
            }

            String residue = fieldAt(spl, residueIndex);

            // The modification type is the part after the dash, and is only
            // set when the residue splits into exactly two parts.
            String modType = null;
            if (residue != null) {
                String[] resSpl = residue.split("-");
                if (resSpl.length == 2) {
                    modType = resSpl[1];
                }
            }

            Site s = new Site();
            s.setProtein(fieldAt(spl, proteinIndex));
            s.setUniprot(fieldAt(spl, uniprotIndex));
            s.setGeneSymb(fieldAt(spl, geneIndex));
            s.setModType(modType);
            s.setResidue(residue);
            s.setGroup(fieldAt(spl, groupIndex));
            s.setOrganism(fieldAt(spl, orgIndex));
            data.add(s);
        }

        return data;
    }

    /**
     * Looks up a column by exact name in the header, warning once (per header
     * line) when it is absent.
     *
     * @return the zero-based column position, or -1 if there is no such column
     */
    private static int columnIndex(List<String> headerFields, String name) {
        int index = headerFields.indexOf(name);
        if (index < 0) {
            logger.warn("Header has no {} column; that field will be null for every site", name);
        }
        return index;
    }

    /**
     * Returns the field at {@code index}, or {@code null} when the column is
     * unknown (negative index) or the line is too short. Short lines occur
     * legitimately because {@link String#split(String)} drops trailing empty
     * fields.
     */
    private static String fieldAt(String[] fields, int index) {
        if (index < 0 || index >= fields.length) {
            return null;
        }
        return fields[index];
    }

    /** Splits a header line on tabs into a list of column names. */
    private static List<String> parseHeaderFields(String line) {
        String[] spl = line.split("\t");

        List<String> h = new ArrayList<String>(spl.length);
        for (String s : spl) {
            h.add(s);
        }

        return h;
    }

    // Value of the PROTEIN column.
    String protein;
    // Value of the ACC_ID column.
    String uniprot;
    // Value of the GENE column.
    String geneSymb;
    // Never populated by parseSites; only available through its setter.
    String chrLoc;
    // Part of the MOD_RSD value after the dash, if it has exactly one dash-separated suffix.
    String modType;
    // Full value of the MOD_RSD column.
    String residue;
    // Value of the SITE_GRP_ID column.
    String group;
    // Value of the ORGANISM column.
    String organism;

    /** @return the value of the {@code PROTEIN} column, or {@code null} if unset */
    public String getProtein() {
        return protein;
    }

    public void setProtein(String protein) {
        this.protein = protein;
    }

    /** @return the value of the {@code ACC_ID} column, or {@code null} if unset */
    public String getUniprot() {
        return uniprot;
    }

    public void setUniprot(String uniprot) {
        this.uniprot = uniprot;
    }

    /** @return the value of the {@code GENE} column, or {@code null} if unset */
    public String getGeneSymb() {
        return geneSymb;
    }

    public void setGeneSymb(String geneSymb) {
        this.geneSymb = geneSymb;
    }

    /** @return the value last passed to {@link #setChrLoc(String)}; the parser never sets it */
    public String getChrLoc() {
        return chrLoc;
    }

    public void setChrLoc(String chrLoc) {
        this.chrLoc = chrLoc;
    }

    /** @return the part of the residue after the dash, or {@code null} if unset */
    public String getModType() {
        return modType;
    }

    public void setModType(String modType) {
        this.modType = modType;
    }

    /** @return the full value of the {@code MOD_RSD} column, or {@code null} if unset */
    public String getResidue() {
        return residue;
    }

    public void setResidue(String residue) {
        this.residue = residue;
    }

    /** @return the value of the {@code SITE_GRP_ID} column, or {@code null} if unset */
    public String getGroup() {
        return group;
    }

    public void setGroup(String group) {
        this.group = group;
    }

    /** @return the value of the {@code ORGANISM} column, or {@code null} if unset */
    public String getOrganism() {
        return organism;
    }

    public void setOrganism(String organism) {
        this.organism = organism;
    }

    /**
     * Renders the site as {@code Site{protein='…', …}}. The protein is always
     * printed (as {@code 'null'} when unset); every other property is printed
     * only when it is non-null.
     */
    @Override
    public String toString() {
        StringBuilder s = new StringBuilder();

        s.append("Site{protein='").append(protein).append('\'');
        if (uniprot != null) {
            s.append(", uniprot='").append(uniprot).append('\'');
        }
        if (geneSymb != null) {
            s.append(", geneSymb='").append(geneSymb).append('\'');
        }
        if (chrLoc != null) {
            s.append(", chrLoc='").append(chrLoc).append('\'');
        }
        if (modType != null) {
            s.append(", modType='").append(modType).append('\'');
        }
        if (residue != null) {
            s.append(", residue='").append(residue).append('\'');
        }
        if (group != null) {
            s.append(", group='").append(group).append('\'');
        }
        if (organism != null) {
            s.append(", organism='").append(organism).append('\'');
        }
        s.append('}');

        return s.toString();
    }
}