001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.phosphosite; 022 023import org.slf4j.Logger; 024import org.slf4j.LoggerFactory; 025 026import java.io.*; 027import java.util.ArrayList; 028import java.util.List; 029import java.util.zip.GZIPInputStream; 030 031/** 032 * Created by ap3 on 31/10/2014. 033 */ 034public class Site { 035 036 private final static Logger logger = LoggerFactory.getLogger(Site.class); 037 038 public Site(){ 039 040 041 } 042 043 public static List<Site> parseSites(File f) throws IOException { 044 045 InputStream gzipStream; 046 try (InputStream inStream = new FileInputStream(f)) { 047 gzipStream = new GZIPInputStream(inStream); 048 } 049 050 Reader decoder = new InputStreamReader(gzipStream); 051 BufferedReader buf = new BufferedReader(decoder); 052 053 String line = null; 054 055 List<Site > data = new ArrayList<>(); 056 057 List<String> headerFields = null; 058 059 int proteinIndex = -1; 060 int uniprotIndex = -1; 061 int residueIndex = -1; 062 int orgIndex = -1; 063 int groupIndex = -1; 064 int geneIndex = -1; 065 066 boolean inHeader = true; 067 068 069 while ((line = buf.readLine()) != null){ 070 if ( line.startsWith("GENE") || 071 line.startsWith("PROTEIN")) { 072 073 headerFields = parseHeaderFields(line); 074 075 proteinIndex = headerFields.indexOf("PROTEIN"); 076 uniprotIndex = headerFields.indexOf("ACC_ID"); 077 residueIndex = headerFields.indexOf("MOD_RSD"); 078 orgIndex = headerFields.indexOf("ORGANISM"); 079 groupIndex = headerFields.indexOf("SITE_GRP_ID"); 080 geneIndex = headerFields.indexOf("GENE"); 081 082 inHeader = false; 083 continue; 084 } 085 if ( inHeader) 086 continue; 087 088 if ( line.trim().length() == 0) 089 continue; 090 091 // fields are: 092 String[] spl = line.split("\t"); 093 if ( spl.length < 5){ 094 logger.info("Found wrong line length: " + line); 095 continue; 096 097 } 098 099 String protein = spl[proteinIndex]; 100 String uniprot = spl[uniprotIndex]; 101 102 String residue = spl[residueIndex]; 103 104 String[] resSpl = residue.split("-"); 105 String modType = null; 106 if ( resSpl.length == 2) { 107 108 modType = resSpl[1]; 109 } 110 String group = spl[groupIndex]; 111 112 String organism = spl[orgIndex]; 113 114 String geneSymb = spl[geneIndex]; 115 116 Site s = new Site(); 117 s.setProtein(protein); 118 s.setUniprot(uniprot); 119 s.setGeneSymb(geneSymb); 120 s.setModType(modType); 121 s.setResidue(residue); 122 s.setGroup(group); 123 s.setOrganism(organism); 124 data.add(s); 125 126 } 127 buf.close(); 128 129 return data; 130 131 } 132 133 private static List<String> parseHeaderFields(String line) { 134 String[] spl = line.split("\t"); 135 136 List<String> h = new ArrayList<>(); 137 for (String s: spl){ 138 h.add(s); 139 140 } 141 142 return h; 143 } 144 145 String protein; 146 String uniprot; 147 String geneSymb; 148 String chrLoc; 149 String modType; 150 String residue ; 151 String group; 152 String organism; 153 154 public String getProtein() { 155 return protein; 156 } 157 158 public void setProtein(String protein) { 159 this.protein = protein; 160 } 161 162 public String getUniprot() { 163 return uniprot; 164 } 165 166 public void setUniprot(String uniprot) { 167 this.uniprot = uniprot; 168 } 169 170 public String getGeneSymb() { 171 return geneSymb; 172 } 173 174 public void setGeneSymb(String geneSymb) { 175 this.geneSymb = geneSymb; 176 } 177 178 public String getChrLoc() { 179 return chrLoc; 180 } 181 182 public void setChrLoc(String chrLoc) { 183 this.chrLoc = chrLoc; 184 } 185 186 public String getModType() { 187 return modType; 188 } 189 190 public void setModType(String modType) { 191 this.modType = modType; 192 } 193 194 public String getResidue() { 195 return residue; 196 } 197 198 public void setResidue(String residue) { 199 this.residue = residue; 200 } 201 202 public String getGroup() { 203 return group; 204 } 205 206 public void setGroup(String group) { 207 this.group = group; 208 } 209 210 public String getOrganism() { 211 return organism; 212 } 213 214 public void setOrganism(String organism) { 215 this.organism = organism; 216 } 217 218 @Override 219 public String toString() { 220 StringBuffer s = new StringBuffer(); 221 222 s.append("Site{" + 223 "protein='" + protein + '\''); 224 if ( uniprot != null) 225 s.append(", uniprot='" + uniprot + '\'' ); 226 if ( geneSymb != null) 227 s.append( 228 ", geneSymb='" + geneSymb + '\'' ); 229 if (chrLoc != null) 230 s.append(", chrLoc='" + chrLoc + '\'' ); 231 if (modType != null) 232 s.append(", modType='" + modType + '\'' ); 233 234 if (residue != null) 235 s.append( ", residue='" + residue + '\'' ); 236 if ( group != null) 237 s.append(", group='" + group + '\'' ); 238 if (organism != null) 239 s.append(", organism='" + organism + '\'' ); 240 241 s.append( '}'); 242 243 return s.toString(); 244 } 245} 246 247