/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Author: Daniel Asarnow
 * Date:   2012-7-23
 */

package org.biojava.nbio.structure.cath;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.biojava.nbio.structure.ResidueRange;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIdentifier;
import org.biojava.nbio.structure.SubstructureIdentifier;
import org.biojava.nbio.structure.align.util.AtomCache;

/**
 * A class which represents a single CATH domain.
 *
 * <p>Instances are plain mutable data holders: every classification level and
 * descriptive field has a getter/setter pair, and the PDB ID, chain and domain
 * number are derived on demand from {@link #getDomainName()}.
 *
 * @author Daniel Asarnow
 */
public class CathDomain implements Serializable, StructureIdentifier {

    public static final long serialVersionUID = 1L;

    /**
     * The CATH domain code. Always 7 characters in length, combining the PDB and chain letter with the number of the domain within CATH.
     * Example: 1aoiA00
     * If the chain letter is '0', the domain refers to an entire PDB entry.
     */
    String domainName; // 7 characters 1oaiA00

    /**
     * The class number of this domain.
     */
    Integer classId; // C

    /**
     * The architecture number of this domain.
     */
    Integer architectureId; // A

    /**
     * The topology number of this domain.
     */
    Integer topologyId; // T

    /**
     * The homologous superfamily number of this domain.
     */
    Integer homologyId; // H

    /**
     * The sequence family (35% identity) number of this domain.
     */
    Integer sequenceFamilyId; // S

    /**
     * The "orthologous" sequence family (60% identity) number of this domain.
     */
    Integer orthologousSequenceFamilyId; // O

    /**
     * The "Like" sequence family (95% identity) number of this domain.
     */
    Integer likeSequenceFamilyId; // L

    /**
     * The identical sequence family (100% identity) number of this domain.
     */
    Integer identicalSequenceFamilyId; // I

    /**
     * The count of this domain among the identical sequence family members.
     */
    Integer domainCounter; // D

    /**
     * The domain length.
     */
    Integer length;

    /**
     * The resolution of the domain structure. Nominally in Angstroms,
     * the values 999.000 and 1000.000 signify NMR structures and obsolete structures, respectively.
     */
    Double resolution;

    /**
     * The format and version of the CathDomainDescriptionFile.
     */
    String format;

    /**
     * The CATH version.
     */
    String version;

    /**
     * The date attached to this domain record.
     * NOTE(review): presumably the date of the description-file entry; confirm against the parser.
     */
    Date date;

    /**
     * The so-called name field holds a potentially long description of the domain.
     */
    String name;

    /**
     * Complete source organism listing.
     */
    String source;

    /**
     * FASTA header.
     */
    String sequenceHeader;

    /**
     * FASTA sequence.
     */
    String sequence;

    /**
     * List of all sub-domain segments.
     */
    List<CathSegment> segments;

    /**
     * A (potentially long) comment. Usually empty.
     */
    String comment;

    /**
     * Returns the CATH domain code, e.g. {@code 1aoiA00}.
     */
    public String getDomainName() {
        return domainName;
    }

    public void setDomainName(String domainName) {
        this.domainName = domainName;
    }

    /**
     * Returns the PDB ID, i.e. the first four characters of the domain name.
     */
    public String getThePdbId() {
        return domainName.substring(0, 4);
    }

    /**
     * Returns a string of the form {@code PDBID.CHAIN}.
     * For example: {@code 1hiv.A}.
     * When the chain character of the domain name is {@code '0'}, only the PDB ID is returned.
     */
    public String getPdbIdAndChain() {
        String pdbId = domainName.substring(0, 4);
        String chain = domainName.substring(4, 5);
        if (chain.equals("0")) {
            return pdbId;
        }
        return pdbId + "." + chain;
    }

    /**
     * Returns the domain number, parsed from the characters of the domain name
     * that follow the PDB ID and chain letter.
     *
     * @throws NumberFormatException if that suffix is not a valid integer
     */
    public Integer getDomainId() {
        return Integer.valueOf(domainName.substring(5));
    }

    public Integer getClassId() {
        return classId;
    }

    public void setClassId(Integer classId) {
        this.classId = classId;
    }

    public Integer getArchitectureId() {
        return architectureId;
    }

    public void setArchitectureId(Integer architectureId) {
        this.architectureId = architectureId;
    }

    public Integer getTopologyId() {
        return topologyId;
    }

    public void setTopologyId(Integer topologyId) {
        this.topologyId = topologyId;
    }

    public Integer getHomologyId() {
        return homologyId;
    }

    public void setHomologyId(Integer homologyId) {
        this.homologyId = homologyId;
    }

    public Integer getSequenceFamilyId() {
        return sequenceFamilyId;
    }

    public void setSequenceFamilyId(Integer sequenceFamilyId) {
        this.sequenceFamilyId = sequenceFamilyId;
    }

    public Integer getOrthologousSequenceFamilyId() {
        return orthologousSequenceFamilyId;
    }

    public void setOrthologousSequenceFamilyId(Integer orthologousSequenceFamilyId) {
        this.orthologousSequenceFamilyId = orthologousSequenceFamilyId;
    }

    public Integer getLikeSequenceFamilyId() {
        return likeSequenceFamilyId;
    }

    public void setLikeSequenceFamilyId(Integer likeSequenceFamilyId) {
        this.likeSequenceFamilyId = likeSequenceFamilyId;
    }

    public Integer getIdenticalSequenceFamilyId() {
        return identicalSequenceFamilyId;
    }

    public void setIdenticalSequenceFamilyId(Integer identicalSequenceFamilyId) {
        this.identicalSequenceFamilyId = identicalSequenceFamilyId;
    }

    public Integer getDomainCounter() {
        return domainCounter;
    }

    public void setDomainCounter(Integer domainCounter) {
        this.domainCounter = domainCounter;
    }

    public Integer getLength() {
        return length;
    }

    public void setLength(Integer length) {
        this.length = length;
    }

    public Double getResolution() {
        return resolution;
    }

    public void setResolution(Double resolution) {
        this.resolution = resolution;
    }

    /**
     * Sets the class, architecture, topology and homology numbers from a
     * dot-separated code such as {@code 1.10.8.10}.
     * Fields beyond the fourth are ignored. Nothing is modified unless all
     * four numbers parse successfully.
     *
     * @param cathCode dot-separated C.A.T.H code
     * @throws NullPointerException if {@code cathCode} is null
     * @throws IllegalArgumentException if fewer than four fields are present;
     *         a {@link NumberFormatException} (a subclass) if a field is not an integer
     */
    public void setCATH(String cathCode) {
        Integer[] ids = parseDottedCode(cathCode, 4, "CATH");
        setClassId(ids[0]);
        setArchitectureId(ids[1]);
        setTopologyId(ids[2]);
        setHomologyId(ids[3]);
    }

    /**
     * Returns the class, architecture, topology and homology numbers joined by dots.
     *
     * @throws NullPointerException if any of the four numbers is unset
     */
    public String getCATH() {
        return Integer.toString(getClassId()) + "." +
                Integer.toString(getArchitectureId()) + "." +
                Integer.toString(getTopologyId()) + "." +
                Integer.toString(getHomologyId());
    }

    /**
     * Sets the S, O, L, I family numbers and the domain counter from a
     * dot-separated code such as {@code 1.1.1.1.1}.
     * Fields beyond the fifth are ignored. Nothing is modified unless all
     * five numbers parse successfully.
     *
     * @param cathCode dot-separated S.O.L.I.D code
     * @throws NullPointerException if {@code cathCode} is null
     * @throws IllegalArgumentException if fewer than five fields are present;
     *         a {@link NumberFormatException} (a subclass) if a field is not an integer
     */
    public void setSOLID(String cathCode) {
        Integer[] ids = parseDottedCode(cathCode, 5, "SOLID");
        setSequenceFamilyId(ids[0]);
        setOrthologousSequenceFamilyId(ids[1]);
        setLikeSequenceFamilyId(ids[2]);
        setIdenticalSequenceFamilyId(ids[3]);
        setDomainCounter(ids[4]);
    }

    /**
     * Returns the S, O, L, I family numbers and the domain counter joined by dots.
     * Correctly spelled counterpart of {@link #setSOLID(String)}.
     *
     * @throws NullPointerException if any of the five numbers is unset
     */
    public String getSOLID() {
        return Integer.toString(getSequenceFamilyId()) + "." +
                Integer.toString(getOrthologousSequenceFamilyId()) + "." +
                Integer.toString(getLikeSequenceFamilyId()) + "." +
                Integer.toString(getIdenticalSequenceFamilyId()) + "." +
                Integer.toString(getDomainCounter());
    }

    /**
     * Same as {@link #getSOLID()}. The misspelled name is retained so that
     * existing callers keep compiling; new code should prefer {@link #getSOLID()}.
     */
    public String getSOILD() {
        return getSOLID();
    }

    /**
     * Returns the number of this domain at the requested classification level.
     *
     * @param cathCategory the classification level to look up
     * @return the matching number, or {@code null} for a category not handled here
     */
    public Integer getClassificationId(CathCategory cathCategory) {
        switch (cathCategory) {
            case Class:
                return getClassId();
            case Architecture:
                return getArchitectureId();
            case Topolgy: // spelling matches the constant declared in CathCategory
                return getTopologyId();
            case Homology:
                return getHomologyId();
            case SequenceFamily:
                return getSequenceFamilyId();
            case OrthologousSequenceFamily:
                return getOrthologousSequenceFamilyId();
            case LikeSequenceFamily:
                return getLikeSequenceFamilyId();
            case IdenticalSequenceFamily:
                return getIdenticalSequenceFamilyId();
            case DomainCounter:
                return getDomainCounter();
            default:
                return null;
        }
    }

    public String getFormat() {
        return format;
    }

    public void setFormat(String format) {
        this.format = format;
    }

    public String getVersion() {
        return version;
    }

    public void setVersion(String version) {
        this.version = version;
    }

    public Date getDate() {
        return date;
    }

    public void setDate(Date date) {
        this.date = date;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getSource() {
        return source;
    }

    public void setSource(String source) {
        this.source = source;
    }

    public String getSequenceHeader() {
        return sequenceHeader;
    }

    public void setSequenceHeader(String sequenceHeader) {
        this.sequenceHeader = sequenceHeader;
    }

    public String getSequence() {
        return sequence;
    }

    public void setSequence(String sequence) {
        this.sequence = sequence;
    }

    public List<CathSegment> getSegments() {
        return segments;
    }

    public void setSegments(List<CathSegment> segments) {
        this.segments = segments;
    }

    public String getComment() {
        return comment;
    }

    public void setComment(String comment) {
        this.comment = comment;
    }

    @Override
    public String toString() {
        return "CathDomain [domainName=" + domainName + ", classId=" + classId
                + ", architectureId=" + architectureId + ", topologyId="
                + topologyId + ", homologyId=" + homologyId
                + ", sequenceFamilyId=" + sequenceFamilyId
                + ", orthologousSequenceFamilyId="
                + orthologousSequenceFamilyId + ", likeSequenceFamilyId="
                + likeSequenceFamilyId + ", identicalSequenceFamilyId="
                + identicalSequenceFamilyId + ", domainCounter="
                + domainCounter + ", length=" + length + ", resolution="
                + resolution + ", format=" + format + ", version=" + version
                + ", date=" + date + ", name=" + name + ", source=" + source
                + ", sequenceHeader=" + sequenceHeader + ", sequence="
                + sequence + ", segments=" + segments + ", comment=" + comment
                + "]";
    }

    /**
     * Returns the chains this domain is defined over; contains more than 1 element only if this domain is a multi-chain domain.
     * The chain names are collected from the residue ranges of {@link #toCanonical()}.
     *
     * @throws StructureException if the canonical identifier cannot be built
     */
    public Set<String> getChains() throws StructureException {
        Set<String> chains = new HashSet<String>();
        List<ResidueRange> rrs = toCanonical().getResidueRanges();
        for (ResidueRange rr : rrs) {
            chains.add(rr.getChainName());
        }
        return chains;
    }

    /**
     * Returns the identifier of this domain, which is its C.A.T.H
     * classification code as produced by {@link #getCATH()} (not the domain name).
     */
    @Override
    public String getIdentifier() {
        return getCATH();
    }

    /**
     * Builds a {@link SubstructureIdentifier} from the PDB ID and one residue
     * range per segment. Every range uses the chain character taken from the
     * domain name (third character from the end).
     *
     * @throws StructureException declared by the interface
     */
    @Override
    public SubstructureIdentifier toCanonical() throws StructureException {
        List<ResidueRange> ranges = new ArrayList<ResidueRange>();
        // The chain letter sits just before the two-digit domain number.
        String chain = String.valueOf(getDomainName().charAt(getDomainName().length() - 3));
        for (CathSegment segment : this.getSegments()) {
            ranges.add(new ResidueRange(chain, segment.getStart(), segment.getStop()));
        }

        return new SubstructureIdentifier(getThePdbId(), ranges);
    }

    /**
     * Reduces {@code input} to this domain by delegating to the canonical identifier.
     */
    @Override
    public Structure reduce(Structure input) throws StructureException {
        return toCanonical().reduce(input);
    }

    /**
     * Fetches the structure for this domain's PDB ID from the given cache.
     * No reduction to the domain's residue ranges is applied here.
     */
    @Override
    public Structure loadStructure(AtomCache cache) throws StructureException,
            IOException {
        return cache.getStructure(getThePdbId());
    }

    /**
     * Splits a dot-separated classification code and parses its leading fields.
     * All fields are parsed before anything is returned, so callers can assign
     * them without risking a half-updated object.
     *
     * @param code     the dot-separated code
     * @param expected the number of leading integer fields required
     * @param label    name of the code type, used only in error messages
     * @return the first {@code expected} fields as integers
     */
    private static Integer[] parseDottedCode(String code, int expected, String label) {
        if (code == null) {
            throw new NullPointerException(label + " code must not be null");
        }
        String[] tokens = code.split("[.]");
        if (tokens.length < expected) {
            throw new IllegalArgumentException("Expected at least " + expected
                    + " dot-separated fields in " + label + " code but found "
                    + tokens.length + ": '" + code + "'");
        }
        Integer[] ids = new Integer[expected];
        for (int i = 0; i < expected; i++) {
            ids[i] = Integer.valueOf(tokens[i]);
        }
        return ids;
    }

}