001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Author: Daniel Asarnow 021 * Date: 2012-7-23 022 */ 023 024package org.biojava.nbio.structure.cath; 025 026/** 027 * @author Daniel Asarnow 028 */ 029 030import java.io.IOException; 031import java.io.Serializable; 032import java.util.ArrayList; 033import java.util.Date; 034import java.util.HashSet; 035import java.util.List; 036import java.util.Set; 037 038import org.biojava.nbio.structure.ResidueRange; 039import org.biojava.nbio.structure.Structure; 040import org.biojava.nbio.structure.StructureException; 041import org.biojava.nbio.structure.StructureIdentifier; 042import org.biojava.nbio.structure.SubstructureIdentifier; 043import org.biojava.nbio.structure.align.util.AtomCache; 044 045/** 046 * A class which represents a single CATH domain. 047 */ 048public class CathDomain implements Serializable, StructureIdentifier { 049 050 public static final long serialVersionUID = 1L; 051 052 /** 053 * The CATH domain code. Always 7 characters in length, combining the PDB and chain letter with the number of the domain within CATH. 054 * Example: 1aoiA00 055 * If the chain letter '0', domain refers to an entire PDB entry. 056 */ 057 String domainName; // 7 characters 1oaiA00 058 059 /** 060 * The class number of this domain. 061 */ 062 Integer classId; // C 063 064 /** 065 * The architecture number of this domain. 066 */ 067 Integer architectureId; // A 068 069 /** 070 * The topology number of this domain. 071 */ 072 Integer topologyId; // T 073 074 /** 075 * The homologous superfamily number of this domain. 076 */ 077 Integer homologyId; // H 078 079 /** 080 * The sequence family (35% identity) number of this domain. 081 */ 082 Integer sequenceFamilyId; // S 083 084 /** 085 * The "orthologous" sequence family (60% identity) number of this domain. 086 */ 087 Integer orthologousSequenceFamilyId; // O 088 089 /** 090 * The "Like" sequence family (95% identity) number of this domain. 091 */ 092 Integer likeSequenceFamilyId; // L 093 094 /** 095 * The identical sequence family (100% identity) number of this domain. 096 */ 097 098 Integer identicalSequenceFamilyId; // I 099 100 /** 101 * The count of this domain among the identical sequence family members. 102 */ 103 Integer domainCounter; // D 104 105 /** 106 * The domain length.. 107 */ 108 Integer length; 109 110 /** 111 * The resolution of the domain structure. Nominally in Angstroms, 112 * the values 999.000 and 1000.000 signify NMR structures and obsolete structures, respectively. 113 */ 114 Double resolution; 115 116 /** 117 * The format and version of the CathDomainDescriptionFile. 118 */ 119 String format; 120 121 /** 122 * The CATH version. 123 */ 124 String version; 125 126 Date date; 127 128 /** 129 * The so-called name field holds a potentially long description of the domain. 130 */ 131 String name; 132 133 /** 134 * Complete source organism listing. 135 */ 136 String source; 137 138 /** 139 * FASTA header. 140 */ 141 String sequenceHeader; 142 143 /** 144 * FASTA sequence. 145 */ 146 String sequence; 147 148 /** 149 * List of all sub-domain segments. 150 */ 151 List<CathSegment> segments; 152 153 /** 154 * A (potentially long) comment. Usually empty. 155 */ 156 String comment; 157 158 public String getDomainName() { 159 return domainName; 160 } 161 162 public void setDomainName(String domainName) { 163 this.domainName = domainName; 164 } 165 166 /** 167 * Returns a string of the form {@code PDBID.CHAIN}. 168 * For example: {@code 1hiv.A}. 169 * @deprecated This method is poorly named; use {@link #getThePdbId()} or {@link #getPdbIdAndChain()} instead 170 */ 171 @Deprecated 172 public String getPdbId() { 173 return getPdbIdAndChain(); 174 } 175 176 /** 177 * Returns the PDB ID. 178 */ 179 public String getThePdbId() { 180 return domainName.substring(0, 4); 181 } 182 183 /** 184 * Returns a string of the form {@code PDBID.CHAIN}. 185 * For example: {@code 1hiv.A}. 186 */ 187 public String getPdbIdAndChain() { 188 return domainName.substring(0, 4) + 189 (!domainName.substring(4, 5).equals("0") ? "." + domainName.substring(4, 5) : ""); 190 } 191 192 public Integer getDomainId() { 193 return Integer.parseInt(domainName.substring(5)); 194 } 195 196 public Integer getClassId() { 197 return classId; 198 } 199 200 public void setClassId(Integer classId) { 201 this.classId = classId; 202 } 203 204 public Integer getArchitectureId() { 205 return architectureId; 206 } 207 208 public void setArchitectureId(Integer architectureId) { 209 this.architectureId = architectureId; 210 } 211 212 public Integer getTopologyId() { 213 return topologyId; 214 } 215 216 public void setTopologyId(Integer topologyId) { 217 this.topologyId = topologyId; 218 } 219 220 public Integer getHomologyId() { 221 return homologyId; 222 } 223 224 public void setHomologyId(Integer homologyId) { 225 this.homologyId = homologyId; 226 } 227 228 public Integer getSequenceFamilyId() { 229 return sequenceFamilyId; 230 } 231 232 public void setSequenceFamilyId(Integer sequenceFamilyId) { 233 this.sequenceFamilyId = sequenceFamilyId; 234 } 235 236 public Integer getOrthologousSequenceFamilyId() { 237 return orthologousSequenceFamilyId; 238 } 239 240 public void setOrthologousSequenceFamilyId(Integer orthologousSequenceFamilyId) { 241 this.orthologousSequenceFamilyId = orthologousSequenceFamilyId; 242 } 243 244 public Integer getLikeSequenceFamilyId() { 245 return likeSequenceFamilyId; 246 } 247 248 public void setLikeSequenceFamilyId(Integer likeSequenceFamilyId) { 249 this.likeSequenceFamilyId = likeSequenceFamilyId; 250 } 251 252 public Integer getIdenticalSequenceFamilyId() { 253 return identicalSequenceFamilyId; 254 } 255 256 public void setIdenticalSequenceFamilyId(Integer identicalSequenceFamilyId) { 257 this.identicalSequenceFamilyId = identicalSequenceFamilyId; 258 } 259 260 public Integer getDomainCounter() { 261 return domainCounter; 262 } 263 264 public void setDomainCounter(Integer domainCounter) { 265 this.domainCounter = domainCounter; 266 } 267 268 public Integer getLength() { 269 return length; 270 } 271 272 public void setLength(Integer length) { 273 this.length = length; 274 } 275 276 public Double getResolution() { 277 return resolution; 278 } 279 280 public void setResolution(Double resolution) { 281 this.resolution = resolution; 282 } 283 284 public void setCATH(String cathCode) { 285 String[] token = cathCode.split("[.]"); 286 setClassId(Integer.parseInt(token[0])); 287 setArchitectureId(Integer.parseInt(token[1])); 288 setTopologyId(Integer.parseInt(token[2])); 289 setHomologyId(Integer.parseInt(token[3])); 290 } 291 292 public String getCATH() { 293 return Integer.toString(getClassId()) + "." + 294 Integer.toString(getArchitectureId()) + "." + 295 Integer.toString(getTopologyId()) + "." + 296 Integer.toString(getHomologyId()); 297 } 298 299 public void setSOLID(String cathCode) { 300 String[] token = cathCode.split("[.]"); 301 setSequenceFamilyId(Integer.parseInt(token[0])); 302 setOrthologousSequenceFamilyId(Integer.parseInt(token[1])); 303 setLikeSequenceFamilyId(Integer.parseInt(token[2])); 304 setIdenticalSequenceFamilyId(Integer.parseInt(token[3])); 305 setDomainCounter(Integer.parseInt(token[4])); 306 } 307 308 public String getSOILD() { 309 return Integer.toString(getSequenceFamilyId()) + "." + 310 Integer.toString(getOrthologousSequenceFamilyId()) + "." + 311 Integer.toString(getLikeSequenceFamilyId()) + "." + 312 Integer.toString(getIdenticalSequenceFamilyId()) + "." + 313 Integer.toString(getDomainCounter()); 314 } 315 316 public Integer getClassificationId(CathCategory cathCategory) { 317 switch (cathCategory) { 318 case Class: 319 return getClassId(); 320 case Architecture: 321 return getArchitectureId(); 322 case Topolgy: 323 return getTopologyId(); 324 case Homology: 325 return getHomologyId(); 326 case SequenceFamily: 327 return getSequenceFamilyId(); 328 case OrthologousSequenceFamily: 329 return getOrthologousSequenceFamilyId(); 330 case LikeSequenceFamily: 331 return getLikeSequenceFamilyId(); 332 case IdenticalSequenceFamily: 333 return getIdenticalSequenceFamilyId(); 334 case DomainCounter: 335 return getDomainCounter(); 336 default: 337 return null; 338 } 339 } 340 341 public String getFormat() { 342 return format; 343 } 344 345 public void setFormat(String format) { 346 this.format = format; 347 } 348 349 public String getVersion() { 350 return version; 351 } 352 353 public void setVersion(String version) { 354 this.version = version; 355 } 356 357 public Date getDate() { 358 return date; 359 } 360 361 public void setDate(Date date) { 362 this.date = date; 363 } 364 365 public String getName() { 366 return name; 367 } 368 369 public void setName(String name) { 370 this.name = name; 371 } 372 373 public String getSource() { 374 return source; 375 } 376 377 public void setSource(String source) { 378 this.source = source; 379 } 380 381 public String getSequenceHeader() { 382 return sequenceHeader; 383 } 384 385 public void setSequenceHeader(String sequenceHeader) { 386 this.sequenceHeader = sequenceHeader; 387 } 388 389 public String getSequence() { 390 return sequence; 391 } 392 393 public void setSequence(String sequence) { 394 this.sequence = sequence; 395 } 396 397 public List<CathSegment> getSegments() { 398 return segments; 399 } 400 401 public void setSegments(List<CathSegment> segments) { 402 this.segments = segments; 403 } 404 405 public String getComment() { 406 return comment; 407 } 408 409 public void setComment(String comment) { 410 this.comment = comment; 411 } 412 413 @Override 414 public String toString() { 415 return "CathDomain [domainName=" + domainName + ", classId=" + classId 416 + ", architectureId=" + architectureId + ", topologyId=" 417 + topologyId + ", homologyId=" + homologyId 418 + ", sequenceFamilyId=" + sequenceFamilyId 419 + ", orthologousSequenceFamilyId=" 420 + orthologousSequenceFamilyId + ", likeSequenceFamilyId=" 421 + likeSequenceFamilyId + ", identicalSequenceFamilyId=" 422 + identicalSequenceFamilyId + ", domainCounter=" 423 + domainCounter + ", length=" + length + ", resolution=" 424 + resolution + ", format=" + format + ", version=" + version 425 + ", date=" + date + ", name=" + name + ", source=" + source 426 + ", sequenceHeader=" + sequenceHeader + ", sequence=" 427 + sequence + ", segments=" + segments + ", comment=" + comment 428 + "]"; 429 } 430 431 /** 432 * Returns the chains this domain is defined over; contains more than 1 element only if this domains is a multi-chain domain. 433 */ 434 public Set<String> getChains() { 435 Set<String> chains = new HashSet<String>(); 436 List<ResidueRange> rrs = toCanonical().getResidueRanges(); 437 for (ResidueRange rr : rrs) chains.add(rr.getChainId()); 438 return chains; 439 } 440 441 @Override 442 public String getIdentifier() { 443 return getCATH(); 444 } 445 446 @Override 447 public SubstructureIdentifier toCanonical() { 448 List<ResidueRange> ranges = new ArrayList<ResidueRange>(); 449 String chain = String.valueOf(getDomainName().charAt(getDomainName().length() - 3)); 450 for (CathSegment segment : this.getSegments()) { 451 ranges.add(new ResidueRange(chain, segment.getStart(), segment.getStop())); 452 } 453 454 return new SubstructureIdentifier(getThePdbId(), ranges); 455 } 456 457 @Override 458 public Structure reduce(Structure input) throws StructureException { 459 return toCanonical().reduce(input); 460 } 461 462 @Override 463 public Structure loadStructure(AtomCache cache) throws StructureException, 464 IOException { 465 return cache.getStructure(getThePdbId()); 466 } 467 468 469}