/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 12.03.2004
 * @author Andreas Prlic
 *
 */
package org.biojava.nbio.structure;


import org.biojava.nbio.structure.io.FileConvert;
import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.util.*;


/**
 * A Chain in a PDB file. It contains several groups which can be of
 * one of the types defined in the {@link GroupType} constants.
 *
 * <p>The chain keeps two group lists: the "atom" groups (added through
 * {@link #addGroup(Group)} / {@link #setAtomGroups(List)}) and the SEQRES
 * groups. A lookup map from residue-number string to position in the atom
 * group list is maintained by {@link #addGroup(Group)} and used by
 * {@link #getGroupByPDB(ResidueNumber)}.
 *
 * @author Andreas Prlic
 * @author Jules Jacobsen
 * @since 1.4
 */
public class ChainImpl implements Chain, Serializable {

	private final static Logger logger = LoggerFactory.getLogger(ChainImpl.class);

	private static final long serialVersionUID = 1990171805277911840L;

	/**
	 * The default chain identifier assigned by the constructor.
	 * (The default used to be an empty space.)
	 */
	public static String DEFAULT_CHAIN_ID = "A";

	private String swissprot_id ;
	private String chainID ; // the chain identifier as in PDB files

	private List <Group> groups;
	private List<Group> seqResGroups;

	private Long id;
	private Compound mol;
	private Structure parent;

	// residue number (as String) -> index into 'groups'; filled by addGroup() only
	private Map<String, Integer> pdbResnumMap;
	private String internalChainID; // the chain identifier used in mmCIF files


	private List<SeqMisMatch> seqMisMatches = null;

	/**
	 * Constructs a ChainImpl object with chain identifier
	 * {@link #DEFAULT_CHAIN_ID}, empty atom and SEQRES group lists and no
	 * internal (mmCIF) chain identifier.
	 */
	public ChainImpl() {
		super();

		chainID = DEFAULT_CHAIN_ID;
		groups = new ArrayList<Group>() ;

		seqResGroups = new ArrayList<Group>();
		pdbResnumMap = new HashMap<String,Integer>();
		internalChainID = null;

	}

	/** {@inheritDoc}
	 *
	 */
	@Override
	public Long getId() {
		return id;
	}

	/** {@inheritDoc}
	 *
	 */
	@Override
	public void setId(Long id) {
		this.id = id;
	}

	/** {@inheritDoc}
	 *
	 * @deprecated delegates to {@link #setStructure(Structure)}; use that instead.
	 */
	@Override
	@Deprecated
	public void setParent(Structure parent) {
		setStructure(parent);
	}

	/** {@inheritDoc}
	 *
	 */
	@Override
	public void setStructure(Structure parent){
		this.parent = parent;
	}

	/** Returns the parent Structure of this chain.
	 *
	 * @return the parent Structure object
	 */
	@Override
	public Structure getStructure() {
		return parent;
	}


	/** Returns the parent Structure of this chain.
	 *
	 * @return the parent Structure object
	 * @deprecated use getStructure instead.
	 */
	@Override
	@Deprecated
	public Structure getParent() {
		return getStructure();
	}

	/**
	 * Returns a copy of this Chain.
	 *
	 * <p>The chain ID, Swissprot id and internal chain ID are copied, the
	 * Compound reference is shared (not deep-copied) and every atom group is
	 * cloned. SEQRES groups that are the very same object as one of the atom
	 * groups are mapped to the corresponding cloned atom group, all other
	 * SEQRES groups are cloned on their own. The id, the parent Structure and
	 * the sequence mismatches are not copied.
	 *
	 * @return a copy of this Chain
	 */
	@Override
	public Object clone() {
		// go through all groups and add to new Chain.
		ChainImpl n = new ChainImpl();
		// copy chain data:

		n.setChainID( getChainID());
		n.setSwissprotId ( getSwissprotId());

		// NOTE the Compound will be reset at the parent level (Structure) if cloning is happening from parent level
		// here we don't deep-copy it and just keep the same reference, in case the cloning is happening at the Chain level only
		n.setCompound(this.mol);

		n.setInternalChainID(internalChainID);

		for (Group group : groups) {
			// addGroup() already points the cloned group (and its altlocs) at the new chain
			n.addGroup((Group) group.clone());
		}

		if (seqResGroups!=null){

			List<Group> tmpSeqRes = new ArrayList<Group>();

			// cloning seqres and atom groups is ugly, due to their
			// nested relationship (some of the atoms can be in the seqres, but not all)

			for (Group seqResGroup : seqResGroups) {

				int i = findMatchingGroupIndex(groups, seqResGroup);

				Group g;
				if (i!=-1) {
					// group found in atom groups, we get the equivalent reference from the newly cloned atom groups
					g = n.getAtomGroup(i);
				} else {
					// group not found in atom groups, we clone the seqres group
					g = (Group) seqResGroup.clone();
				}
				tmpSeqRes.add(g);
			}

			// setSeqResGroups() sets the new chain on every group of the list
			n.setSeqResGroups(tmpSeqRes);
		}

		return n ;
	}

	/**
	 * Finds the position of a group in a list by reference identity
	 * ({@code ==}), not by {@code equals}.
	 *
	 * @param atomGroups the list to search
	 * @param g the group to look for
	 * @return the index of the first element that is the same object as
	 * {@code g}, or -1 if there is none
	 */
	private static int findMatchingGroupIndex(List<Group> atomGroups, Group g) {
		int i = 0;
		for (Group atomGroup: atomGroups) {
			if (g==atomGroup) return i;
			i++;
		}
		return -1;
	}


	/** {@inheritDoc}
	 *
	 */
	@Override
	public void setCompound(Compound mol) {
		this.mol = mol;
	}

	/** {@inheritDoc}
	 *
	 */
	@Override
	public Compound getCompound() {
		return this.mol;
	}

	/** Sets the Swissprot id of this chain.
	 * @param sp_id a String specifying the swissprot id value
	 * @see #getSwissprotId
	 */
	@Override
	public void setSwissprotId(String sp_id){
		swissprot_id = sp_id ;
	}

	/** Gets the Swissprot id of this chain.
	 * @return a String representing the swissprot id value
	 * @see #setSwissprotId
	 */
	@Override
	public String getSwissprotId() {
		return swissprot_id ;
	}

	/** {@inheritDoc}
	 *
	 * <p>The group and all its alternate-location groups get this chain set
	 * as their chain. If the group has a residue number, its position is
	 * registered in the residue-number lookup. When another group with the
	 * same residue number is already registered, a warning is logged and the
	 * existing entry is only replaced if the new group is an
	 * {@link AminoAcid}.
	 */
	@Override
	public void addGroup(Group group) {

		group.setChain(this);

		// Set the altlocs chain as well
		for(Group g : group.getAltLocs()) {
			g.setChain(this);
		}

		groups.add(group);

		// store the position internally for quick access of this group

		String pdbResnum = null ;
		ResidueNumber resNum = group.getResidueNumber();
		if ( resNum != null)
			pdbResnum = resNum.toString();
		if ( pdbResnum == null) {
			// no residue number: nothing to register in the lookup
			return;
		}

		Integer pos = groups.size() - 1;
		// ARGH sometimes numbering in PDB files is confusing.
		// e.g. PDB: 1sfe
		/*
		 * ATOM    620  N   GLY    93     -24.320  -6.591   4.210  1.00 46.82           N
		 * ATOM    621  CA  GLY    93     -24.960  -6.849   5.497  1.00 47.35           C
		 * ATOM    622  C   GLY    93     -26.076  -5.873   5.804  1.00 47.24           C
		 * ATOM    623  O   GLY    93     -26.382  -4.986   5.006  1.00 47.56           O
		 *     and ...
		 * HETATM 1348  O   HOH    92     -21.853 -16.886  19.138  1.00 66.92           O
		 * HETATM 1349  O   HOH    93     -26.126   1.226  29.069  1.00 71.69           O
		 * HETATM 1350  O   HOH    94     -22.250 -18.060  -6.401  1.00 61.97           O
		 */

		// this check is to give in this case the entry priority that is an AminoAcid / comes first...
		// a good example of same residue number for 2 residues is 3th3, chain T, residue 201 (a LYS and a sugar BGC covalently attached to it) - JD 2016-03-09
		if ( pdbResnumMap.containsKey(pdbResnum)) {

			Group present = groups.get(pdbResnumMap.get(pdbResnum));
			logger.warn("Adding residue {}({}) to chain {} but a residue with same residue number is already present: {}({}). Will add only the aminoacid residue (if any) to the lookup, lookups for that residue number won't work properly.",
					pdbResnum, group.getPDBName(), getChainID(), present.getResidueNumber(), present.getPDBName());
			if ( group instanceof AminoAcid)
				pdbResnumMap.put(pdbResnum,pos);
		} else {
			pdbResnumMap.put(pdbResnum,pos);
		}

	}


	/**
	 * {@inheritDoc}
	 */
	@Override
	public Group getAtomGroup(int position) {
		return groups.get(position);
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>The returned list is a new list holding the atom groups whose
	 * type equals the requested one.
	 */
	@Override
	public List<Group> getAtomGroups(GroupType type){

		List<Group> tmp = new ArrayList<Group>() ;
		for (Group g : groups) {
			if (g.getType().equals(type)) {
				tmp.add(g);
			}
		}

		return tmp ;
	}


	/** {@inheritDoc}
	 *
	 * <p>Returns the internal list itself, not a copy.
	 */
	@Override
	public List<Group> getAtomGroups(){
		return groups ;
	}

	/** {@inheritDoc}
	 *
	 * <p>Sets this chain on every group and then stores the given list
	 * itself (no copy). NOTE(review): the residue-number lookup used by
	 * {@link #getGroupByPDB(ResidueNumber)} is not rebuilt here.
	 */
	@Override
	public void setAtomGroups(List<Group> groups){
		for (Group g:groups){
			g.setChain(this);
		}
		this.groups = groups;
	}

	/**
	 * Returns the atom groups between two residue numbers, optionally
	 * tolerating start/end residues that are not present in the chain.
	 *
	 * <p>If {@code ignoreMissing} is false this is identical to
	 * {@link #getGroupsByPDB(ResidueNumber, ResidueNumber)}. Otherwise
	 * collection starts at the first group whose residue number equals
	 * {@code start} or whose sequence number is at least that of
	 * {@code start}, and stops after the first collected group whose residue
	 * number equals {@code end} or whose sequence number is at least that of
	 * {@code end}. A missing end is not reported.
	 *
	 * @param start residue number to start at
	 * @param end residue number to stop at (inclusive)
	 * @param ignoreMissing whether missing start/end residues are tolerated
	 * @return the collected groups, in chain order
	 * @throws StructureException if no start position could be determined
	 */
	@Override
	@Deprecated // TODO dmyersturnbull: why is this deprecated if it's declared in Chain?
	public Group[] getGroupsByPDB(ResidueNumber start, ResidueNumber end, boolean ignoreMissing)
			throws StructureException {

		if (! ignoreMissing )
			return getGroupsByPDB(start, end);


		List<Group> retlst = new ArrayList<Group>();

		String pdbresnumStart = start.toString();
		String pdbresnumEnd   = end.toString();

		int startPos = start.getSeqNum();
		int endPos   = end.getSeqNum();

		boolean adding = false;
		boolean foundStart = false;

		for (Group g: groups){

			// exact match of the start residue number
			if ( g.getResidueNumber().toString().equals(pdbresnumStart)) {
				adding = true;
				foundStart = true;
			}

			if ( ! (foundStart && adding) ) {

				// start residue may be missing: begin at the first group at or past it
				int pos = g.getResidueNumber().getSeqNum();

				if ( pos >= startPos) {
					foundStart = true;
					adding = true;
				}

			}

			if ( adding)
				retlst.add(g);

			// exact match of the end residue number
			if ( g.getResidueNumber().toString().equals(pdbresnumEnd)) {
				if ( ! adding)
					throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + chainID);
				adding = false;
				break;
			}
			if (adding){

				// end residue may be missing: stop at the first group at or past it
				int pos = g.getResidueNumber().getSeqNum();
				if (pos >= endPos) {
					adding = false;
					break;
				}

			}
		}

		if ( ! foundStart){
			throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + chainID);
		}


		//not checking if the end has been found in this case...

		return retlst.toArray(new Group[retlst.size()] );
	}


	/**
	 * {@inheritDoc}
	 *
	 * <p>The group is resolved through the residue-number lookup that is
	 * filled by {@link #addGroup(Group)}.
	 *
	 * @throws StructureException if the residue number is not in the lookup
	 */
	@Override
	public Group getGroupByPDB(ResidueNumber resNum) throws StructureException {
		String pdbresnum = resNum.toString();
		if ( pdbResnumMap.containsKey(pdbresnum)) {
			Integer pos = pdbResnumMap.get(pdbresnum);
			return groups.get(pos);
		} else {
			throw new StructureException("unknown PDB residue number " + pdbresnum + " in chain " + chainID);
		}
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>Both residue numbers are matched by their String representation
	 * against the atom groups in chain order; the result includes both the
	 * start and the end group.
	 *
	 * @throws StructureException if the start or the end residue number is
	 * not found, or if the end is encountered before the start
	 */
	@Override
	public Group[] getGroupsByPDB(ResidueNumber start, ResidueNumber end)
			throws StructureException {

		String pdbresnumStart = start.toString();
		String pdbresnumEnd   = end.toString();

		List<Group> retlst = new ArrayList<Group>();

		boolean adding = false;
		boolean foundStart = false;

		for (Group g : groups) {
			String current = g.getResidueNumber().toString();

			if ( current.equals(pdbresnumStart)) {
				adding = true;
				foundStart = true;
			}

			if ( adding)
				retlst.add(g);

			if ( current.equals(pdbresnumEnd)) {
				if ( ! adding)
					throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + chainID);
				adding = false;
				break;
			}
		}

		if ( ! foundStart){
			throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + chainID);
		}
		if ( adding) {
			// ran off the end of the chain while still collecting
			throw new StructureException("did not find end PDB residue number " + pdbresnumEnd + " in chain " + chainID);
		}

		return retlst.toArray(new Group[retlst.size()] );
	}



	/**
	 * {@inheritDoc}
	 */
	@Override
	public int getSeqResLength() {
		//new method returns the length of the sequence defined in the SEQRES records
		return seqResGroups.size();
	}

	/**
	 * {@inheritDoc}
	 */
	@Override
	public void setChainID(String nam) { chainID = nam; }


	/**
	 * {@inheritDoc}
	 */
	@Override
	public String getChainID() { return chainID; }



	/** String representation.
	 *
	 * <p>Contains the chain ID, the molecule name of the Compound (if both
	 * are set) and the number of SEQRES and atom groups.
	 *
	 * @return String representation of the Chain
	 */
	@Override
	public String toString(){
		String newline = System.getProperty("line.separator");
		StringBuilder str = new StringBuilder();
		str.append("Chain >").append(getChainID()).append("<").append(newline);
		if ( mol != null && mol.getMolName() != null){
			str.append(mol.getMolName()).append(newline);
		}
		str.append("total SEQRES length: ").append(getSeqResGroups().size()).append(" total ATOM length:")
				.append(getAtomLength()).append(" residues ").append(newline);

		return str.toString() ;

	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>Builds a {@link ProteinSequence} from {@link #getSeqResSequence()}.
	 * If the sequence cannot be created an error is logged and
	 * {@code null} is returned.
	 */
	@Override
	public Sequence<?> getBJSequence()  {

		String seq = getSeqResSequence();

		Sequence<AminoAcidCompound> s = null;

		try {
			s = new ProteinSequence(seq);
		} catch (CompoundNotFoundException e) {
			logger.error("Could not create sequence object from seqres sequence. Some unknown compound: {}",e.getMessage());
		}

		//TODO: return a DNA sequence if the content is DNA...
		return s;

	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>Only atom groups whose chemical component has a protein or
	 * polynucleotide polymer type contribute a letter; all other groups are
	 * skipped. A missing one-letter code is written as
	 * {@link StructureTools#UNKNOWN_GROUP_LABEL}.
	 */
	@Override
	public String getAtomSequence(){

		StringBuilder sequence = new StringBuilder() ;

		for ( Group g: getAtomGroups()){
			// NOTE(review): unlike getSeqResSequence(), a null ChemComp is not guarded
			// here and would raise a NullPointerException - confirm whether it can occur.
			ChemComp cc = g.getChemComp();

			if ( PolymerType.PROTEIN_ONLY.contains(cc.getPolymerType()) ||
					PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())){
				// an amino acid residue.. use for alignment
				String oneLetter= ChemCompGroupFactory.getOneLetterCode(cc);
				if ( oneLetter == null)
					oneLetter = Character.toString(StructureTools.UNKNOWN_GROUP_LABEL);
				sequence.append(oneLetter);
			}

		}
		return sequence.toString();

	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>Every SEQRES group contributes exactly one character:
	 * {@link StructureTools#UNKNOWN_GROUP_LABEL} is used when the chemical
	 * component is unavailable, is neither protein nor polynucleotide, or
	 * has a null, empty or {@code "?"} one-letter code.
	 */
	@Override
	public String getSeqResSequence(){

		StringBuilder str = new StringBuilder();
		for (Group g : seqResGroups) {
			ChemComp cc = g.getChemComp();
			if ( cc == null) {
				// the '{}' placeholder is required, otherwise SLF4J drops the argument
				logger.warn("Could not load ChemComp for group: {}", g);
				str.append(StructureTools.UNKNOWN_GROUP_LABEL);
			} else if ( PolymerType.PROTEIN_ONLY.contains(cc.getPolymerType()) ||
					PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())){
				// an amino acid residue.. use for alignment
				String oneLetter= ChemCompGroupFactory.getOneLetterCode(cc);
				if ( oneLetter == null || oneLetter.isEmpty() || oneLetter.equals("?"))
					oneLetter = Character.toString(StructureTools.UNKNOWN_GROUP_LABEL);
				str.append(oneLetter);
			} else {
				str.append(StructureTools.UNKNOWN_GROUP_LABEL);
			}
		}
		return str.toString();
	}


	/**
	 * {@inheritDoc}
	 */
	@Override
	public Group getSeqResGroup(int position) {
		return seqResGroups.get(position);
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>The returned list is a new list holding the SEQRES groups whose
	 * type equals the requested one.
	 */
	@Override
	public List<Group> getSeqResGroups(GroupType type) {
		List<Group> tmp = new ArrayList<Group>() ;
		for (Group g : seqResGroups) {
			if (g.getType().equals(type)) {
				tmp.add(g);
			}
		}

		return tmp ;
	}

	/** {@inheritDoc}
	 *
	 * <p>Returns the internal list itself, not a copy.
	 */
	@Override
	public List<Group> getSeqResGroups() {
		return seqResGroups;
	}

	/** {@inheritDoc}
	 *
	 * <p>Sets this chain on every group and then stores the given list
	 * itself (no copy).
	 */
	@Override
	public void setSeqResGroups(List<Group> groups){
		for (Group g: groups){
			g.setChain(this);
		}
		this.seqResGroups = groups;
	}

	/**
	 * Appends a group to the SEQRES groups. The chain of the group is not
	 * modified.
	 *
	 * @param g the group to append
	 */
	protected void addSeqResGroup(Group g){
		seqResGroups.add(g);
	}


	/** {@inheritDoc}
	 *
	 */
	@Override
	public int getAtomLength() {
		return groups.size();
	}

	/** {@inheritDoc}
	 *
	 * <p>A ligand here is every atom group that is not contained in the
	 * SEQRES groups and is not water.
	 */
	@Override
	public List<Group> getAtomLigands(){
		List<Group> ligands = new ArrayList<Group>();

		for (Group g : groups) {
			if (!seqResGroups.contains(g) && !g.isWater()) {
				ligands.add(g);
			}
		}

		return ligands;
	}

	/** {@inheritDoc}
	 *
	 */
	@Override
	public String getInternalChainID() {
		return internalChainID;
	}

	/** {@inheritDoc}
	 *
	 */
	@Override
	public void setInternalChainID(String internalChainID) {
		this.internalChainID = internalChainID;

	}

	/** {@inheritDoc}
	 *
	 * <p>Delegates to {@link FileConvert#toPDB(Chain)}.
	 */
	@Override
	public String toPDB() {
		return FileConvert.toPDB(this);
	}

	/** {@inheritDoc}
	 *
	 * <p>Delegates to {@code FileConvert.toMMCIF(this, true)}.
	 */
	@Override
	public String toMMCIF() {
		return FileConvert.toMMCIF(this, true);
	}

	/** {@inheritDoc}
	 *
	 */
	@Override
	public void setSeqMisMatches(List<SeqMisMatch> seqMisMatches) {
		this.seqMisMatches = seqMisMatches;
	}

	/** {@inheritDoc}
	 *
	 */
	@Override
	public List<SeqMisMatch> getSeqMisMatches() {
		return seqMisMatches;
	}
}