001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Jan 4, 2006 021 * 022 */ 023package org.biojava.nbio.structure; 024 025import java.io.File; 026import java.io.FileInputStream; 027import java.io.IOException; 028import java.io.InputStream; 029import java.util.ArrayList; 030import java.util.Collection; 031import java.util.Collections; 032import java.util.HashMap; 033import java.util.HashSet; 034import java.util.Iterator; 035import java.util.LinkedHashSet; 036import java.util.List; 037import java.util.Map; 038import java.util.Set; 039 040import javax.vecmath.Point3d; 041 042import org.biojava.nbio.structure.align.util.AtomCache; 043import org.biojava.nbio.structure.contact.AtomContactSet; 044import org.biojava.nbio.structure.contact.Grid; 045import org.biojava.nbio.structure.io.FileParsingParameters; 046import org.biojava.nbio.structure.io.PDBFileParser; 047import org.biojava.nbio.core.util.FileDownloadUtils; 048import org.slf4j.Logger; 049import org.slf4j.LoggerFactory; 050 051/** 052 * A class that provides some tool methods. 053 * 054 * @author Andreas Prlic 055 * @author Jules Jacobsen 056 * @since 1.0 057 */ 058public class StructureTools { 059 060 private static final Logger logger = LoggerFactory 061 .getLogger(StructureTools.class); 062 063 // Amino Acid backbone 064 /** 065 * The atom name of the backbone C-alpha atom. Note that this can be 066 * ambiguous depending on the context since Calcium atoms use the same name 067 * in PDB. 068 */ 069 public static final String CA_ATOM_NAME = "CA"; 070 071 /** 072 * The atom name for the backbone amide nitrogen 073 */ 074 public static final String N_ATOM_NAME = "N"; 075 076 /** 077 * The atom name for the backbone carbonyl 078 */ 079 public static final String C_ATOM_NAME = "C"; 080 081 /** 082 * The atom name for the backbone carbonyl oxygen 083 */ 084 public static final String O_ATOM_NAME = "O"; 085 086 /** 087 * The atom name of the side-chain C-beta atom 088 */ 089 public static final String CB_ATOM_NAME = "CB"; 090 091 // Nucleotide backbone 092 /** 093 * The atom name of the backbone C1' in RNA 094 */ 095 public static final String C1_ATOM_NAME = "C1'"; 096 /** 097 * The atom name of the backbone C2' in RNA 098 */ 099 public static final String C2_ATOM_NAME = "C2'"; 100 /** 101 * The atom name of the backbone C3' in RNA 102 */ 103 public static final String C3_ATOM_NAME = "C3'"; 104 /** 105 * The atom name of the backbone C4' in RNA 106 */ 107 public static final String C4_ATOM_NAME = "C4'"; 108 /** 109 * The atom name of the backbone O2' in RNA 110 */ 111 public static final String O2_ATOM_NAME = "O2'"; 112 /** 113 * The atom name of the backbone O3' in RNA 114 */ 115 public static final String O3_ATOM_NAME = "O3'"; 116 /** 117 * The atom name of the backbone O4' in RNA 118 */ 119 public static final String O4_ATOM_NAME = "O4'"; 120 /** 121 * The atom name of the backbone O4' in RNA 122 */ 123 public static final String O5_ATOM_NAME = "O5'"; 124 /** 125 * The atom name of the backbone O4' in RNA 126 */ 127 public static final String OP1_ATOM_NAME = "OP1"; 128 /** 129 * The atom name of the backbone O4' in RNA 130 */ 131 public static final String OP2_ATOM_NAME = "OP2"; 132 /** 133 * The atom name of the backbone phosphate in RNA 134 */ 135 public static final String P_ATOM_NAME = "P"; 136 137 /** 138 * The atom used as representative for nucleotides, equivalent to 139 * {@link #CA_ATOM_NAME} for proteins 140 */ 141 public static final String NUCLEOTIDE_REPRESENTATIVE = P_ATOM_NAME; 142 143 /** 144 * The character to use for unknown compounds in sequence strings 145 */ 146 public static final char UNKNOWN_GROUP_LABEL = 'X'; 147 148 /** 149 * Below this ratio of aminoacid/nucleotide residues to the sequence total, 150 * we use simple majority of aminoacid/nucleotide residues to decide the 151 * character of the chain (protein/nucleotide) 152 */ 153 public static final double RATIO_RESIDUES_TO_TOTAL = 0.95; 154 155 /** 156 * Threshold for plausible binding of a ligand to the selected substructure 157 */ 158 public static final double DEFAULT_LIGAND_PROXIMITY_CUTOFF = 5; 159 160 // there is a file format change in PDB 3.0 and nucleotides are being 161 // renamed 162 private static final Map<String, Character> nucleotides30; 163 private static final Map<String, Character> nucleotides23; 164 165 // amino acid 3 and 1 letter code definitions 166 private static final Map<String, Character> aminoAcids; 167 168 private static final Set<Element> hBondDonorAcceptors; 169 170 static { 171 nucleotides30 = new HashMap<String, Character>(); 172 nucleotides30.put("DA", 'A'); 173 nucleotides30.put("DC", 'C'); 174 nucleotides30.put("DG", 'G'); 175 nucleotides30.put("DT", 'T'); 176 nucleotides30.put("DI", 'I'); 177 nucleotides30.put("A", 'A'); 178 nucleotides30.put("G", 'G'); 179 nucleotides30.put("C", 'C'); 180 nucleotides30.put("U", 'U'); 181 nucleotides30.put("I", 'I'); 182 183 // the DNA linkers - the +C , +G, +A +T +U and +I have been replaced 184 // with these: 185 nucleotides30.put("TAF", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine 186 nucleotides30.put("TC1", UNKNOWN_GROUP_LABEL); // Furanosyl 187 nucleotides30.put("TFE", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine 188 nucleotides30.put("TFO", UNKNOWN_GROUP_LABEL); // Tenofovir (3' 189 // terminator) 190 nucleotides30.put("TGP", UNKNOWN_GROUP_LABEL); // Guanine variant 191 nucleotides30.put("THX", UNKNOWN_GROUP_LABEL); // 5' terminator 192 nucleotides30.put("TLC", UNKNOWN_GROUP_LABEL); // Thymine with dicyclic 193 // sugar 194 nucleotides30.put("TLN", UNKNOWN_GROUP_LABEL); // locked Thymine 195 nucleotides30.put("LCG", UNKNOWN_GROUP_LABEL); // locked Guanine 196 nucleotides30.put("TP1", UNKNOWN_GROUP_LABEL); // Thymine peptide 197 // nucleic acid, with 198 // added methyl 199 nucleotides30.put("CP1", UNKNOWN_GROUP_LABEL); // Cytidine peptide 200 // nucleic acid, with 201 // added methyl 202 nucleotides30.put("TPN", UNKNOWN_GROUP_LABEL); // Thymine peptide 203 // nucleic acid 204 nucleotides30.put("CPN", UNKNOWN_GROUP_LABEL); // Cytidine peptide 205 // nucleic acid 206 nucleotides30.put("GPN", UNKNOWN_GROUP_LABEL); // Guanine peptide 207 // nucleic acid 208 nucleotides30.put("APN", UNKNOWN_GROUP_LABEL); // Adenosine peptide 209 // nucleic acid 210 nucleotides30.put("TPC", UNKNOWN_GROUP_LABEL); // Thymine variant 211 212 // store nucleic acids (C, G, A, T, U, and I), and 213 // the modified versions of nucleic acids (+C, +G, +A, +T, +U, and +I), 214 // and 215 nucleotides23 = new HashMap<String, Character>(); 216 String[] names = { "C", "G", "A", "T", "U", "I", "+C", "+G", "+A", 217 "+T", "+U", "+I" }; 218 for (String n : names) { 219 nucleotides23.put(n, n.charAt(n.length() - 1)); 220 } 221 222 aminoAcids = new HashMap<String, Character>(); 223 aminoAcids.put("GLY", 'G'); 224 aminoAcids.put("ALA", 'A'); 225 aminoAcids.put("VAL", 'V'); 226 aminoAcids.put("LEU", 'L'); 227 aminoAcids.put("ILE", 'I'); 228 aminoAcids.put("PHE", 'F'); 229 aminoAcids.put("TYR", 'Y'); 230 aminoAcids.put("TRP", 'W'); 231 aminoAcids.put("PRO", 'P'); 232 aminoAcids.put("HIS", 'H'); 233 aminoAcids.put("LYS", 'K'); 234 aminoAcids.put("ARG", 'R'); 235 aminoAcids.put("SER", 'S'); 236 aminoAcids.put("THR", 'T'); 237 aminoAcids.put("GLU", 'E'); 238 aminoAcids.put("GLN", 'Q'); 239 aminoAcids.put("ASP", 'D'); 240 aminoAcids.put("ASN", 'N'); 241 aminoAcids.put("CYS", 'C'); 242 aminoAcids.put("MET", 'M'); 243 // MSE is only found as a molecular replacement for MET 244 aminoAcids.put("MSE", 'M'); 245 // 'non-standard', genetically encoded 246 // http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html 247 // IUBMB recommended name is 'SEC' but the wwPDB currently use 'CSE' 248 // likewise 'PYL' (IUBMB) and 'PYH' (PDB) 249 aminoAcids.put("CSE", 'U'); 250 aminoAcids.put("SEC", 'U'); 251 aminoAcids.put("PYH", 'O'); 252 aminoAcids.put("PYL", 'O'); 253 254 hBondDonorAcceptors = new HashSet<Element>(); 255 hBondDonorAcceptors.add(Element.N); 256 hBondDonorAcceptors.add(Element.O); 257 hBondDonorAcceptors.add(Element.S); 258 259 } 260 261 /** 262 * Count how many Atoms are contained within a Structure object. 263 * 264 * @param s 265 * the structure object 266 * @return the number of Atoms in this Structure 267 */ 268 public static final int getNrAtoms(Structure s) { 269 270 int nrAtoms = 0; 271 272 Iterator<Group> iter = new GroupIterator(s); 273 274 while (iter.hasNext()) { 275 Group g = iter.next(); 276 nrAtoms += g.size(); 277 } 278 279 return nrAtoms; 280 } 281 282 /** 283 * Count how many groups are contained within a structure object. 284 * 285 * @param s 286 * the structure object 287 * @return the number of groups in the structure 288 */ 289 public static final int getNrGroups(Structure s) { 290 int nrGroups = 0; 291 292 List<Chain> chains = s.getChains(0); 293 for (Chain c : chains) { 294 nrGroups += c.getAtomLength(); 295 } 296 return nrGroups; 297 } 298 299 /** 300 * Returns an array of the requested Atoms from the Structure object. 301 * Iterates over all groups and checks if the requested atoms are in this 302 * group, no matter if this is a {@link AminoAcid} or {@link HetatomImpl} 303 * group. If the group does not contain all requested atoms then no atoms 304 * are added for that group. For structures with more than one model, only 305 * model 0 will be used. 306 * 307 * @param s 308 * the structure to get the atoms from 309 * 310 * @param atomNames 311 * contains the atom names to be used. 312 * @return an Atom[] array 313 */ 314 public static final Atom[] getAtomArray(Structure s, String[] atomNames) { 315 List<Chain> chains = s.getModel(0); 316 317 List<Atom> atoms = new ArrayList<Atom>(); 318 319 extractAtoms(atomNames, chains, atoms); 320 return atoms.toArray(new Atom[atoms.size()]); 321 322 } 323 324 /** 325 * Returns an array of the requested Atoms from the Structure object. In 326 * contrast to {@link #getAtomArray(Structure, String[])} this method 327 * iterates over all chains. Iterates over all chains and groups and checks 328 * if the requested atoms are in this group, no matter if this is a 329 * {@link AminoAcid} or {@link HetatomImpl} group. If the group does not 330 * contain all requested atoms then no atoms are added for that group. For 331 * structures with more than one model, only model 0 will be used. 332 * 333 * @param s 334 * the structure to get the atoms from 335 * 336 * @param atomNames 337 * contains the atom names to be used. 338 * @return an Atom[] array 339 */ 340 public static final Atom[] getAtomArrayAllModels(Structure s, 341 String[] atomNames) { 342 343 List<Atom> atoms = new ArrayList<Atom>(); 344 345 for (int i = 0; i < s.nrModels(); i++) { 346 List<Chain> chains = s.getModel(i); 347 extractAtoms(atomNames, chains, atoms); 348 } 349 return atoms.toArray(new Atom[atoms.size()]); 350 351 } 352 353 /** 354 * Convert all atoms of the structure (all models) into an Atom array 355 * 356 * @param s 357 * input structure 358 * @return all atom array 359 */ 360 public static final Atom[] getAllAtomArray(Structure s) { 361 List<Atom> atoms = new ArrayList<Atom>(); 362 363 AtomIterator iter = new AtomIterator(s); 364 while (iter.hasNext()) { 365 Atom a = iter.next(); 366 atoms.add(a); 367 } 368 return atoms.toArray(new Atom[atoms.size()]); 369 } 370 /** 371 * Convert all atoms of the structure (specified model) into an Atom array 372 * 373 * @param s 374 * input structure 375 * @return all atom array 376 */ 377 public static final Atom[] getAllAtomArray(Structure s, int model) { 378 List<Atom> atoms = new ArrayList<Atom>(); 379 380 AtomIterator iter = new AtomIterator(s,model); 381 while (iter.hasNext()) { 382 Atom a = iter.next(); 383 atoms.add(a); 384 } 385 return atoms.toArray(new Atom[atoms.size()]); 386 387 } 388 389 /** 390 * Returns and array of all atoms of the chain, including 391 * Hydrogens (if present) and all HETATOMs. Waters are not included. 392 * 393 * @param c 394 * input chain 395 * @return all atom array 396 */ 397 public static final Atom[] getAllAtomArray(Chain c) { 398 List<Atom> atoms = new ArrayList<Atom>(); 399 400 for (Group g : c.getAtomGroups()) { 401 if (g.isWater()) 402 continue; 403 for (Atom a : g.getAtoms()) { 404 atoms.add(a); 405 } 406 } 407 return atoms.toArray(new Atom[atoms.size()]); 408 } 409 410 /** 411 * List of groups from the structure not included in ca (e.g. ligands). 412 * 413 * Unaligned groups are searched from all chains referenced in ca, as well 414 * as any chains in the first model of the structure from ca[0], if any. 415 * 416 * @param ca an array of atoms 417 * @return 418 */ 419 public static List<Group> getUnalignedGroups(Atom[] ca) { 420 Set<Chain> chains = new HashSet<Chain>(); 421 Set<Group> caGroups = new HashSet<Group>(); 422 423 // Create list of all chains in this structure 424 Structure s = null; 425 if (ca.length > 0) { 426 Group g = ca[0].getGroup(); 427 if (g != null) { 428 Chain c = g.getChain(); 429 if (c != null) { 430 s = c.getStructure(); 431 } 432 } 433 } 434 if (s != null) { 435 // Add all chains from the structure 436 for (Chain c : s.getChains(0)) { 437 chains.add(c); 438 } 439 } 440 441 // Add groups and chains from ca 442 for (Atom a : ca) { 443 Group g = a.getGroup(); 444 if (g != null) { 445 caGroups.add(g); 446 447 Chain c = g.getChain(); 448 if (c != null) { 449 chains.add(c); 450 } 451 } 452 } 453 454 // Iterate through all chains, finding groups not in ca 455 List<Group> unadded = new ArrayList<Group>(); 456 for (Chain c : chains) { 457 for (Group g : c.getAtomGroups()) { 458 if (!caGroups.contains(g)) { 459 unadded.add(g); 460 } 461 } 462 } 463 return unadded; 464 } 465 466 /** 467 * Finds all ligand groups from the target which fall within the cutoff distance 468 * of some atom from the query set. 469 * 470 * @param target Set of groups including the ligands 471 * @param query Atom selection 472 * @param cutoff Distance from query atoms to consider, in angstroms 473 * @return All groups from the target with at least one atom within cutoff of a query atom 474 * @see StructureTools#DEFAULT_LIGAND_PROXIMITY_CUTOFF 475 */ 476 public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[] query, double cutoff) { 477 // Geometric hashing of the reduced structure 478 Grid grid = new Grid(cutoff); 479 grid.addAtoms(query); 480 481 List<Group> ligands = new ArrayList<>(); 482 for(Group g :target ) { 483 // don't worry about waters 484 if(g.isWater()) { 485 continue; 486 } 487 488 if(g.isPolymeric() ) { 489 // Polymers aren't ligands 490 continue; 491 } 492 493 // It is a ligand! 494 495 // Check that it's within cutoff of something in reduced 496 List<Atom> groupAtoms = g.getAtoms(); 497 if( ! grid.hasAnyContact(Calc.atomsToPoints(groupAtoms))) { 498 continue; 499 } 500 501 ligands.add(g); 502 } 503 return ligands; 504 } 505 506 /** 507 * Adds a particular group to a structure. A new chain will be created if necessary. 508 * 509 * <p>When adding multiple groups, pass the return value of one call as the 510 * chainGuess parameter of the next call for efficiency. 511 * <pre> 512 * Chain guess = null; 513 * for(Group g : groups) { 514 * guess = addGroupToStructure(s, g, guess ); 515 * } 516 * </pre> 517 * @param s structure to receive the group 518 * @param g group to add 519 * @param chainGuess (optional) If not null, should be a chain from s. Used 520 * to improve performance when adding many groups from the same chain 521 * @param clone Indicates whether the input group should be cloned before 522 * being added to the new chain 523 * @return the chain g was added to 524 */ 525 public static Chain addGroupToStructure(Structure s, Group g, int model, Chain chainGuess, boolean clone ) { 526 synchronized(s) { 527 // Find or create the chain 528 String chainId = g.getChainId(); 529 assert !chainId.isEmpty(); 530 Chain chain; 531 if(chainGuess != null && chainGuess.getId() == chainId) { 532 // previously guessed chain 533 chain = chainGuess; 534 } else { 535 // Try to guess 536 chain = s.getChain(chainId, model); 537 if(chain == null) { 538 // no chain found 539 chain = new ChainImpl(); 540 chain.setId(chainId); 541 542 Chain oldChain = g.getChain(); 543 chain.setName(oldChain.getName()); 544 545 EntityInfo oldEntityInfo = oldChain.getEntityInfo(); 546 547 EntityInfo newEntityInfo = s.getEntityById(oldEntityInfo.getMolId()); 548 if( newEntityInfo == null ) { 549 newEntityInfo = new EntityInfo(oldEntityInfo); 550 s.addEntityInfo(newEntityInfo); 551 } 552 newEntityInfo.addChain(chain); 553 chain.setEntityInfo(newEntityInfo); 554 555 // TODO Do the seqres need to be cloned too? -SB 2016-10-7 556 chain.setSeqResGroups(oldChain.getSeqResGroups()); 557 chain.setSeqMisMatches(oldChain.getSeqMisMatches()); 558 559 s.addChain(chain,model); 560 } 561 } 562 563 // Add cloned group 564 if(clone) { 565 g = (Group)g.clone(); 566 } 567 chain.addGroup(g); 568 569 return chain; 570 } 571 } 572 573 /** 574 * Add a list of groups to a new structure. Chains will be automatically 575 * created in the new structure as needed. 576 * @param s structure to receive the group 577 * @param g group to add 578 * @param clone Indicates whether the input groups should be cloned before 579 * being added to the new chain 580 */ 581 public static void addGroupsToStructure(Structure s, Collection<Group> groups, int model, boolean clone) { 582 Chain chainGuess = null; 583 for(Group g : groups) { 584 chainGuess = addGroupToStructure(s, g, model, chainGuess, clone); 585 } 586 } 587 588 /** 589 * Expand a set of atoms into all groups from the same structure. 590 * 591 * If the structure is set, only the first atom is used (assuming all 592 * atoms come from the same original structure). 593 * If the atoms aren't linked to a structure (for instance, for cloned atoms), 594 * searches all chains of all atoms for groups. 595 * @param atoms Sample of atoms 596 * @return All groups from all chains accessible from the input atoms 597 */ 598 public static Set<Group> getAllGroupsFromSubset(Atom[] atoms) { 599 return getAllGroupsFromSubset(atoms,null); 600 } 601 /** 602 * Expand a set of atoms into all groups from the same structure. 603 * 604 * If the structure is set, only the first atom is used (assuming all 605 * atoms come from the same original structure). 606 * If the atoms aren't linked to a structure (for instance, for cloned atoms), 607 * searches all chains of all atoms for groups. 608 * @param atoms Sample of atoms 609 * @param types Type of groups to return (useful for getting only ligands, for instance). 610 * Null gets all groups. 611 * @return All groups from all chains accessible from the input atoms 612 */ 613 public static Set<Group> getAllGroupsFromSubset(Atom[] atoms,GroupType types) { 614 // Get the full structure 615 Structure s = null; 616 if (atoms.length > 0) { 617 Group g = atoms[0].getGroup(); 618 if (g != null) { 619 Chain c = g.getChain(); 620 if (c != null) { 621 s = c.getStructure(); 622 } 623 } 624 } 625 // Collect all groups from the structure 626 Set<Chain> allChains = new HashSet<>(); 627 if( s != null ) { 628 allChains.addAll(s.getChains()); 629 } 630 // In case the structure wasn't set, need to use ca chains too 631 for(Atom a : atoms) { 632 Group g = a.getGroup(); 633 if(g != null) { 634 Chain c = g.getChain(); 635 if( c != null ) { 636 allChains.add(c); 637 } 638 } 639 } 640 641 if(allChains.isEmpty() ) { 642 return Collections.emptySet(); 643 } 644 645 // Extract all ligand groups 646 Set<Group> full = new HashSet<>(); 647 for(Chain c : allChains) { 648 if(types == null) { 649 full.addAll(c.getAtomGroups()); 650 } else { 651 full.addAll(c.getAtomGroups(types)); 652 } 653 } 654 655 return full; 656 } 657 658 659 /** 660 * Returns and array of all non-Hydrogen atoms in the given Structure, 661 * optionally including HET atoms or not. Waters are not included. 662 * 663 * @param s 664 * @param hetAtoms 665 * if true HET atoms are included in array, if false they are not 666 * @return 667 */ 668 public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms) { 669 AtomIterator iter = new AtomIterator(s); 670 return getAllNonHAtomArray(s, hetAtoms, iter); 671 } 672 /** 673 * Returns and array of all non-Hydrogen atoms in the given Structure, 674 * optionally including HET atoms or not. Waters are not included. 675 * 676 * @param s 677 * @param hetAtoms 678 * if true HET atoms are included in array, if false they are not 679 * @param modelNr Model number to draw atoms from 680 * @return 681 */ 682 public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, int modelNr) { 683 AtomIterator iter = new AtomIterator(s,modelNr); 684 return getAllNonHAtomArray(s, hetAtoms, iter); 685 } 686 private static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, AtomIterator iter) { 687 List<Atom> atoms = new ArrayList<Atom>(); 688 689 while (iter.hasNext()) { 690 Atom a = iter.next(); 691 if (a.getElement() == Element.H) 692 continue; 693 694 Group g = a.getGroup(); 695 696 if (g.isWater()) 697 continue; 698 699 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 700 continue; 701 702 atoms.add(a); 703 } 704 return atoms.toArray(new Atom[atoms.size()]); 705 } 706 707 /** 708 * Returns and array of all non-Hydrogen atoms in the given Chain, 709 * optionally including HET atoms or not Waters are not included. 710 * 711 * @param c 712 * @param hetAtoms 713 * if true HET atoms are included in array, if false they are not 714 * @return 715 */ 716 public static final Atom[] getAllNonHAtomArray(Chain c, boolean hetAtoms) { 717 List<Atom> atoms = new ArrayList<Atom>(); 718 719 for (Group g : c.getAtomGroups()) { 720 if (g.isWater()) 721 continue; 722 for (Atom a : g.getAtoms()) { 723 724 if (a.getElement() == Element.H) 725 continue; 726 727 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 728 continue; 729 730 atoms.add(a); 731 } 732 } 733 return atoms.toArray(new Atom[atoms.size()]); 734 } 735 736 /** 737 * Returns and array of all non-Hydrogen atoms coordinates in the given Chain, 738 * optionally including HET atoms or not Waters are not included. 739 * 740 * @param c 741 * @param hetAtoms 742 * if true HET atoms are included in array, if false they are not 743 * @return 744 */ 745 public static final Point3d[] getAllNonHCoordsArray(Chain c, boolean hetAtoms) { 746 List<Point3d> atoms = new ArrayList<Point3d>(); 747 748 for (Group g : c.getAtomGroups()) { 749 if (g.isWater()) 750 continue; 751 for (Atom a : g.getAtoms()) { 752 753 if (a.getElement() == Element.H) 754 continue; 755 756 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 757 continue; 758 759 atoms.add(a.getCoordsAsPoint3d()); 760 } 761 } 762 return atoms.toArray(new Point3d[atoms.size()]); 763 } 764 765 /** 766 * Adds to the given atoms list, all atoms of groups that contained all 767 * requested atomNames, i.e. if a group does not contain all of the 768 * requested atom names, its atoms won't be added. 769 * 770 * @param atomNames 771 * @param chains 772 * @param atoms 773 */ 774 private static void extractAtoms(String[] atomNames, List<Chain> chains, 775 List<Atom> atoms) { 776 777 for (Chain c : chains) { 778 779 for (Group g : c.getAtomGroups()) { 780 781 // a temp container for the atoms of this group 782 List<Atom> thisGroupAtoms = new ArrayList<Atom>(); 783 // flag to check if this group contains all the requested atoms. 784 boolean thisGroupAllAtoms = true; 785 for (String atomName : atomNames) { 786 Atom a = g.getAtom(atomName); 787 788 if (a == null) { 789 // this group does not have a required atom, skip it... 790 thisGroupAllAtoms = false; 791 break; 792 } 793 thisGroupAtoms.add(a); 794 } 795 if (thisGroupAllAtoms) { 796 // add the atoms of this group to the array. 797 for (Atom a : thisGroupAtoms) { 798 atoms.add(a); 799 } 800 } 801 802 } 803 } 804 } 805 806 /** 807 * Returns an array of the requested Atoms from the Chain object. Iterates 808 * over all groups and checks if the requested atoms are in this group, no 809 * matter if this is a AminoAcid or Hetatom group. If the group does not 810 * contain all requested atoms then no atoms are added for that group. 811 * 812 * @param c 813 * the Chain to get the atoms from 814 * 815 * @param atomNames 816 * contains the atom names to be used. 817 * @return an Atom[] array 818 */ 819 public static final Atom[] getAtomArray(Chain c, String[] atomNames) { 820 821 List<Atom> atoms = new ArrayList<Atom>(); 822 823 for (Group g : c.getAtomGroups()) { 824 825 // a temp container for the atoms of this group 826 List<Atom> thisGroupAtoms = new ArrayList<Atom>(); 827 // flag to check if this group contains all the requested atoms. 828 boolean thisGroupAllAtoms = true; 829 for (String atomName : atomNames) { 830 Atom a = g.getAtom(atomName); 831 if (a == null) { 832 logger.debug("Group " + g.getResidueNumber() + " (" 833 + g.getPDBName() 834 + ") does not have the required atom '" + atomName 835 + "'"); 836 // this group does not have a required atom, skip it... 837 thisGroupAllAtoms = false; 838 break; 839 } 840 thisGroupAtoms.add(a); 841 } 842 843 if (thisGroupAllAtoms) { 844 // add the atoms of this group to the array. 845 for (Atom a : thisGroupAtoms) { 846 atoms.add(a); 847 } 848 } 849 850 } 851 return atoms.toArray(new Atom[atoms.size()]); 852 853 } 854 855 /** 856 * Returns an Atom array of the C-alpha atoms. Any atom that is a carbon and 857 * has CA name will be returned. 858 * 859 * @param c 860 * the structure object 861 * @return an Atom[] array 862 * @see #getRepresentativeAtomArray(Chain) 863 */ 864 public static final Atom[] getAtomCAArray(Chain c) { 865 List<Atom> atoms = new ArrayList<Atom>(); 866 867 for (Group g : c.getAtomGroups()) { 868 if (g.hasAtom(CA_ATOM_NAME) 869 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 870 atoms.add(g.getAtom(CA_ATOM_NAME)); 871 } 872 } 873 874 return atoms.toArray(new Atom[atoms.size()]); 875 } 876 877 /** 878 * Gets a representative atom for each group that is part of the chain 879 * backbone. Note that modified aminoacids won't be returned as part of the 880 * backbone if the {@link org.biojava.nbio.structure.io.mmcif.ReducedChemCompProvider} was used to load the 881 * structure. 882 * 883 * For amino acids, the representative is a CA carbon. For nucleotides, the 884 * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group 885 * types will be ignored. 886 * 887 * @param c 888 * @return representative Atoms of the chain backbone 889 * @since Biojava 4.1.0 890 */ 891 public static final Atom[] getRepresentativeAtomArray(Chain c) { 892 List<Atom> atoms = new ArrayList<Atom>(); 893 894 for (Group g : c.getAtomGroups()) { 895 896 switch (g.getType()) { 897 case AMINOACID: 898 if (g.hasAtom(CA_ATOM_NAME) 899 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 900 atoms.add(g.getAtom(CA_ATOM_NAME)); 901 } 902 break; 903 case NUCLEOTIDE: 904 if (g.hasAtom(NUCLEOTIDE_REPRESENTATIVE)) { 905 atoms.add(g.getAtom(NUCLEOTIDE_REPRESENTATIVE)); 906 } 907 break; 908 default: 909 // don't add 910 } 911 } 912 913 return atoms.toArray(new Atom[atoms.size()]); 914 915 } 916 917 /** 918 * Provides an equivalent copy of Atoms in a new array. Clones everything, 919 * starting with parent groups and chains. The chain will only contain 920 * groups that are part of the input array. 921 * 922 * @param ca 923 * array of representative atoms, e.g. CA atoms 924 * @return Atom array 925 * @since Biojava 4.1.0 926 */ 927 public static final Atom[] cloneAtomArray(Atom[] ca) { 928 Atom[] newCA = new Atom[ca.length]; 929 930 List<Chain> model = new ArrayList<Chain>(); 931 int apos = -1; 932 for (Atom a : ca) { 933 apos++; 934 Group parentG = a.getGroup(); 935 Chain parentC = parentG.getChain(); 936 937 Chain newChain = null; 938 for (Chain c : model) { 939 if (c.getName().equals(parentC.getName())) { 940 newChain = c; 941 break; 942 } 943 } 944 if (newChain == null) { 945 newChain = new ChainImpl(); 946 newChain.setId(parentC.getId()); 947 newChain.setName(parentC.getName()); 948 model.add(newChain); 949 } 950 951 Group parentN = (Group) parentG.clone(); 952 953 newCA[apos] = parentN.getAtom(a.getName()); 954 try { 955 // if the group doesn't exist yet, this produces a StructureException 956 newChain.getGroupByPDB(parentN.getResidueNumber()); 957 } catch (StructureException e) { 958 // the group doesn't exist yet in the newChain, let's add it 959 newChain.addGroup(parentN); 960 } 961 962 } 963 return newCA; 964 } 965 966 /** 967 * Clone a set of representative Atoms, but returns the parent groups 968 * 969 * @param ca 970 * Atom array 971 * @return Group array 972 */ 973 public static Group[] cloneGroups(Atom[] ca) { 974 Group[] newGroup = new Group[ca.length]; 975 976 List<Chain> model = new ArrayList<Chain>(); 977 int apos = -1; 978 for (Atom a : ca) { 979 apos++; 980 Group parentG = a.getGroup(); 981 Chain parentC = parentG.getChain(); 982 983 Chain newChain = null; 984 for (Chain c : model) { 985 if (c.getName().equals(parentC.getName())) { 986 newChain = c; 987 break; 988 } 989 } 990 if (newChain == null) { 991 newChain = new ChainImpl(); 992 newChain.setName(parentC.getName()); 993 model.add(newChain); 994 } 995 996 Group ng = (Group) parentG.clone(); 997 newGroup[apos] = ng; 998 newChain.addGroup(ng); 999 } 1000 return newGroup; 1001 } 1002 1003 /** 1004 * Utility method for working with circular permutations. Creates a 1005 * duplicated and cloned set of Calpha atoms from the input array. 1006 * 1007 * @param ca2 1008 * atom array 1009 * @return cloned and duplicated set of input array 1010 */ 1011 public static Atom[] duplicateCA2(Atom[] ca2) { 1012 // we don't want to rotate input atoms, do we? 1013 Atom[] ca2clone = new Atom[ca2.length * 2]; 1014 1015 int pos = 0; 1016 1017 Chain c = null; 1018 String prevChainId = ""; 1019 for (Atom a : ca2) { 1020 Group g = (Group) a.getGroup().clone(); // works because each group 1021 // has only a single atom 1022 1023 if (c == null) { 1024 c = new ChainImpl(); 1025 Chain orig = a.getGroup().getChain(); 1026 c.setId(orig.getId()); 1027 c.setName(orig.getName()); 1028 } else { 1029 Chain orig = a.getGroup().getChain(); 1030 if (!orig.getId().equals(prevChainId)) { 1031 c = new ChainImpl(); 1032 c.setId(orig.getId()); 1033 c.setName(orig.getName()); 1034 } 1035 } 1036 1037 c.addGroup(g); 1038 ca2clone[pos] = g.getAtom(a.getName()); 1039 1040 pos++; 1041 } 1042 1043 // Duplicate ca2! 1044 c = null; 1045 prevChainId = ""; 1046 for (Atom a : ca2) { 1047 Group g = (Group) a.getGroup().clone(); 1048 1049 if (c == null) { 1050 c = new ChainImpl(); 1051 Chain orig = a.getGroup().getChain(); 1052 c.setId(orig.getId()); 1053 c.setName(orig.getName()); 1054 } else { 1055 Chain orig = a.getGroup().getChain(); 1056 if (!orig.getId().equals(prevChainId)) { 1057 c = new ChainImpl(); 1058 c.setId(orig.getId()); 1059 c.setName(orig.getName()); 1060 } 1061 } 1062 1063 c.addGroup(g); 1064 ca2clone[pos] = g.getAtom(a.getName()); 1065 1066 pos++; 1067 } 1068 1069 return ca2clone; 1070 1071 } 1072 1073 /** 1074 * Return an Atom array of the C-alpha atoms. Any atom that is a carbon and 1075 * has CA name will be returned. 1076 * 1077 * @param s 1078 * the structure object 1079 * @return an Atom[] array 1080 * @see #getRepresentativeAtomArray(Structure) 1081 */ 1082 public static Atom[] getAtomCAArray(Structure s) { 1083 1084 List<Atom> atoms = new ArrayList<Atom>(); 1085 1086 for (Chain c : s.getChains()) { 1087 for (Group g : c.getAtomGroups()) { 1088 if (g.hasAtom(CA_ATOM_NAME) 1089 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 1090 atoms.add(g.getAtom(CA_ATOM_NAME)); 1091 } 1092 } 1093 } 1094 1095 return atoms.toArray(new Atom[atoms.size()]); 1096 } 1097 1098 /** 1099 * Gets a representative atom for each group that is part of the chain 1100 * backbone. Note that modified aminoacids won't be returned as part of the 1101 * backbone if the {@link org.biojava.nbio.structure.io.mmcif.ReducedChemCompProvider} was used to load the 1102 * structure. 1103 * 1104 * For amino acids, the representative is a CA carbon. For nucleotides, the 1105 * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group 1106 * types will be ignored. 1107 * 1108 * @param s 1109 * Input structure 1110 * @return representative Atoms of the structure backbone 1111 * @since Biojava 4.1.0 1112 */ 1113 public static Atom[] getRepresentativeAtomArray(Structure s) { 1114 1115 List<Atom> atoms = new ArrayList<Atom>(); 1116 1117 for (Chain c : s.getChains()) { 1118 Atom[] chainAtoms = getRepresentativeAtomArray(c); 1119 for (Atom a : chainAtoms) { 1120 atoms.add(a); 1121 } 1122 } 1123 1124 return atoms.toArray(new Atom[atoms.size()]); 1125 } 1126 1127 /** 1128 * Return an Atom array of the main chain atoms: CA, C, N, O Any group that 1129 * contains those atoms will be included, be it a standard aminoacid or not 1130 * 1131 * @param s 1132 * the structure object 1133 * @return an Atom[] array 1134 */ 1135 public static Atom[] getBackboneAtomArray(Structure s) { 1136 1137 List<Atom> atoms = new ArrayList<Atom>(); 1138 1139 for (Chain c : s.getChains()) { 1140 for (Group g : c.getAtomGroups()) { 1141 if (g.hasAminoAtoms()) { 1142 // this means we will only take atoms grom groups that have 1143 // complete backbones 1144 for (Atom a : g.getAtoms()) { 1145 switch (g.getType()) { 1146 case NUCLEOTIDE: 1147 // Nucleotide backbone 1148 if (a.getName().equals(C1_ATOM_NAME)) 1149 atoms.add(a); 1150 if (a.getName().equals(C2_ATOM_NAME)) 1151 atoms.add(a); 1152 if (a.getName().equals(C3_ATOM_NAME)) 1153 atoms.add(a); 1154 if (a.getName().equals(C4_ATOM_NAME)) 1155 atoms.add(a); 1156 if (a.getName().equals(O2_ATOM_NAME)) 1157 atoms.add(a); 1158 if (a.getName().equals(O3_ATOM_NAME)) 1159 atoms.add(a); 1160 if (a.getName().equals(O4_ATOM_NAME)) 1161 atoms.add(a); 1162 if (a.getName().equals(O5_ATOM_NAME)) 1163 atoms.add(a); 1164 if (a.getName().equals(OP1_ATOM_NAME)) 1165 atoms.add(a); 1166 if (a.getName().equals(OP2_ATOM_NAME)) 1167 atoms.add(a); 1168 if (a.getName().equals(P_ATOM_NAME)) 1169 atoms.add(a); 1170 // TODO Allow C4* names as well as C4'? -SB 3/2015 1171 break; 1172 case AMINOACID: 1173 default: 1174 // we do it this way instead of with g.getAtom() to 1175 // be sure we always use the same order as original 1176 if (a.getName().equals(CA_ATOM_NAME)) 1177 atoms.add(a); 1178 if (a.getName().equals(C_ATOM_NAME)) 1179 atoms.add(a); 1180 if (a.getName().equals(N_ATOM_NAME)) 1181 atoms.add(a); 1182 if (a.getName().equals(O_ATOM_NAME)) 1183 atoms.add(a); 1184 break; 1185 } 1186 } 1187 } 1188 } 1189 1190 } 1191 1192 return atoms.toArray(new Atom[atoms.size()]); 1193 } 1194 1195 /** 1196 * Convert three character amino acid codes into single character e.g. 1197 * convert CYS to C. Valid 3-letter codes will be those of the standard 20 1198 * amino acids plus MSE, CSE, SEC, PYH, PYL (see the {@link #aminoAcids} 1199 * map) 1200 * 1201 * @return the 1 letter code, or null if the given 3 letter code does not 1202 * correspond to an amino acid code 1203 * @param groupCode3 1204 * a three character amino acid representation String 1205 * @see {@link #get1LetterCode(String)} 1206 */ 1207 public static final Character get1LetterCodeAmino(String groupCode3) { 1208 return aminoAcids.get(groupCode3); 1209 } 1210 1211 /** 1212 * Convert a three letter amino acid or nucleotide code into a single 1213 * character code. If the code does not correspond to an amino acid or 1214 * nucleotide, returns {@link #UNKNOWN_GROUP_LABEL}. 1215 * 1216 * Returned null for nucleotides prior to version 4.0.1. 1217 * 1218 * @param groupCode3 1219 * three letter representation 1220 * @return The 1-letter abbreviation 1221 */ 1222 public static final Character get1LetterCode(String groupCode3) { 1223 1224 Character code1; 1225 1226 // is it a standard amino acid ? 1227 code1 = get1LetterCodeAmino(groupCode3); 1228 1229 if (code1 == null) { 1230 // hm groupCode3 is not standard 1231 // perhaps it is a nucleotide? 1232 groupCode3 = groupCode3.trim(); 1233 if (isNucleotide(groupCode3)) { 1234 code1 = nucleotides30.get(groupCode3); 1235 if (code1 == null) { 1236 code1 = nucleotides23.get(groupCode3); 1237 } 1238 if (code1 == null) { 1239 code1 = UNKNOWN_GROUP_LABEL; 1240 } 1241 } else { 1242 // does not seem to be so let's assume it is 1243 // nonstandard aminoacid and label it "X" 1244 // logger.warning("unknown group name "+groupCode3 ); 1245 code1 = UNKNOWN_GROUP_LABEL; 1246 } 1247 } 1248 1249 return code1; 1250 1251 } 1252 1253 /** 1254 * Test if the three-letter code of an ATOM entry corresponds to a 1255 * nucleotide or to an aminoacid. 1256 * 1257 * @param groupCode3 1258 * 3-character code for a group. 1259 * 1260 */ 1261 public static final boolean isNucleotide(String groupCode3) { 1262 String code = groupCode3.trim(); 1263 return nucleotides30.containsKey(code) 1264 || nucleotides23.containsKey(code); 1265 } 1266 1267 /** 1268 * Reduce a structure to provide a smaller representation . Only takes the 1269 * first model of the structure. If chainName is provided only return a 1270 * structure containing that Chain ID. Converts lower case chain IDs to 1271 * upper case if structure does not contain a chain with that ID. 1272 * 1273 * @param s 1274 * @param chainId 1275 * @return Structure 1276 * @since 3.0 1277 * @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0) 1278 */ 1279 @Deprecated 1280 public static final Structure getReducedStructure(Structure s, 1281 String chainId) throws StructureException { 1282 // since we deal here with structure alignments, 1283 // only use Model 1... 1284 1285 Structure newS = new StructureImpl(); 1286 newS.setPDBCode(s.getPDBCode()); 1287 newS.setPDBHeader(s.getPDBHeader()); 1288 newS.setName(s.getName()); 1289 newS.setSSBonds(s.getSSBonds()); 1290 newS.setDBRefs(s.getDBRefs()); 1291 newS.setSites(s.getSites()); 1292 newS.setBiologicalAssembly(s.isBiologicalAssembly()); 1293 newS.setEntityInfos(s.getEntityInfos()); 1294 newS.setSSBonds(s.getSSBonds()); 1295 newS.setSites(s.getSites()); 1296 1297 if (chainId != null) 1298 chainId = chainId.trim(); 1299 1300 if (chainId == null || chainId.equals("")) { 1301 // only get model 0 1302 List<Chain> model0 = s.getModel(0); 1303 for (Chain c : model0) { 1304 newS.addChain(c); 1305 } 1306 return newS; 1307 1308 } 1309 1310 Chain c = null; 1311 try { 1312 c = s.getChainByPDB(chainId); 1313 } catch (StructureException e) { 1314 logger.warn(e.getMessage() + ". Chain id " + chainId 1315 + " did not match, trying upper case Chain id."); 1316 c = s.getChainByPDB(chainId.toUpperCase()); 1317 1318 } 1319 if (c != null) { 1320 newS.addChain(c); 1321 for (EntityInfo comp : s.getEntityInfos()) { 1322 if (comp.getChainIds() != null 1323 && comp.getChainIds().contains(c.getChainID())) { 1324 // found matching entity info. set description... 1325 newS.getPDBHeader().setDescription( 1326 "Chain " + c.getChainID() + " of " + s.getPDBCode() 1327 + " " + comp.getDescription()); 1328 } 1329 } 1330 } 1331 1332 return newS; 1333 } 1334 1335 public static final String convertAtomsToSeq(Atom[] atoms) { 1336 1337 StringBuilder buf = new StringBuilder(); 1338 Group prevGroup = null; 1339 for (Atom a : atoms) { 1340 Group g = a.getGroup(); 1341 if (prevGroup != null) { 1342 if (prevGroup.equals(g)) { 1343 // we add each group only once. 1344 continue; 1345 } 1346 } 1347 String code3 = g.getPDBName(); 1348 Character code1 = get1LetterCodeAmino(code3); 1349 if (code1 == null) 1350 code1 = UNKNOWN_GROUP_LABEL; 1351 1352 buf.append(code1); 1353 1354 prevGroup = g; 1355 1356 } 1357 return buf.toString(); 1358 } 1359 1360 /** 1361 * Get a group represented by a ResidueNumber. 1362 * 1363 * @param struc 1364 * a {@link Structure} 1365 * @param pdbResNum 1366 * a {@link ResidueNumber} 1367 * @return a group in the structure that is represented by the pdbResNum. 1368 * @throws StructureException 1369 * if the group cannot be found. 1370 */ 1371 public static final Group getGroupByPDBResidueNumber(Structure struc, 1372 ResidueNumber pdbResNum) throws StructureException { 1373 if (struc == null || pdbResNum == null) { 1374 throw new IllegalArgumentException("Null argument(s)."); 1375 } 1376 1377 Chain chain = struc.getPolyChainByPDB(pdbResNum.getChainName()); 1378 1379 return chain.getGroupByPDB(pdbResNum); 1380 } 1381 1382 /** 1383 * Returns the set of intra-chain contacts for the given chain for given 1384 * atom names, i.e. the contact map. Uses a geometric hashing algorithm that 1385 * speeds up the calculation without need of full distance matrix. The 1386 * parsing mode {@link FileParsingParameters#setAlignSeqRes(boolean)} needs 1387 * to be set to true for this to work. 1388 * 1389 * @param chain 1390 * @param atomNames 1391 * the array with atom names to be used. Beware: CA will do both 1392 * C-alphas an Calciums if null all non-H atoms of non-hetatoms 1393 * will be used 1394 * @param cutoff 1395 * @return 1396 */ 1397 public static AtomContactSet getAtomsInContact(Chain chain, 1398 String[] atomNames, double cutoff) { 1399 Grid grid = new Grid(cutoff); 1400 1401 Atom[] atoms = null; 1402 if (atomNames == null) { 1403 atoms = getAllNonHAtomArray(chain, false); 1404 } else { 1405 atoms = getAtomArray(chain, atomNames); 1406 } 1407 // If tha 1408 if(atoms.length==0){ 1409 logger.warn("No atoms found for buidling grid!"); 1410 return new AtomContactSet(cutoff); 1411 } 1412 grid.addAtoms(atoms); 1413 1414 return grid.getAtomContacts(); 1415 } 1416 1417 /** 1418 * Returns the set of intra-chain contacts for the given chain for all non-H 1419 * atoms of non-hetatoms, i.e. the contact map. Uses a geometric hashing 1420 * algorithm that speeds up the calculation without need of full distance 1421 * matrix. The parsing mode 1422 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1423 * true for this to work. 1424 * 1425 * @param chain 1426 * @param cutoff 1427 * @return 1428 */ 1429 public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) { 1430 return getAtomsInContact(chain, (String[]) null, cutoff); 1431 } 1432 1433 /** 1434 * Returns the set of intra-chain contacts for the given chain for C-alpha 1435 * atoms (including non-standard aminoacids appearing as HETATM groups), 1436 * i.e. the contact map. Uses a geometric hashing algorithm that speeds up 1437 * the calculation without need of full distance matrix. The parsing mode 1438 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1439 * true for this to work. 1440 * 1441 * @param chain 1442 * @param cutoff 1443 * @return 1444 * @see {@link #getRepresentativeAtomsInContact(Chain, double)} 1445 */ 1446 public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) { 1447 Grid grid = new Grid(cutoff); 1448 1449 Atom[] atoms = getAtomCAArray(chain); 1450 1451 grid.addAtoms(atoms); 1452 1453 return grid.getAtomContacts(); 1454 } 1455 1456 /** 1457 * Returns the set of intra-chain contacts for the given chain for C-alpha 1458 * or C3' atoms (including non-standard aminoacids appearing as HETATM 1459 * groups), i.e. the contact map. Uses a geometric hashing algorithm that 1460 * speeds up the calculation without need of full distance matrix. 1461 * 1462 * @param chain 1463 * @param cutoff 1464 * @return 1465 * @since Biojava 4.1.0 1466 */ 1467 public static AtomContactSet getRepresentativeAtomsInContact(Chain chain, 1468 double cutoff) { 1469 Grid grid = new Grid(cutoff); 1470 1471 Atom[] atoms = getRepresentativeAtomArray(chain); 1472 1473 grid.addAtoms(atoms); 1474 1475 return grid.getAtomContacts(); 1476 } 1477 1478 /** 1479 * Returns the set of inter-chain contacts between the two given chains for 1480 * the given atom names. Uses a geometric hashing algorithm that speeds up 1481 * the calculation without need of full distance matrix. The parsing mode 1482 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1483 * true for this to work. 1484 * 1485 * @param chain1 1486 * @param chain2 1487 * @param atomNames 1488 * the array with atom names to be used. For Calphas use {"CA"}, 1489 * if null all non-H atoms will be used. Note HET atoms are 1490 * ignored unless this parameter is null. 1491 * @param cutoff 1492 * @param hetAtoms 1493 * if true HET atoms are included, if false they are not 1494 * @return 1495 */ 1496 public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2, 1497 String[] atomNames, double cutoff, boolean hetAtoms) { 1498 Grid grid = new Grid(cutoff); 1499 Atom[] atoms1 = null; 1500 Atom[] atoms2 = null; 1501 if (atomNames == null) { 1502 atoms1 = getAllNonHAtomArray(chain1, hetAtoms); 1503 atoms2 = getAllNonHAtomArray(chain2, hetAtoms); 1504 } else { 1505 atoms1 = getAtomArray(chain1, atomNames); 1506 atoms2 = getAtomArray(chain2, atomNames); 1507 } 1508 grid.addAtoms(atoms1, atoms2); 1509 1510 return grid.getAtomContacts(); 1511 } 1512 1513 /** 1514 * Returns the set of inter-chain contacts between the two given chains for 1515 * all non-H atoms. Uses a geometric hashing algorithm that speeds up the 1516 * calculation without need of full distance matrix. The parsing mode 1517 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1518 * true for this to work. 1519 * 1520 * @param chain1 1521 * @param chain2 1522 * @param cutoff 1523 * @param hetAtoms 1524 * if true HET atoms are included, if false they are not 1525 * @return 1526 */ 1527 public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2, 1528 double cutoff, boolean hetAtoms) { 1529 return getAtomsInContact(chain1, chain2, null, cutoff, hetAtoms); 1530 } 1531 1532 /** 1533 * Finds Groups in {@code structure} that contain at least one Atom that is 1534 * within {@code radius} Angstroms of {@code centroid}. 1535 * 1536 * @param structure 1537 * The structure from which to find Groups 1538 * @param centroid 1539 * The centroid of the shell 1540 * @param excludeResidues 1541 * A list of ResidueNumbers to exclude 1542 * @param radius 1543 * The radius from {@code centroid}, in Angstroms 1544 * @param includeWater 1545 * Whether to include Groups whose <em>only</em> atoms are water 1546 * @param useAverageDistance 1547 * When set to true, distances are the arithmetic mean (1-norm) 1548 * of the distances of atoms that belong to the group and that 1549 * are within the shell; otherwise, distances are the minimum of 1550 * these values 1551 * @return A map of Groups within (or partially within) the shell, to their 1552 * distances in Angstroms 1553 */ 1554 public static Map<Group, Double> getGroupDistancesWithinShell( 1555 Structure structure, Atom centroid, 1556 Set<ResidueNumber> excludeResidues, double radius, 1557 boolean includeWater, boolean useAverageDistance) { 1558 1559 // for speed, we avoid calculating square roots 1560 radius = radius * radius; 1561 1562 Map<Group, Double> distances = new HashMap<Group, Double>(); 1563 1564 // we only need this if we're averaging distances 1565 // note that we can't use group.getAtoms().size() because some the 1566 // group's atoms be outside the shell 1567 Map<Group, Integer> atomCounts = new HashMap<Group, Integer>(); 1568 1569 for (Chain chain : structure.getChains()) { 1570 groupLoop: for (Group chainGroup : chain.getAtomGroups()) { 1571 1572 // exclude water 1573 if (!includeWater && chainGroup.isWater()) 1574 continue; 1575 1576 // check blacklist of residue numbers 1577 for (ResidueNumber rn : excludeResidues) { 1578 if (rn.equals(chainGroup.getResidueNumber())) 1579 continue groupLoop; 1580 } 1581 1582 for (Atom testAtom : chainGroup.getAtoms()) { 1583 1584 // use getDistanceFast as we are doing a lot of comparisons 1585 double dist = Calc.getDistanceFast(centroid, testAtom); 1586 1587 // if we're the shell 1588 if (dist <= radius) { 1589 if (!distances.containsKey(chainGroup)) 1590 distances.put(chainGroup, Double.POSITIVE_INFINITY); 1591 if (useAverageDistance) { 1592 // sum the distance; we'll divide by the total 1593 // number later 1594 // here, we CANNOT use fastDistance (distance 1595 // squared) because we want the arithmetic mean 1596 distances.put(chainGroup, distances.get(chainGroup) 1597 + Math.sqrt(dist)); 1598 if (!atomCounts.containsKey(chainGroup)) 1599 atomCounts.put(chainGroup, 0); 1600 atomCounts.put(chainGroup, 1601 atomCounts.get(chainGroup) + 1); 1602 } else { 1603 // take the minimum distance among all atoms of 1604 // chainGroup 1605 // note that we can't break here because we might 1606 // find a smaller distance 1607 if (dist < distances.get(chainGroup)) { 1608 distances.put(chainGroup, dist); 1609 } 1610 } 1611 } 1612 1613 } 1614 } 1615 } 1616 1617 if (useAverageDistance) { 1618 for (Map.Entry<Group, Double> entry : distances.entrySet()) { 1619 int count = atomCounts.get(entry.getKey()); 1620 distances.put(entry.getKey(), entry.getValue() / count); 1621 } 1622 } else { 1623 // in this case we used getDistanceFast 1624 for (Map.Entry<Group, Double> entry : distances.entrySet()) { 1625 distances.put(entry.getKey(), Math.sqrt(entry.getValue())); 1626 } 1627 } 1628 1629 return distances; 1630 1631 } 1632 1633 public static Set<Group> getGroupsWithinShell(Structure structure, 1634 Atom atom, Set<ResidueNumber> excludeResidues, double distance, 1635 boolean includeWater) { 1636 1637 // square the distance to use as a comparison against getDistanceFast 1638 // which returns the square of a distance. 1639 distance = distance * distance; 1640 1641 Set<Group> returnSet = new LinkedHashSet<Group>(); 1642 for (Chain chain : structure.getChains()) { 1643 groupLoop: for (Group chainGroup : chain.getAtomGroups()) { 1644 if (!includeWater && chainGroup.isWater()) 1645 continue; 1646 for (ResidueNumber rn : excludeResidues) { 1647 if (rn.equals(chainGroup.getResidueNumber())) 1648 continue groupLoop; 1649 } 1650 for (Atom atomB : chainGroup.getAtoms()) { 1651 1652 // use getDistanceFast as we are doing a lot of comparisons 1653 double dist = Calc.getDistanceFast(atom, atomB); 1654 if (dist <= distance) { 1655 returnSet.add(chainGroup); 1656 break; 1657 } 1658 1659 } 1660 } 1661 } 1662 return returnSet; 1663 } 1664 1665 /** 1666 * <p> 1667 * Returns a Set of Groups in a structure within the distance specified of a 1668 * given group. 1669 * </p> 1670 * <p> 1671 * Updated 18-Sep-2015 sroughley to return a Set so only a unique set of 1672 * Groups returned 1673 * 1674 * @param structure 1675 * The structure to work with 1676 * @param group 1677 * The 'query' group 1678 * @param distance 1679 * The cutoff distance 1680 * @param includeWater 1681 * Should water residues be included in the output? 1682 * @return {@link LinkedHashSet} of {@link Group}s within at least one atom 1683 * with {@code distance} of at least one atom in {@code group} 1684 */ 1685 public static Set<Group> getGroupsWithinShell(Structure structure, 1686 Group group, double distance, boolean includeWater) { 1687 1688 Set<Group> returnList = new LinkedHashSet<Group>(); 1689 1690 Set<ResidueNumber> excludeGroups = new HashSet<ResidueNumber>(); 1691 excludeGroups.add(group.getResidueNumber()); 1692 for (Atom atom : group.getAtoms()) { 1693 Set<Group> set = getGroupsWithinShell(structure, atom, 1694 excludeGroups, distance, includeWater); 1695 returnList.addAll(set); 1696 } 1697 1698 return returnList; 1699 } 1700 1701 /** 1702 * Remove all models from a Structure and keep only the first 1703 * 1704 * @param s 1705 * original Structure 1706 * @return a structure that contains only the first model 1707 * @since 3.0.5 1708 */ 1709 public static Structure removeModels(Structure s) { 1710 if (s.nrModels() == 1) 1711 return s; 1712 1713 Structure n = new StructureImpl(); 1714 // go through whole substructure and clone ... 1715 1716 // copy structure data 1717 1718 n.setPDBCode(s.getPDBCode()); 1719 n.setName(s.getName()); 1720 1721 // TODO: do deep copying of data! 1722 n.setPDBHeader(s.getPDBHeader()); 1723 n.setDBRefs(s.getDBRefs()); 1724 1725 n.setSites(s.getSites()); 1726 1727 n.setChains(s.getModel(0)); 1728 1729 return n; 1730 1731 } 1732 1733 /** 1734 * Removes all polymeric and solvent groups from a list of groups 1735 * 1736 */ 1737 public static List<Group> filterLigands(List<Group> allGroups) { 1738 1739 List<Group> groups = new ArrayList<Group>(); 1740 for (Group g : allGroups) { 1741 1742 if ( g.isPolymeric()) 1743 continue; 1744 1745 if (!g.isWater()) { 1746 groups.add(g); 1747 } 1748 } 1749 1750 return groups; 1751 } 1752 1753 /** 1754 * Short version of {@link #getStructure(String, PDBFileParser, AtomCache)} 1755 * which creates new parsers when needed 1756 * 1757 * @param name 1758 * @return 1759 * @throws IOException 1760 * @throws StructureException 1761 */ 1762 public static Structure getStructure(String name) throws IOException, 1763 StructureException { 1764 return StructureTools.getStructure(name, null, null); 1765 } 1766 1767 /** 1768 * Flexibly get a structure from an input String. The intent of this method 1769 * is to allow any reasonable string which could refer to a structure to be 1770 * correctly parsed. The following are currently supported: 1771 * <ol> 1772 * <li>Filename (if name refers to an existing file) 1773 * <li>PDB ID 1774 * <li>SCOP domains 1775 * <li>PDP domains 1776 * <li>Residue ranges 1777 * <li>Other formats supported by AtomCache 1778 * </ol> 1779 * 1780 * @param name 1781 * Some reference to the protein structure 1782 * @param parser 1783 * A clean PDBFileParser to use if it is a file. If null, a 1784 * PDBFileParser will be instantiated if needed. 1785 * @param cache 1786 * An AtomCache to use if the structure can be fetched from the 1787 * PDB. If null, a AtomCache will be instantiated if needed. 1788 * @return A Structure object 1789 * @throws IOException 1790 * if name is an existing file, but doesn't parse correctly 1791 * @throws StructureException 1792 * if the format is unknown, or if AtomCache throws an 1793 * exception. 1794 */ 1795 public static Structure getStructure(String name, PDBFileParser parser, 1796 AtomCache cache) throws IOException, StructureException { 1797 File f = new File(FileDownloadUtils.expandUserHome(name)); 1798 if (f.exists()) { 1799 if (parser == null) { 1800 parser = new PDBFileParser(); 1801 } 1802 InputStream inStream = new FileInputStream(f); 1803 return parser.parsePDBFile(inStream); 1804 } else { 1805 if (cache == null) { 1806 cache = new AtomCache(); 1807 } 1808 return cache.getStructure(name); 1809 } 1810 } 1811 1812 /** 1813 * @deprecated use {@link Chain#isProtein()} instead. 1814 */ 1815 @Deprecated 1816 public static boolean isProtein(Chain c) { 1817 1818 return c.isProtein(); 1819 } 1820 1821 /** 1822 * @deprecated use {@link Chain#isNucleicAcid()} instead. 1823 */ 1824 @Deprecated 1825 public static boolean isNucleicAcid(Chain c) { 1826 return c.isNucleicAcid(); 1827 } 1828 1829 /** 1830 * @deprecated use {@link Chain#getPredominantGroupType()} instead. 1831 */ 1832 @Deprecated 1833 public static GroupType getPredominantGroupType(Chain c) { 1834 return c.getPredominantGroupType(); 1835 } 1836 1837 /** 1838 * @deprecated use {@link Chain#isWaterOnly()} instead. 1839 */ 1840 @Deprecated 1841 public static boolean isChainWaterOnly(Chain c) { 1842 return c.isWaterOnly(); 1843 } 1844 1845 /** @deprecated use {@link Chain#isPureNonPolymer()} instead. 1846 */ 1847 @Deprecated 1848 public static boolean isChainPureNonPolymer(Chain c) { 1849 1850 return c.isPureNonPolymer(); 1851 } 1852 1853 /** 1854 * Cleans up the structure's alternate location groups. All alternate location groups should have all atoms (except in the case of microheterogenity) or when a deuetuim exiss. 1855 * Ensure that all the alt loc groups have all the atoms in the main group 1856 * @param structure The Structure to be cleaned up 1857 */ 1858 public static void cleanUpAltLocs(Structure structure) { 1859 for (int i =0; i< structure.nrModels() ; i++){ 1860 for (Chain chain : structure.getModel(i)) { 1861 for (Group group : chain.getAtomGroups()) { 1862 for (Group altLocGroup : group.getAltLocs()) { 1863 for ( Atom groupAtom : group.getAtoms()) { 1864 // If this alt loc doesn't have this atom 1865 if (! altLocGroup.hasAtom(groupAtom.getName())) { 1866 // Fix for microheterogenity 1867 if (altLocGroup.getPDBName().equals(group.getPDBName())) { 1868 // If it's a Hydrogen then we check for it's Deuterated brother 1869 if(hasDeuteratedEquiv(groupAtom, altLocGroup)){ 1870 1871 } 1872 else{ 1873 altLocGroup.addAtom(groupAtom); 1874 } 1875 } 1876 } 1877 } 1878 } 1879 } 1880 } 1881 } 1882 } 1883 1884 /** 1885 * Check to see if an Deuterated atom has a non deuterated brother in the group. 1886 * @param atom the input atom that is putatively deuterium 1887 * @param currentGroup the group the atom is in 1888 * @return true if the atom is deuterated and it's hydrogen equive exists. 1889 */ 1890 public static boolean hasNonDeuteratedEquiv(Atom atom, Group currentGroup) { 1891 if(atom.getElement()==Element.D && currentGroup.hasAtom(replaceFirstChar(atom.getName(),'D', 'H'))) { 1892 // If it's deuterated and has a non-deuterated brother 1893 return true; 1894 } 1895 return false; 1896 } 1897 1898 /** 1899 * Check to see if a Hydorgen has a Deuterated brother in the group. 1900 * @param atom the input atom that is putatively hydorgen 1901 * @param currentGroup the group the atom is in 1902 * @return true if the atom is hydrogen and it's Deuterium equiv exists. 1903 */ 1904 public static boolean hasDeuteratedEquiv(Atom atom, Group currentGroup) { 1905 if(atom.getElement()==Element.H && currentGroup.hasAtom(replaceFirstChar(atom.getName(),'H', 'D'))) { 1906 // If it's hydrogen and has a deuterated brother 1907 return true; 1908 } 1909 return false; 1910 } 1911 1912 private static String replaceFirstChar(String name, char c, char d) { 1913 if(name.charAt(0)==c){ 1914 return name.replaceFirst(String.valueOf(c), String.valueOf(d)); 1915 } 1916 return name; 1917 } 1918}