001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Jan 4, 2006 021 * 022 */ 023package org.biojava.nbio.structure; 024 025import java.io.File; 026import java.io.FileInputStream; 027import java.io.IOException; 028import java.io.InputStream; 029import java.util.ArrayList; 030import java.util.Arrays; 031import java.util.Collection; 032import java.util.Collections; 033import java.util.HashMap; 034import java.util.HashSet; 035import java.util.Iterator; 036import java.util.LinkedHashSet; 037import java.util.List; 038import java.util.Map; 039import java.util.Set; 040 041import javax.vecmath.Point3d; 042 043import org.biojava.nbio.structure.align.util.AtomCache; 044import org.biojava.nbio.structure.contact.AtomContactSet; 045import org.biojava.nbio.structure.contact.Grid; 046import org.biojava.nbio.structure.io.FileParsingParameters; 047import org.biojava.nbio.structure.io.PDBFileParser; 048import org.biojava.nbio.core.util.FileDownloadUtils; 049import org.slf4j.Logger; 050import org.slf4j.LoggerFactory; 051 052/** 053 * A class that provides some tool methods. 054 * 055 * @author Andreas Prlic 056 * @author Jules Jacobsen 057 * @since 1.0 058 */ 059public class StructureTools { 060 061 private static final Logger logger = LoggerFactory.getLogger(StructureTools.class); 062 063 // Amino Acid backbone 064 /** 065 * The atom name of the backbone C-alpha atom. Note that this can be 066 * ambiguous depending on the context since Calcium atoms use the same name 067 * in PDB. 068 */ 069 public static final String CA_ATOM_NAME = "CA"; 070 071 /** 072 * The atom name for the backbone amide nitrogen 073 */ 074 public static final String N_ATOM_NAME = "N"; 075 076 /** 077 * The atom name for the backbone carbonyl 078 */ 079 public static final String C_ATOM_NAME = "C"; 080 081 /** 082 * The atom name for the backbone carbonyl oxygen 083 */ 084 public static final String O_ATOM_NAME = "O"; 085 086 /** 087 * The atom name of the side-chain C-beta atom 088 */ 089 public static final String CB_ATOM_NAME = "CB"; 090 091 // Nucleotide backbone 092 /** 093 * The atom name of the backbone C1' in RNA 094 */ 095 public static final String C1_ATOM_NAME = "C1'"; 096 /** 097 * The atom name of the backbone C2' in RNA 098 */ 099 public static final String C2_ATOM_NAME = "C2'"; 100 /** 101 * The atom name of the backbone C3' in RNA 102 */ 103 public static final String C3_ATOM_NAME = "C3'"; 104 /** 105 * The atom name of the backbone C4' in RNA 106 */ 107 public static final String C4_ATOM_NAME = "C4'"; 108 /** 109 * The atom name of the backbone O2' in RNA 110 */ 111 public static final String O2_ATOM_NAME = "O2'"; 112 /** 113 * The atom name of the backbone O3' in RNA 114 */ 115 public static final String O3_ATOM_NAME = "O3'"; 116 /** 117 * The atom name of the backbone O4' in RNA 118 */ 119 public static final String O4_ATOM_NAME = "O4'"; 120 /** 121 * The atom name of the backbone O4' in RNA 122 */ 123 public static final String O5_ATOM_NAME = "O5'"; 124 /** 125 * The atom name of the backbone O4' in RNA 126 */ 127 public static final String OP1_ATOM_NAME = "OP1"; 128 /** 129 * The atom name of the backbone O4' in RNA 130 */ 131 public static final String OP2_ATOM_NAME = "OP2"; 132 /** 133 * The atom name of the backbone phosphate in RNA 134 */ 135 public static final String P_ATOM_NAME = "P"; 136 137 /** 138 * The atom used as representative for nucleotides, equivalent to 139 * {@link #CA_ATOM_NAME} for proteins 140 */ 141 public static final String NUCLEOTIDE_REPRESENTATIVE = P_ATOM_NAME; 142 143 /** 144 * The character to use for unknown compounds in sequence strings 145 */ 146 public static final char UNKNOWN_GROUP_LABEL = 'X'; 147 148 /** 149 * Below this ratio of aminoacid/nucleotide residues to the sequence total, 150 * we use simple majority of aminoacid/nucleotide residues to decide the 151 * character of the chain (protein/nucleotide) 152 */ 153 public static final double RATIO_RESIDUES_TO_TOTAL = 0.95; 154 155 /** 156 * Threshold for plausible binding of a ligand to the selected substructure 157 */ 158 public static final double DEFAULT_LIGAND_PROXIMITY_CUTOFF = 5; 159 160 // there is a file format change in PDB 3.0 and nucleotides are being 161 // renamed 162 private static final Map<String, Character> nucleotides30; 163 private static final Map<String, Character> nucleotides23; 164 165 // amino acid 3 and 1 letter code definitions 166 private static final Map<String, Character> aminoAcids; 167 168 private static final Set<Element> hBondDonorAcceptors; 169 170 static { 171 nucleotides30 = new HashMap<String, Character>(); 172 nucleotides30.put("DA", 'A'); 173 nucleotides30.put("DC", 'C'); 174 nucleotides30.put("DG", 'G'); 175 nucleotides30.put("DT", 'T'); 176 nucleotides30.put("DI", 'I'); 177 nucleotides30.put("A", 'A'); 178 nucleotides30.put("G", 'G'); 179 nucleotides30.put("C", 'C'); 180 nucleotides30.put("U", 'U'); 181 nucleotides30.put("I", 'I'); 182 183 // the DNA linkers - the +C , +G, +A +T +U and +I have been replaced 184 // with these: 185 nucleotides30.put("TAF", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine 186 nucleotides30.put("TC1", UNKNOWN_GROUP_LABEL); // Furanosyl 187 nucleotides30.put("TFE", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine 188 nucleotides30.put("TFO", UNKNOWN_GROUP_LABEL); // Tenofovir (3' 189 // terminator) 190 nucleotides30.put("TGP", UNKNOWN_GROUP_LABEL); // Guanine variant 191 nucleotides30.put("THX", UNKNOWN_GROUP_LABEL); // 5' terminator 192 nucleotides30.put("TLC", UNKNOWN_GROUP_LABEL); // Thymine with dicyclic 193 // sugar 194 nucleotides30.put("TLN", UNKNOWN_GROUP_LABEL); // locked Thymine 195 nucleotides30.put("LCG", UNKNOWN_GROUP_LABEL); // locked Guanine 196 nucleotides30.put("TP1", UNKNOWN_GROUP_LABEL); // Thymine peptide 197 // nucleic acid, with 198 // added methyl 199 nucleotides30.put("CP1", UNKNOWN_GROUP_LABEL); // Cytidine peptide 200 // nucleic acid, with 201 // added methyl 202 nucleotides30.put("TPN", UNKNOWN_GROUP_LABEL); // Thymine peptide 203 // nucleic acid 204 nucleotides30.put("CPN", UNKNOWN_GROUP_LABEL); // Cytidine peptide 205 // nucleic acid 206 nucleotides30.put("GPN", UNKNOWN_GROUP_LABEL); // Guanine peptide 207 // nucleic acid 208 nucleotides30.put("APN", UNKNOWN_GROUP_LABEL); // Adenosine peptide 209 // nucleic acid 210 nucleotides30.put("TPC", UNKNOWN_GROUP_LABEL); // Thymine variant 211 212 // store nucleic acids (C, G, A, T, U, and I), and 213 // the modified versions of nucleic acids (+C, +G, +A, +T, +U, and +I), 214 // and 215 nucleotides23 = new HashMap<String, Character>(); 216 String[] names = { "C", "G", "A", "T", "U", "I", "+C", "+G", "+A", 217 "+T", "+U", "+I" }; 218 for (String n : names) { 219 nucleotides23.put(n, n.charAt(n.length() - 1)); 220 } 221 222 aminoAcids = new HashMap<String, Character>(); 223 aminoAcids.put("GLY", 'G'); 224 aminoAcids.put("ALA", 'A'); 225 aminoAcids.put("VAL", 'V'); 226 aminoAcids.put("LEU", 'L'); 227 aminoAcids.put("ILE", 'I'); 228 aminoAcids.put("PHE", 'F'); 229 aminoAcids.put("TYR", 'Y'); 230 aminoAcids.put("TRP", 'W'); 231 aminoAcids.put("PRO", 'P'); 232 aminoAcids.put("HIS", 'H'); 233 aminoAcids.put("LYS", 'K'); 234 aminoAcids.put("ARG", 'R'); 235 aminoAcids.put("SER", 'S'); 236 aminoAcids.put("THR", 'T'); 237 aminoAcids.put("GLU", 'E'); 238 aminoAcids.put("GLN", 'Q'); 239 aminoAcids.put("ASP", 'D'); 240 aminoAcids.put("ASN", 'N'); 241 aminoAcids.put("CYS", 'C'); 242 aminoAcids.put("MET", 'M'); 243 // MSE is only found as a molecular replacement for MET 244 aminoAcids.put("MSE", 'M'); 245 // 'non-standard', genetically encoded 246 // http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html 247 // IUBMB recommended name is 'SEC' but the wwPDB currently use 'CSE' 248 // likewise 'PYL' (IUBMB) and 'PYH' (PDB) 249 aminoAcids.put("CSE", 'U'); 250 aminoAcids.put("SEC", 'U'); 251 aminoAcids.put("PYH", 'O'); 252 aminoAcids.put("PYL", 'O'); 253 254 hBondDonorAcceptors = new HashSet<Element>(); 255 hBondDonorAcceptors.add(Element.N); 256 hBondDonorAcceptors.add(Element.O); 257 hBondDonorAcceptors.add(Element.S); 258 259 } 260 261 /** 262 * Count how many Atoms are contained within a Structure object. 263 * 264 * @param s 265 * the structure object 266 * @return the number of Atoms in this Structure 267 */ 268 public static int getNrAtoms(Structure s) { 269 270 int nrAtoms = 0; 271 272 Iterator<Group> iter = new GroupIterator(s); 273 274 while (iter.hasNext()) { 275 Group g = iter.next(); 276 nrAtoms += g.size(); 277 } 278 279 return nrAtoms; 280 } 281 282 /** 283 * Count how many groups are contained within a structure object. 284 * 285 * @param s 286 * the structure object 287 * @return the number of groups in the structure 288 */ 289 public static int getNrGroups(Structure s) { 290 int nrGroups = 0; 291 292 List<Chain> chains = s.getChains(0); 293 for (Chain c : chains) { 294 nrGroups += c.getAtomLength(); 295 } 296 return nrGroups; 297 } 298 299 /** 300 * Returns an array of the requested Atoms from the Structure object. 301 * Iterates over all groups and checks if the requested atoms are in this 302 * group, no matter if this is a {@link AminoAcid} or {@link HetatomImpl} 303 * group. If the group does not contain all requested atoms then no atoms 304 * are added for that group. For structures with more than one model, only 305 * model 0 will be used. 306 * 307 * @param s 308 * the structure to get the atoms from 309 * 310 * @param atomNames 311 * contains the atom names to be used. 312 * @return an Atom[] array 313 */ 314 public static Atom[] getAtomArray(Structure s, String[] atomNames) { 315 List<Chain> chains = s.getModel(0); 316 317 List<Atom> atoms = new ArrayList<Atom>(); 318 319 extractAtoms(atomNames, chains, atoms); 320 return atoms.toArray(new Atom[0]); 321 322 } 323 324 /** 325 * Returns an array of the requested Atoms from the Structure object. In 326 * contrast to {@link #getAtomArray(Structure, String[])} this method 327 * iterates over all chains. Iterates over all chains and groups and checks 328 * if the requested atoms are in this group, no matter if this is a 329 * {@link AminoAcid} or {@link HetatomImpl} group. If the group does not 330 * contain all requested atoms then no atoms are added for that group. For 331 * structures with more than one model, only model 0 will be used. 332 * 333 * @param s 334 * the structure to get the atoms from 335 * 336 * @param atomNames 337 * contains the atom names to be used. 338 * @return an Atom[] array 339 */ 340 public static Atom[] getAtomArrayAllModels(Structure s, 341 String[] atomNames) { 342 343 List<Atom> atoms = new ArrayList<>(); 344 345 for (int i = 0; i < s.nrModels(); i++) { 346 List<Chain> chains = s.getModel(i); 347 extractAtoms(atomNames, chains, atoms); 348 } 349 return atoms.toArray(new Atom[0]); 350 351 } 352 353 /** 354 * Convert all atoms of the structure (all models) into an Atom array 355 * 356 * @param s 357 * input structure 358 * @return all atom array 359 */ 360 public static Atom[] getAllAtomArray(Structure s) { 361 List<Atom> atoms = new ArrayList<>(); 362 363 AtomIterator iter = new AtomIterator(s); 364 while (iter.hasNext()) { 365 Atom a = iter.next(); 366 atoms.add(a); 367 } 368 return atoms.toArray(new Atom[0]); 369 } 370 /** 371 * Convert all atoms of the structure (specified model) into an Atom array 372 * 373 * @param s 374 * input structure 375 * @return all atom array 376 */ 377 public static Atom[] getAllAtomArray(Structure s, int model) { 378 List<Atom> atoms = new ArrayList<>(); 379 380 AtomIterator iter = new AtomIterator(s,model); 381 while (iter.hasNext()) { 382 Atom a = iter.next(); 383 atoms.add(a); 384 } 385 return atoms.toArray(new Atom[0]); 386 387 } 388 389 /** 390 * Returns and array of all atoms of the chain, including 391 * Hydrogens (if present) and all HETATOMs. Waters are not included. 392 * 393 * @param c 394 * input chain 395 * @return all atom array 396 */ 397 public static Atom[] getAllAtomArray(Chain c) { 398 List<Atom> atoms = new ArrayList<>(); 399 400 for (Group g : c.getAtomGroups()) { 401 if (g.isWater()) 402 continue; 403 atoms.addAll(g.getAtoms()); 404 } 405 return atoms.toArray(new Atom[0]); 406 } 407 408 /** 409 * List of groups from the structure not included in ca (e.g. ligands). 410 * 411 * Unaligned groups are searched from all chains referenced in ca, as well 412 * as any chains in the first model of the structure from ca[0], if any. 413 * 414 * @param ca an array of atoms 415 * @return 416 */ 417 public static List<Group> getUnalignedGroups(Atom[] ca) { 418 Set<Chain> chains = new HashSet<>(); 419 Set<Group> caGroups = new HashSet<>(); 420 421 // Create list of all chains in this structure 422 Structure s = null; 423 if (ca.length > 0) { 424 Group g = ca[0].getGroup(); 425 if (g != null) { 426 Chain c = g.getChain(); 427 if (c != null) { 428 s = c.getStructure(); 429 } 430 } 431 } 432 if (s != null) { 433 // Add all chains from the structure 434 chains.addAll(s.getChains(0)); 435 } 436 437 // Add groups and chains from ca 438 for (Atom a : ca) { 439 Group g = a.getGroup(); 440 if (g != null) { 441 caGroups.add(g); 442 443 Chain c = g.getChain(); 444 if (c != null) { 445 chains.add(c); 446 } 447 } 448 } 449 450 // Iterate through all chains, finding groups not in ca 451 List<Group> unadded = new ArrayList<>(); 452 for (Chain c : chains) { 453 for (Group g : c.getAtomGroups()) { 454 if (!caGroups.contains(g)) { 455 unadded.add(g); 456 } 457 } 458 } 459 return unadded; 460 } 461 462 /** 463 * Finds all ligand groups from the target which fall within the cutoff distance 464 * of some atom from the query set. 465 * 466 * @param target Set of groups including the ligands 467 * @param query Atom selection 468 * @param cutoff Distance from query atoms to consider, in angstroms 469 * @return All groups from the target with at least one atom within cutoff of a query atom 470 * @see StructureTools#DEFAULT_LIGAND_PROXIMITY_CUTOFF 471 */ 472 public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[] query, double cutoff) { 473 // Spatial hashing of the reduced structure 474 Grid grid = new Grid(cutoff); 475 grid.addAtoms(query); 476 477 List<Group> ligands = new ArrayList<>(); 478 for(Group g :target ) { 479 // don't worry about waters 480 if(g.isWater()) { 481 continue; 482 } 483 484 if(g.isPolymeric() ) { 485 // Polymers aren't ligands 486 continue; 487 } 488 489 // It is a ligand! 490 491 // Check that it's within cutoff of something in reduced 492 List<Atom> groupAtoms = g.getAtoms(); 493 if( ! grid.hasAnyContact(Calc.atomsToPoints(groupAtoms))) { 494 continue; 495 } 496 497 ligands.add(g); 498 } 499 return ligands; 500 } 501 502 /** 503 * Adds a particular group to a structure. A new chain will be created if necessary. 504 * 505 * <p>When adding multiple groups, pass the return value of one call as the 506 * chainGuess parameter of the next call for efficiency. 507 * <pre> 508 * Chain guess = null; 509 * for(Group g : groups) { 510 * guess = addGroupToStructure(s, g, guess ); 511 * } 512 * </pre> 513 * @param s structure to receive the group 514 * @param g group to add 515 * @param chainGuess (optional) If not null, should be a chain from s. Used 516 * to improve performance when adding many groups from the same chain 517 * @param clone Indicates whether the input group should be cloned before 518 * being added to the new chain 519 * @return the chain g was added to 520 */ 521 public static Chain addGroupToStructure(Structure s, Group g, int model, Chain chainGuess, boolean clone ) { 522 // Find or create the chain 523 String chainId = g.getChainId(); 524 assert !chainId.isEmpty(); 525 Chain chain; 526 if (chainGuess != null && chainGuess.getId().equals(chainId)) { 527 // previously guessed chain 528 chain = chainGuess; 529 } else { 530 // Try to guess 531 chain = s.getChain(chainId, model); 532 if (chain == null) { 533 // no chain found 534 chain = new ChainImpl(); 535 chain.setId(chainId); 536 537 Chain oldChain = g.getChain(); 538 chain.setName(oldChain.getName()); 539 540 EntityInfo oldEntityInfo = oldChain.getEntityInfo(); 541 542 EntityInfo newEntityInfo; 543 if (oldEntityInfo == null) { 544 newEntityInfo = new EntityInfo(); 545 s.addEntityInfo(newEntityInfo); 546 } else { 547 newEntityInfo = s.getEntityById(oldEntityInfo.getMolId()); 548 if (newEntityInfo == null) { 549 newEntityInfo = new EntityInfo(oldEntityInfo); 550 s.addEntityInfo(newEntityInfo); 551 } 552 } 553 newEntityInfo.addChain(chain); 554 chain.setEntityInfo(newEntityInfo); 555 556 // TODO Do the seqres need to be cloned too? -SB 2016-10-7 557 chain.setSeqResGroups(oldChain.getSeqResGroups()); 558 chain.setSeqMisMatches(oldChain.getSeqMisMatches()); 559 560 s.addChain(chain, model); 561 } 562 } 563 564 // Add cloned group 565 if (clone) { 566 g = (Group) g.clone(); 567 } 568 chain.addGroup(g); 569 570 return chain; 571 } 572 573 /** 574 * Add a list of groups to a new structure. Chains will be automatically 575 * created in the new structure as needed. 576 * @param s structure to receive the group 577 * @param groups groups to add 578 * @param model model number 579 * @param clone Indicates whether the input groups should be cloned before 580 * being added to the new chain 581 */ 582 public static void addGroupsToStructure(Structure s, Collection<Group> groups, int model, boolean clone) { 583 Chain chainGuess = null; 584 for(Group g : groups) { 585 chainGuess = addGroupToStructure(s, g, model, chainGuess, clone); 586 } 587 } 588 589 /** 590 * Expand a set of atoms into all groups from the same structure. 591 * 592 * If the structure is set, only the first atom is used (assuming all 593 * atoms come from the same original structure). 594 * If the atoms aren't linked to a structure (for instance, for cloned atoms), 595 * searches all chains of all atoms for groups. 596 * @param atoms Sample of atoms 597 * @return All groups from all chains accessible from the input atoms 598 */ 599 public static Set<Group> getAllGroupsFromSubset(Atom[] atoms) { 600 return getAllGroupsFromSubset(atoms,null); 601 } 602 603 /** 604 * Expand a set of atoms into all groups from the same structure. 605 * 606 * If the structure is set, only the first atom is used (assuming all 607 * atoms come from the same original structure). 608 * If the atoms aren't linked to a structure (for instance, for cloned atoms), 609 * searches all chains of all atoms for groups. 610 * @param atoms Sample of atoms 611 * @param types Type of groups to return (useful for getting only ligands, for instance). 612 * Null gets all groups. 613 * @return All groups from all chains accessible from the input atoms 614 */ 615 public static Set<Group> getAllGroupsFromSubset(Atom[] atoms,GroupType types) { 616 // Get the full structure 617 Structure s = null; 618 if (atoms.length > 0) { 619 Group g = atoms[0].getGroup(); 620 if (g != null) { 621 Chain c = g.getChain(); 622 if (c != null) { 623 s = c.getStructure(); 624 } 625 } 626 } 627 // Collect all groups from the structure 628 Set<Chain> allChains = new HashSet<>(); 629 if( s != null ) { 630 allChains.addAll(s.getChains()); 631 } 632 // In case the structure wasn't set, need to use ca chains too 633 for(Atom a : atoms) { 634 Group g = a.getGroup(); 635 if(g != null) { 636 Chain c = g.getChain(); 637 if( c != null ) { 638 allChains.add(c); 639 } 640 } 641 } 642 643 if(allChains.isEmpty() ) { 644 return Collections.emptySet(); 645 } 646 647 // Extract all ligand groups 648 Set<Group> full = new HashSet<>(); 649 for(Chain c : allChains) { 650 if(types == null) { 651 full.addAll(c.getAtomGroups()); 652 } else { 653 full.addAll(c.getAtomGroups(types)); 654 } 655 } 656 657 return full; 658 } 659 660 661 /** 662 * Returns and array of all non-Hydrogen atoms in the given Structure, 663 * optionally including HET atoms or not. Waters are not included. 664 * 665 * @param s 666 * @param hetAtoms 667 * if true HET atoms are included in array, if false they are not 668 * @return 669 */ 670 public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms) { 671 AtomIterator iter = new AtomIterator(s); 672 return getAllNonHAtomArray(s, hetAtoms, iter); 673 } 674 /** 675 * Returns and array of all non-Hydrogen atoms in the given Structure, 676 * optionally including HET atoms or not. Waters are not included. 677 * 678 * @param s 679 * @param hetAtoms 680 * if true HET atoms are included in array, if false they are not 681 * @param modelNr Model number to draw atoms from 682 * @return 683 */ 684 public static Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, int modelNr) { 685 AtomIterator iter = new AtomIterator(s,modelNr); 686 return getAllNonHAtomArray(s, hetAtoms, iter); 687 } 688 689 private static Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, AtomIterator iter) { 690 List<Atom> atoms = new ArrayList<>(); 691 692 while (iter.hasNext()) { 693 Atom a = iter.next(); 694 if (a.getElement() == Element.H) 695 continue; 696 697 Group g = a.getGroup(); 698 699 if (g.isWater()) 700 continue; 701 702 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 703 continue; 704 705 atoms.add(a); 706 } 707 return atoms.toArray(new Atom[0]); 708 } 709 710 /** 711 * Returns and array of all non-Hydrogen atoms in the given Chain, 712 * optionally including HET atoms or not Waters are not included. 713 * 714 * @param c 715 * @param hetAtoms 716 * if true HET atoms are included in array, if false they are not 717 * @return 718 */ 719 public static Atom[] getAllNonHAtomArray(Chain c, boolean hetAtoms) { 720 List<Atom> atoms = new ArrayList<>(); 721 722 for (Group g : c.getAtomGroups()) { 723 if (g.isWater()) 724 continue; 725 for (Atom a : g.getAtoms()) { 726 727 if (a.getElement() == Element.H) 728 continue; 729 730 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 731 continue; 732 733 atoms.add(a); 734 } 735 } 736 return atoms.toArray(new Atom[0]); 737 } 738 739 /** 740 * Returns and array of all non-Hydrogen atoms coordinates in the given Chain, 741 * optionally including HET atoms or not Waters are not included. 742 * 743 * @param c 744 * @param hetAtoms 745 * if true HET atoms are included in array, if false they are not 746 * @return 747 */ 748 public static Point3d[] getAllNonHCoordsArray(Chain c, boolean hetAtoms) { 749 List<Point3d> atoms = new ArrayList<>(); 750 751 for (Group g : c.getAtomGroups()) { 752 if (g.isWater()) 753 continue; 754 for (Atom a : g.getAtoms()) { 755 756 if (a.getElement() == Element.H) 757 continue; 758 759 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 760 continue; 761 762 atoms.add(a.getCoordsAsPoint3d()); 763 } 764 } 765 return atoms.toArray(new Point3d[0]); 766 } 767 768 /** 769 * Adds to the given atoms list, all atoms of groups that contained all 770 * requested atomNames, i.e. if a group does not contain all of the 771 * requested atom names, its atoms won't be added. 772 * 773 * @param atomNames 774 * @param chains 775 * @param atoms 776 */ 777 private static void extractAtoms(String[] atomNames, List<Chain> chains, List<Atom> atoms) { 778 779 for (Chain c : chains) { 780 781 for (Group g : c.getAtomGroups()) { 782 783 // a temp container for the atoms of this group 784 List<Atom> thisGroupAtoms = new ArrayList<Atom>(); 785 // flag to check if this group contains all the requested atoms. 786 boolean thisGroupAllAtoms = true; 787 for (String atomName : atomNames) { 788 Atom a = g.getAtom(atomName); 789 790 if (a == null) { 791 // this group does not have a required atom, skip it... 792 thisGroupAllAtoms = false; 793 break; 794 } 795 thisGroupAtoms.add(a); 796 } 797 if (thisGroupAllAtoms) { 798 // add the atoms of this group to the array. 799 atoms.addAll(thisGroupAtoms); 800 } 801 } 802 } 803 } 804 805 /** 806 * Returns an array of the requested Atoms from the Chain object. Iterates 807 * over all groups and checks if the requested atoms are in this group, no 808 * matter if this is a AminoAcid or Hetatom group. If the group does not 809 * contain all requested atoms then no atoms are added for that group. 810 * 811 * @param c 812 * the Chain to get the atoms from 813 * 814 * @param atomNames 815 * contains the atom names to be used. 816 * @return an Atom[] array 817 */ 818 public static Atom[] getAtomArray(Chain c, String[] atomNames) { 819 820 List<Atom> atoms = new ArrayList<>(); 821 822 for (Group g : c.getAtomGroups()) { 823 824 // a temp container for the atoms of this group 825 List<Atom> thisGroupAtoms = new ArrayList<>(); 826 // flag to check if this group contains all the requested atoms. 827 boolean thisGroupAllAtoms = true; 828 for (String atomName : atomNames) { 829 Atom a = g.getAtom(atomName); 830 if (a == null) { 831 logger.debug("Group " + g.getResidueNumber() + " (" 832 + g.getPDBName() 833 + ") does not have the required atom '" + atomName 834 + "'"); 835 // this group does not have a required atom, skip it... 836 thisGroupAllAtoms = false; 837 break; 838 } 839 thisGroupAtoms.add(a); 840 } 841 842 if (thisGroupAllAtoms) { 843 // add the atoms of this group to the array. 844 atoms.addAll(thisGroupAtoms); 845 } 846 847 } 848 return atoms.toArray(new Atom[0]); 849 850 } 851 852 /** 853 * Returns an Atom array of the C-alpha atoms. Any atom that is a carbon and 854 * has CA name will be returned. 855 * 856 * @param c 857 * the structure object 858 * @return an Atom[] array 859 * @see #getRepresentativeAtomArray(Chain) 860 */ 861 public static Atom[] getAtomCAArray(Chain c) { 862 List<Atom> atoms = new ArrayList<>(); 863 864 for (Group g : c.getAtomGroups()) { 865 if (g.hasAtom(CA_ATOM_NAME) 866 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 867 atoms.add(g.getAtom(CA_ATOM_NAME)); 868 } 869 } 870 871 return atoms.toArray(new Atom[0]); 872 } 873 874 /** 875 * Gets a representative atom for each group that is part of the chain 876 * backbone. Note that modified aminoacids won't be returned as part of the 877 * backbone if the {@link org.biojava.nbio.structure.chem.ReducedChemCompProvider} was used to load the 878 * structure. 879 * 880 * For amino acids, the representative is a CA carbon. For nucleotides, the 881 * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group 882 * types will be ignored. 883 * 884 * @param c 885 * @return representative Atoms of the chain backbone 886 * @since Biojava 4.1.0 887 */ 888 public static Atom[] getRepresentativeAtomArray(Chain c) { 889 List<Atom> atoms = new ArrayList<>(); 890 891 for (Group g : c.getAtomGroups()) { 892 893 switch (g.getType()) { 894 case AMINOACID: 895 if (g.hasAtom(CA_ATOM_NAME) 896 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 897 atoms.add(g.getAtom(CA_ATOM_NAME)); 898 } 899 break; 900 case NUCLEOTIDE: 901 if (g.hasAtom(NUCLEOTIDE_REPRESENTATIVE)) { 902 atoms.add(g.getAtom(NUCLEOTIDE_REPRESENTATIVE)); 903 } 904 break; 905 default: 906 // don't add 907 } 908 } 909 910 return atoms.toArray(new Atom[0]); 911 912 } 913 914 /** 915 * Provides an equivalent copy of Atoms in a new array. Clones everything, 916 * starting with parent groups and chains. The chain will only contain 917 * groups that are part of the input array. 918 * 919 * @param ca 920 * array of representative atoms, e.g. CA atoms 921 * @return Atom array 922 * @since Biojava 4.1.0 923 */ 924 public static Atom[] cloneAtomArray(Atom[] ca) { 925 Atom[] newCA = new Atom[ca.length]; 926 927 List<Chain> model = new ArrayList<Chain>(); 928 int apos = -1; 929 for (Atom a : ca) { 930 apos++; 931 Group parentG = a.getGroup(); 932 Chain parentC = parentG.getChain(); 933 934 Chain newChain = null; 935 for (Chain c : model) { 936 if (c.getName().equals(parentC.getName())) { 937 newChain = c; 938 break; 939 } 940 } 941 if (newChain == null) { 942 newChain = new ChainImpl(); 943 newChain.setId(parentC.getId()); 944 newChain.setName(parentC.getName()); 945 model.add(newChain); 946 } 947 948 Group parentN = (Group) parentG.clone(); 949 950 newCA[apos] = parentN.getAtom(a.getName()); 951 try { 952 // if the group doesn't exist yet, this produces a StructureException 953 newChain.getGroupByPDB(parentN.getResidueNumber()); 954 } catch (StructureException e) { 955 // the group doesn't exist yet in the newChain, let's add it 956 newChain.addGroup(parentN); 957 } 958 959 } 960 return newCA; 961 } 962 963 /** 964 * Clone a set of representative Atoms, but returns the parent groups 965 * 966 * @param ca 967 * Atom array 968 * @return Group array 969 */ 970 public static Group[] cloneGroups(Atom[] ca) { 971 Group[] newGroup = new Group[ca.length]; 972 973 List<Chain> model = new ArrayList<Chain>(); 974 int apos = -1; 975 for (Atom a : ca) { 976 apos++; 977 Group parentG = a.getGroup(); 978 Chain parentC = parentG.getChain(); 979 980 Chain newChain = null; 981 for (Chain c : model) { 982 if (c.getName().equals(parentC.getName())) { 983 newChain = c; 984 break; 985 } 986 } 987 if (newChain == null) { 988 newChain = new ChainImpl(); 989 newChain.setName(parentC.getName()); 990 model.add(newChain); 991 } 992 993 Group ng = (Group) parentG.clone(); 994 newGroup[apos] = ng; 995 newChain.addGroup(ng); 996 } 997 return newGroup; 998 } 999 1000 /** 1001 * Utility method for working with circular permutations. Creates a 1002 * duplicated and cloned set of Calpha atoms from the input array. 1003 * 1004 * @param ca2 1005 * atom array 1006 * @return cloned and duplicated set of input array 1007 */ 1008 public static Atom[] duplicateCA2(Atom[] ca2) { 1009 // we don't want to rotate input atoms, do we? 1010 Atom[] ca2clone = new Atom[ca2.length * 2]; 1011 1012 int pos = 0; 1013 1014 Chain c = null; 1015 String prevChainId = ""; 1016 for (Atom a : ca2) { 1017 Group g = (Group) a.getGroup().clone(); // works because each group 1018 // has only a single atom 1019 1020 if (c == null) { 1021 c = new ChainImpl(); 1022 Chain orig = a.getGroup().getChain(); 1023 c.setId(orig.getId()); 1024 c.setName(orig.getName()); 1025 } else { 1026 Chain orig = a.getGroup().getChain(); 1027 if (!orig.getId().equals(prevChainId)) { 1028 c = new ChainImpl(); 1029 c.setId(orig.getId()); 1030 c.setName(orig.getName()); 1031 } 1032 } 1033 1034 c.addGroup(g); 1035 ca2clone[pos] = g.getAtom(a.getName()); 1036 1037 pos++; 1038 } 1039 1040 // Duplicate ca2! 1041 c = null; 1042 prevChainId = ""; 1043 for (Atom a : ca2) { 1044 Group g = (Group) a.getGroup().clone(); 1045 1046 if (c == null) { 1047 c = new ChainImpl(); 1048 Chain orig = a.getGroup().getChain(); 1049 c.setId(orig.getId()); 1050 c.setName(orig.getName()); 1051 } else { 1052 Chain orig = a.getGroup().getChain(); 1053 if (!orig.getId().equals(prevChainId)) { 1054 c = new ChainImpl(); 1055 c.setId(orig.getId()); 1056 c.setName(orig.getName()); 1057 } 1058 } 1059 1060 c.addGroup(g); 1061 ca2clone[pos] = g.getAtom(a.getName()); 1062 1063 pos++; 1064 } 1065 1066 return ca2clone; 1067 1068 } 1069 1070 /** 1071 * Return an Atom array of the C-alpha atoms. Any atom that is a carbon and 1072 * has CA name will be returned. 1073 * 1074 * @param s 1075 * the structure object 1076 * @return an Atom[] array 1077 * @see #getRepresentativeAtomArray(Structure) 1078 */ 1079 public static Atom[] getAtomCAArray(Structure s) { 1080 1081 List<Atom> atoms = new ArrayList<Atom>(); 1082 1083 for (Chain c : s.getChains()) { 1084 for (Group g : c.getAtomGroups()) { 1085 if (g.hasAtom(CA_ATOM_NAME) 1086 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 1087 atoms.add(g.getAtom(CA_ATOM_NAME)); 1088 } 1089 } 1090 } 1091 1092 return atoms.toArray(new Atom[atoms.size()]); 1093 } 1094 1095 /** 1096 * Gets a representative atom for each group that is part of the chain 1097 * backbone. Note that modified aminoacids won't be returned as part of the 1098 * backbone if the {@link org.biojava.nbio.structure.chem.ReducedChemCompProvider} was used to load the 1099 * structure. 1100 * 1101 * For amino acids, the representative is a CA carbon. For nucleotides, the 1102 * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group 1103 * types will be ignored. 1104 * 1105 * @param s 1106 * Input structure 1107 * @return representative Atoms of the structure backbone 1108 * @since Biojava 4.1.0 1109 */ 1110 public static Atom[] getRepresentativeAtomArray(Structure s) { 1111 1112 List<Atom> atoms = new ArrayList<>(); 1113 1114 for (Chain c : s.getChains()) { 1115 Atom[] chainAtoms = getRepresentativeAtomArray(c); 1116 atoms.addAll(Arrays.asList(chainAtoms)); 1117 } 1118 1119 return atoms.toArray(new Atom[0]); 1120 } 1121 1122 /** 1123 * Return an Atom array of the main chain atoms: CA, C, N, O Any group that 1124 * contains those atoms will be included, be it a standard aminoacid or not 1125 * 1126 * @param s 1127 * the structure object 1128 * @return an Atom[] array 1129 */ 1130 public static Atom[] getBackboneAtomArray(Structure s) { 1131 1132 List<Atom> atoms = new ArrayList<>(); 1133 1134 for (Chain c : s.getChains()) { 1135 for (Group g : c.getAtomGroups()) { 1136 if (g.hasAminoAtoms()) { 1137 // this means we will only take atoms grom groups that have 1138 // complete backbones 1139 for (Atom a : g.getAtoms()) { 1140 switch (g.getType()) { 1141 case NUCLEOTIDE: 1142 // Nucleotide backbone 1143 if (a.getName().equals(C1_ATOM_NAME)) 1144 atoms.add(a); 1145 if (a.getName().equals(C2_ATOM_NAME)) 1146 atoms.add(a); 1147 if (a.getName().equals(C3_ATOM_NAME)) 1148 atoms.add(a); 1149 if (a.getName().equals(C4_ATOM_NAME)) 1150 atoms.add(a); 1151 if (a.getName().equals(O2_ATOM_NAME)) 1152 atoms.add(a); 1153 if (a.getName().equals(O3_ATOM_NAME)) 1154 atoms.add(a); 1155 if (a.getName().equals(O4_ATOM_NAME)) 1156 atoms.add(a); 1157 if (a.getName().equals(O5_ATOM_NAME)) 1158 atoms.add(a); 1159 if (a.getName().equals(OP1_ATOM_NAME)) 1160 atoms.add(a); 1161 if (a.getName().equals(OP2_ATOM_NAME)) 1162 atoms.add(a); 1163 if (a.getName().equals(P_ATOM_NAME)) 1164 atoms.add(a); 1165 // TODO Allow C4* names as well as C4'? -SB 3/2015 1166 break; 1167 case AMINOACID: 1168 default: 1169 // we do it this way instead of with g.getAtom() to 1170 // be sure we always use the same order as original 1171 if (a.getName().equals(CA_ATOM_NAME)) 1172 atoms.add(a); 1173 if (a.getName().equals(C_ATOM_NAME)) 1174 atoms.add(a); 1175 if (a.getName().equals(N_ATOM_NAME)) 1176 atoms.add(a); 1177 if (a.getName().equals(O_ATOM_NAME)) 1178 atoms.add(a); 1179 break; 1180 } 1181 } 1182 } 1183 } 1184 1185 } 1186 1187 return atoms.toArray(new Atom[0]); 1188 } 1189 1190 /** 1191 * Convert three character amino acid codes into single character e.g. 1192 * convert CYS to C. Valid 3-letter codes will be those of the standard 20 1193 * amino acids plus MSE, CSE, SEC, PYH, PYL (see the {@link #aminoAcids} 1194 * map) 1195 * 1196 * @return the 1 letter code, or null if the given 3 letter code does not 1197 * correspond to an amino acid code 1198 * @param groupCode3 1199 * a three character amino acid representation String 1200 * @see #get1LetterCode(String) 1201 */ 1202 public static Character get1LetterCodeAmino(String groupCode3) { 1203 return aminoAcids.get(groupCode3); 1204 } 1205 1206 /** 1207 * Convert a three letter amino acid or nucleotide code into a single 1208 * character code. If the code does not correspond to an amino acid or 1209 * nucleotide, returns {@link #UNKNOWN_GROUP_LABEL}. 1210 * 1211 * Returned null for nucleotides prior to version 4.0.1. 1212 * 1213 * @param groupCode3 1214 * three letter representation 1215 * @return The 1-letter abbreviation 1216 */ 1217 public static Character get1LetterCode(String groupCode3) { 1218 1219 Character code1; 1220 1221 // is it a standard amino acid ? 1222 code1 = get1LetterCodeAmino(groupCode3); 1223 1224 if (code1 == null) { 1225 // hm groupCode3 is not standard 1226 // perhaps it is a nucleotide? 1227 groupCode3 = groupCode3.trim(); 1228 if (isNucleotide(groupCode3)) { 1229 code1 = nucleotides30.get(groupCode3); 1230 if (code1 == null) { 1231 code1 = nucleotides23.get(groupCode3); 1232 } 1233 if (code1 == null) { 1234 code1 = UNKNOWN_GROUP_LABEL; 1235 } 1236 } else { 1237 // does not seem to be so let's assume it is 1238 // nonstandard aminoacid and label it "X" 1239 // logger.warning("unknown group name "+groupCode3 ); 1240 code1 = UNKNOWN_GROUP_LABEL; 1241 } 1242 } 1243 1244 return code1; 1245 1246 } 1247 1248 /** 1249 * Test if the three-letter code of an ATOM entry corresponds to a 1250 * nucleotide or to an aminoacid. 1251 * 1252 * @param groupCode3 1253 * 3-character code for a group. 1254 * 1255 */ 1256 public static boolean isNucleotide(String groupCode3) { 1257 String code = groupCode3.trim(); 1258 return nucleotides30.containsKey(code) 1259 || nucleotides23.containsKey(code); 1260 } 1261 1262 public static String convertAtomsToSeq(Atom[] atoms) { 1263 1264 StringBuilder buf = new StringBuilder(); 1265 Group prevGroup = null; 1266 for (Atom a : atoms) { 1267 Group g = a.getGroup(); 1268 if (prevGroup != null) { 1269 if (prevGroup.equals(g)) { 1270 // we add each group only once. 1271 continue; 1272 } 1273 } 1274 String code3 = g.getPDBName(); 1275 Character code1 = get1LetterCodeAmino(code3); 1276 if (code1 == null) 1277 code1 = UNKNOWN_GROUP_LABEL; 1278 1279 buf.append(code1); 1280 1281 prevGroup = g; 1282 1283 } 1284 return buf.toString(); 1285 } 1286 1287 /** 1288 * Get a group represented by a ResidueNumber. 1289 * 1290 * @param struc 1291 * a {@link Structure} 1292 * @param pdbResNum 1293 * a {@link ResidueNumber} 1294 * @return a group in the structure that is represented by the pdbResNum. 1295 * @throws StructureException 1296 * if the group cannot be found. 1297 */ 1298 public static Group getGroupByPDBResidueNumber(Structure struc, 1299 ResidueNumber pdbResNum) throws StructureException { 1300 if (struc == null || pdbResNum == null) { 1301 throw new IllegalArgumentException("Null argument(s)."); 1302 } 1303 1304 Chain chain = struc.getPolyChainByPDB(pdbResNum.getChainName()); 1305 1306 return chain.getGroupByPDB(pdbResNum); 1307 } 1308 1309 /** 1310 * Returns the set of intra-chain contacts for the given chain for given 1311 * atom names, i.e. the contact map. Uses a spatial hashing algorithm that 1312 * speeds up the calculation without need of full distance matrix. The 1313 * parsing mode {@link FileParsingParameters#setAlignSeqRes(boolean)} needs 1314 * to be set to true for this to work. 1315 * 1316 * @param chain 1317 * @param atomNames 1318 * the array with atom names to be used. Beware: CA will do both 1319 * C-alphas an Calciums if null all non-H atoms of non-hetatoms 1320 * will be used 1321 * @param cutoff 1322 * @return 1323 */ 1324 public static AtomContactSet getAtomsInContact(Chain chain, 1325 String[] atomNames, double cutoff) { 1326 Grid grid = new Grid(cutoff); 1327 1328 Atom[] atoms = null; 1329 if (atomNames == null) { 1330 atoms = getAllNonHAtomArray(chain, false); 1331 } else { 1332 atoms = getAtomArray(chain, atomNames); 1333 } 1334 // If tha 1335 if(atoms.length==0){ 1336 logger.warn("No atoms found for buidling grid!"); 1337 return new AtomContactSet(cutoff); 1338 } 1339 grid.addAtoms(atoms); 1340 1341 return grid.getAtomContacts(); 1342 } 1343 1344 /** 1345 * Returns the set of intra-chain contacts for the given chain for all non-H 1346 * atoms of non-hetatoms, i.e. the contact map. Uses a spatial hashing 1347 * algorithm that speeds up the calculation without need of full distance 1348 * matrix. The parsing mode 1349 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1350 * true for this to work. 1351 * 1352 * @param chain 1353 * @param cutoff 1354 * @return 1355 */ 1356 public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) { 1357 return getAtomsInContact(chain, (String[]) null, cutoff); 1358 } 1359 1360 /** 1361 * Returns the set of intra-chain contacts for the given chain for C-alpha 1362 * atoms (including non-standard aminoacids appearing as HETATM groups), 1363 * i.e. the contact map. Uses a spatial hashing algorithm that speeds up 1364 * the calculation without need of full distance matrix. The parsing mode 1365 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1366 * true for this to work. 1367 * 1368 * @param chain 1369 * @param cutoff 1370 * @return 1371 * @see #getRepresentativeAtomsInContact(Chain, double) 1372 */ 1373 public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) { 1374 Grid grid = new Grid(cutoff); 1375 1376 Atom[] atoms = getAtomCAArray(chain); 1377 1378 grid.addAtoms(atoms); 1379 1380 return grid.getAtomContacts(); 1381 } 1382 1383 /** 1384 * Returns the set of intra-chain contacts for the given chain for C-alpha 1385 * or C3' atoms (including non-standard aminoacids appearing as HETATM 1386 * groups), i.e. the contact map. Uses a spatial hashing algorithm that 1387 * speeds up the calculation without need of full distance matrix. 1388 * 1389 * @param chain 1390 * @param cutoff 1391 * @return 1392 * @since Biojava 4.1.0 1393 */ 1394 public static AtomContactSet getRepresentativeAtomsInContact(Chain chain, 1395 double cutoff) { 1396 Grid grid = new Grid(cutoff); 1397 1398 Atom[] atoms = getRepresentativeAtomArray(chain); 1399 1400 grid.addAtoms(atoms); 1401 1402 return grid.getAtomContacts(); 1403 } 1404 1405 /** 1406 * Returns the set of inter-chain contacts between the two given chains for 1407 * the given atom names. Uses a spatial hashing algorithm that speeds up 1408 * the calculation without need of full distance matrix. The parsing mode 1409 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1410 * true for this to work. 1411 * 1412 * @param chain1 1413 * @param chain2 1414 * @param atomNames 1415 * the array with atom names to be used. For Calphas use {"CA"}, 1416 * if null all non-H atoms will be used. Note HET atoms are 1417 * ignored unless this parameter is null. 1418 * @param cutoff 1419 * @param hetAtoms 1420 * if true HET atoms are included, if false they are not 1421 * @return 1422 */ 1423 public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2, 1424 String[] atomNames, double cutoff, boolean hetAtoms) { 1425 Grid grid = new Grid(cutoff); 1426 Atom[] atoms1 = null; 1427 Atom[] atoms2 = null; 1428 if (atomNames == null) { 1429 atoms1 = getAllNonHAtomArray(chain1, hetAtoms); 1430 atoms2 = getAllNonHAtomArray(chain2, hetAtoms); 1431 } else { 1432 atoms1 = getAtomArray(chain1, atomNames); 1433 atoms2 = getAtomArray(chain2, atomNames); 1434 } 1435 grid.addAtoms(atoms1, atoms2); 1436 1437 return grid.getAtomContacts(); 1438 } 1439 1440 /** 1441 * Returns the set of inter-chain contacts between the two given chains for 1442 * all non-H atoms. Uses a spatial hashing algorithm that speeds up the 1443 * calculation without need of full distance matrix. The parsing mode 1444 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1445 * true for this to work. 1446 * 1447 * @param chain1 1448 * @param chain2 1449 * @param cutoff 1450 * @param hetAtoms 1451 * if true HET atoms are included, if false they are not 1452 * @return 1453 */ 1454 public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2, 1455 double cutoff, boolean hetAtoms) { 1456 return getAtomsInContact(chain1, chain2, null, cutoff, hetAtoms); 1457 } 1458 1459 /** 1460 * Finds Groups in {@code structure} that contain at least one Atom that is 1461 * within {@code radius} Angstroms of {@code centroid}. 1462 * 1463 * @param structure 1464 * The structure from which to find Groups 1465 * @param centroid 1466 * The centroid of the shell 1467 * @param excludeResidues 1468 * A list of ResidueNumbers to exclude 1469 * @param radius 1470 * The radius from {@code centroid}, in Angstroms 1471 * @param includeWater 1472 * Whether to include Groups whose <em>only</em> atoms are water 1473 * @param useAverageDistance 1474 * When set to true, distances are the arithmetic mean (1-norm) 1475 * of the distances of atoms that belong to the group and that 1476 * are within the shell; otherwise, distances are the minimum of 1477 * these values 1478 * @return A map of Groups within (or partially within) the shell, to their 1479 * distances in Angstroms 1480 */ 1481 public static Map<Group, Double> getGroupDistancesWithinShell( 1482 Structure structure, Atom centroid, 1483 Set<ResidueNumber> excludeResidues, double radius, 1484 boolean includeWater, boolean useAverageDistance) { 1485 1486 // for speed, we avoid calculating square roots 1487 radius = radius * radius; 1488 1489 Map<Group, Double> distances = new HashMap<Group, Double>(); 1490 1491 // we only need this if we're averaging distances 1492 // note that we can't use group.getAtoms().size() because some the 1493 // group's atoms be outside the shell 1494 Map<Group, Integer> atomCounts = new HashMap<Group, Integer>(); 1495 1496 for (Chain chain : structure.getChains()) { 1497 groupLoop: for (Group chainGroup : chain.getAtomGroups()) { 1498 1499 // exclude water 1500 if (!includeWater && chainGroup.isWater()) 1501 continue; 1502 1503 // check blacklist of residue numbers 1504 for (ResidueNumber rn : excludeResidues) { 1505 if (rn.equals(chainGroup.getResidueNumber())) 1506 continue groupLoop; 1507 } 1508 1509 for (Atom testAtom : chainGroup.getAtoms()) { 1510 1511 // use getDistanceFast as we are doing a lot of comparisons 1512 double dist = Calc.getDistanceFast(centroid, testAtom); 1513 1514 // if we're the shell 1515 if (dist <= radius) { 1516 if (!distances.containsKey(chainGroup)) 1517 distances.put(chainGroup, Double.POSITIVE_INFINITY); 1518 if (useAverageDistance) { 1519 // sum the distance; we'll divide by the total 1520 // number later 1521 // here, we CANNOT use fastDistance (distance 1522 // squared) because we want the arithmetic mean 1523 distances.put(chainGroup, distances.get(chainGroup) 1524 + Math.sqrt(dist)); 1525 if (!atomCounts.containsKey(chainGroup)) 1526 atomCounts.put(chainGroup, 0); 1527 atomCounts.put(chainGroup, 1528 atomCounts.get(chainGroup) + 1); 1529 } else { 1530 // take the minimum distance among all atoms of 1531 // chainGroup 1532 // note that we can't break here because we might 1533 // find a smaller distance 1534 if (dist < distances.get(chainGroup)) { 1535 distances.put(chainGroup, dist); 1536 } 1537 } 1538 } 1539 1540 } 1541 } 1542 } 1543 1544 if (useAverageDistance) { 1545 for (Map.Entry<Group, Double> entry : distances.entrySet()) { 1546 int count = atomCounts.get(entry.getKey()); 1547 distances.put(entry.getKey(), entry.getValue() / count); 1548 } 1549 } else { 1550 // in this case we used getDistanceFast 1551 distances.replaceAll((k, v) -> Math.sqrt(v)); 1552 } 1553 1554 return distances; 1555 1556 } 1557 1558 public static Set<Group> getGroupsWithinShell(Structure structure, 1559 Atom atom, Set<ResidueNumber> excludeResidues, double distance, 1560 boolean includeWater) { 1561 1562 // square the distance to use as a comparison against getDistanceFast 1563 // which returns the square of a distance. 1564 distance = distance * distance; 1565 1566 Set<Group> returnSet = new LinkedHashSet<>(); 1567 for (Chain chain : structure.getChains()) { 1568 groupLoop: for (Group chainGroup : chain.getAtomGroups()) { 1569 if (!includeWater && chainGroup.isWater()) 1570 continue; 1571 for (ResidueNumber rn : excludeResidues) { 1572 if (rn.equals(chainGroup.getResidueNumber())) 1573 continue groupLoop; 1574 } 1575 for (Atom atomB : chainGroup.getAtoms()) { 1576 1577 // use getDistanceFast as we are doing a lot of comparisons 1578 double dist = Calc.getDistanceFast(atom, atomB); 1579 if (dist <= distance) { 1580 returnSet.add(chainGroup); 1581 break; 1582 } 1583 1584 } 1585 } 1586 } 1587 return returnSet; 1588 } 1589 1590 /** 1591 * <p> 1592 * Returns a Set of Groups in a structure within the distance specified of a 1593 * given group. 1594 * </p> 1595 * <p> 1596 * Updated 18-Sep-2015 sroughley to return a Set so only a unique set of 1597 * Groups returned 1598 * 1599 * @param structure 1600 * The structure to work with 1601 * @param group 1602 * The 'query' group 1603 * @param distance 1604 * The cutoff distance 1605 * @param includeWater 1606 * Should water residues be included in the output? 1607 * @return {@link LinkedHashSet} of {@link Group}s within at least one atom 1608 * with {@code distance} of at least one atom in {@code group} 1609 */ 1610 public static Set<Group> getGroupsWithinShell(Structure structure, 1611 Group group, double distance, boolean includeWater) { 1612 1613 Set<Group> returnList = new LinkedHashSet<>(); 1614 1615 Set<ResidueNumber> excludeGroups = new HashSet<>(); 1616 excludeGroups.add(group.getResidueNumber()); 1617 for (Atom atom : group.getAtoms()) { 1618 Set<Group> set = getGroupsWithinShell(structure, atom, 1619 excludeGroups, distance, includeWater); 1620 returnList.addAll(set); 1621 } 1622 1623 return returnList; 1624 } 1625 1626 /** 1627 * Remove all models from a Structure and keep only the first 1628 * 1629 * @param s 1630 * original Structure 1631 * @return a structure that contains only the first model 1632 * @since 3.0.5 1633 */ 1634 public static Structure removeModels(Structure s) { 1635 if (s.nrModels() == 1) 1636 return s; 1637 1638 Structure n = new StructureImpl(); 1639 // go through whole substructure and clone ... 1640 1641 // copy structure data 1642 1643 n.setPdbId(s.getPdbId()); 1644 n.setName(s.getName()); 1645 1646 // TODO: do deep copying of data! 1647 n.setPDBHeader(s.getPDBHeader()); 1648 n.setDBRefs(s.getDBRefs()); 1649 1650 n.setSites(s.getSites()); 1651 1652 n.setChains(s.getModel(0)); 1653 1654 return n; 1655 1656 } 1657 1658 /** 1659 * Removes all polymeric and solvent groups from a list of groups 1660 * 1661 */ 1662 public static List<Group> filterLigands(List<Group> allGroups) { 1663 1664 List<Group> groups = new ArrayList<>(); 1665 for (Group g : allGroups) { 1666 1667 if ( g.isPolymeric()) 1668 continue; 1669 1670 if (!g.isWater()) { 1671 groups.add(g); 1672 } 1673 } 1674 1675 return groups; 1676 } 1677 1678 /** 1679 * Short version of {@link #getStructure(String, PDBFileParser, AtomCache)} 1680 * which creates new parsers when needed 1681 * 1682 * @param name 1683 * @return 1684 * @throws IOException 1685 * @throws StructureException 1686 */ 1687 public static Structure getStructure(String name) throws IOException, 1688 StructureException { 1689 return StructureTools.getStructure(name, null, null); 1690 } 1691 1692 /** 1693 * Flexibly get a structure from an input String. The intent of this method 1694 * is to allow any reasonable string which could refer to a structure to be 1695 * correctly parsed. The following are currently supported: 1696 * <ol> 1697 * <li>Filename (if name refers to an existing file) 1698 * <li>PDB ID 1699 * <li>SCOP domains 1700 * <li>PDP domains 1701 * <li>Residue ranges 1702 * <li>Other formats supported by AtomCache 1703 * </ol> 1704 * 1705 * @param name 1706 * Some reference to the protein structure 1707 * @param parser 1708 * A clean PDBFileParser to use if it is a file. If null, a 1709 * PDBFileParser will be instantiated if needed. 1710 * @param cache 1711 * An AtomCache to use if the structure can be fetched from the 1712 * PDB. If null, a AtomCache will be instantiated if needed. 1713 * @return A Structure object 1714 * @throws IOException 1715 * if name is an existing file, but doesn't parse correctly 1716 * @throws StructureException 1717 * if the format is unknown, or if AtomCache throws an 1718 * exception. 1719 */ 1720 public static Structure getStructure(String name, PDBFileParser parser, 1721 AtomCache cache) throws IOException, StructureException { 1722 File f = new File(FileDownloadUtils.expandUserHome(name)); 1723 if (f.exists()) { 1724 if (parser == null) { 1725 parser = new PDBFileParser(); 1726 } 1727 InputStream inStream = new FileInputStream(f); 1728 return parser.parsePDBFile(inStream); 1729 } else { 1730 if (cache == null) { 1731 cache = new AtomCache(); 1732 } 1733 return cache.getStructure(name); 1734 } 1735 } 1736 1737 /** 1738 * Cleans up the structure's alternate location (altloc) groups. All alternate location groups should have all atoms (except 1739 * in the case of microheterogenity) or when a deuterium exists. 1740 * Ensure that all the alt loc groups have all the atoms in the main group. 1741 * @param structure The Structure to be cleaned up 1742 */ 1743 public static void cleanUpAltLocs(Structure structure) { 1744 for (int i =0; i< structure.nrModels() ; i++){ 1745 for (Chain chain : structure.getModel(i)) { 1746 for (Group group : chain.getAtomGroups()) { 1747 for (Group altLocGroup : group.getAltLocs()) { 1748 for ( Atom groupAtom : group.getAtoms()) { 1749 // If this alt loc doesn't have this atom 1750 if (! altLocGroup.hasAtom(groupAtom.getName())) { 1751 // Fix for microheterogenity 1752 if (altLocGroup.getPDBName().equals(group.getPDBName())) { 1753 // If it's a Hydrogen then we check for it's Deuterated brother 1754 if(!hasDeuteratedEquiv(groupAtom, altLocGroup)){ 1755 altLocGroup.addAtom(groupAtom); 1756 } 1757 } 1758 } 1759 } 1760 } 1761 } 1762 } 1763 } 1764 } 1765 1766 /** 1767 * Check to see if an Deuterated atom has a non deuterated brother in the group. 1768 * @param atom the input atom that is putatively deuterium 1769 * @param currentGroup the group the atom is in 1770 * @return true if the atom is deuterated and it's hydrogen equive exists. 1771 */ 1772 public static boolean hasNonDeuteratedEquiv(Atom atom, Group currentGroup) { 1773 // If it's deuterated and has a non-deuterated brother 1774 return atom.getElement() == Element.D && currentGroup.hasAtom(replaceFirstChar(atom.getName(), 'D', 'H')); 1775 } 1776 1777 /** 1778 * Check to see if a Hydrogen has a Deuterated brother in the group. 1779 * @param atom the input atom that is putatively hydorgen 1780 * @param currentGroup the group the atom is in 1781 * @return true if the atom is hydrogen and it's Deuterium equiv exists. 1782 */ 1783 public static boolean hasDeuteratedEquiv(Atom atom, Group currentGroup) { 1784 // If it's hydrogen and has a deuterated brother 1785 return atom.getElement() == Element.H && currentGroup.hasAtom(replaceFirstChar(atom.getName(), 'H', 'D')); 1786 } 1787 1788 private static String replaceFirstChar(String name, char c, char d) { 1789 if(name.charAt(0)==c){ 1790 return name.replaceFirst(String.valueOf(c), String.valueOf(d)); 1791 } 1792 return name; 1793 } 1794 1795 /** 1796 * Remove all atoms but the representative atoms (C alphas or phosphates) from the given structure. 1797 * @param structure the structure 1798 * @since 5.4.0 1799 */ 1800 public static void reduceToRepresentativeAtoms(Structure structure) { 1801 for (int modelIdx = 0; modelIdx<structure.nrModels(); modelIdx++) { 1802 for (Chain c : structure.getPolyChains(modelIdx)) { 1803 for (Group g : c.getAtomGroups()) { 1804 List<Atom> atoms = g.getAtoms(); 1805 if (g.isAminoAcid()) { 1806 atoms.removeIf(a->!a.getName().equals(CA_ATOM_NAME)); 1807 } else if (g.isNucleotide()) { 1808 atoms.removeIf(a->!a.getName().equals(NUCLEOTIDE_REPRESENTATIVE)); 1809 } 1810 // else we keep all other atoms. We are concerned only about aminoacids and nucleotides that make up the bulk of the structures 1811 } 1812 } 1813 } 1814 } 1815}