001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Jan 4, 2006 021 * 022 */ 023package org.biojava.nbio.structure; 024 025import java.io.File; 026import java.io.FileInputStream; 027import java.io.IOException; 028import java.io.InputStream; 029import java.util.ArrayList; 030import java.util.Arrays; 031import java.util.Collection; 032import java.util.Collections; 033import java.util.HashMap; 034import java.util.HashSet; 035import java.util.Iterator; 036import java.util.LinkedHashSet; 037import java.util.List; 038import java.util.Map; 039import java.util.Set; 040 041import javax.vecmath.Point3d; 042 043import org.biojava.nbio.structure.align.util.AtomCache; 044import org.biojava.nbio.structure.contact.AtomContactSet; 045import org.biojava.nbio.structure.contact.Grid; 046import org.biojava.nbio.structure.io.FileParsingParameters; 047import org.biojava.nbio.structure.io.PDBFileParser; 048import org.biojava.nbio.core.util.FileDownloadUtils; 049import org.slf4j.Logger; 050import org.slf4j.LoggerFactory; 051 052/** 053 * A class that provides some tool methods. 054 * 055 * @author Andreas Prlic 056 * @author Jules Jacobsen 057 * @since 1.0 058 */ 059public class StructureTools { 060 061 private static final Logger logger = LoggerFactory.getLogger(StructureTools.class); 062 063 // Amino Acid backbone 064 /** 065 * The atom name of the backbone C-alpha atom. Note that this can be 066 * ambiguous depending on the context since Calcium atoms use the same name 067 * in PDB. 068 */ 069 public static final String CA_ATOM_NAME = "CA"; 070 071 /** 072 * The atom name for the backbone amide nitrogen 073 */ 074 public static final String N_ATOM_NAME = "N"; 075 076 /** 077 * The atom name for the backbone carbonyl 078 */ 079 public static final String C_ATOM_NAME = "C"; 080 081 /** 082 * The atom name for the backbone carbonyl oxygen 083 */ 084 public static final String O_ATOM_NAME = "O"; 085 086 /** 087 * The atom name of the side-chain C-beta atom 088 */ 089 public static final String CB_ATOM_NAME = "CB"; 090 091 // Nucleotide backbone 092 /** 093 * The atom name of the backbone C1' in RNA 094 */ 095 public static final String C1_ATOM_NAME = "C1'"; 096 /** 097 * The atom name of the backbone C2' in RNA 098 */ 099 public static final String C2_ATOM_NAME = "C2'"; 100 /** 101 * The atom name of the backbone C3' in RNA 102 */ 103 public static final String C3_ATOM_NAME = "C3'"; 104 /** 105 * The atom name of the backbone C4' in RNA 106 */ 107 public static final String C4_ATOM_NAME = "C4'"; 108 /** 109 * The atom name of the backbone O2' in RNA 110 */ 111 public static final String O2_ATOM_NAME = "O2'"; 112 /** 113 * The atom name of the backbone O3' in RNA 114 */ 115 public static final String O3_ATOM_NAME = "O3'"; 116 /** 117 * The atom name of the backbone O4' in RNA 118 */ 119 public static final String O4_ATOM_NAME = "O4'"; 120 /** 121 * The atom name of the backbone O4' in RNA 122 */ 123 public static final String O5_ATOM_NAME = "O5'"; 124 /** 125 * The atom name of the backbone O4' in RNA 126 */ 127 public static final String OP1_ATOM_NAME = "OP1"; 128 /** 129 * The atom name of the backbone O4' in RNA 130 */ 131 public static final String OP2_ATOM_NAME = "OP2"; 132 /** 133 * The atom name of the backbone phosphate in RNA 134 */ 135 public static final String P_ATOM_NAME = "P"; 136 137 /** 138 * The atom used as representative for nucleotides, equivalent to 139 * {@link #CA_ATOM_NAME} for proteins 140 */ 141 public static final String NUCLEOTIDE_REPRESENTATIVE = P_ATOM_NAME; 142 143 /** 144 * The character to use for unknown compounds in sequence strings 145 */ 146 public static final char UNKNOWN_GROUP_LABEL = 'X'; 147 148 /** 149 * Below this ratio of aminoacid/nucleotide residues to the sequence total, 150 * we use simple majority of aminoacid/nucleotide residues to decide the 151 * character of the chain (protein/nucleotide) 152 */ 153 public static final double RATIO_RESIDUES_TO_TOTAL = 0.95; 154 155 /** 156 * Threshold for plausible binding of a ligand to the selected substructure 157 */ 158 public static final double DEFAULT_LIGAND_PROXIMITY_CUTOFF = 5; 159 160 // there is a file format change in PDB 3.0 and nucleotides are being 161 // renamed 162 private static final Map<String, Character> nucleotides30; 163 private static final Map<String, Character> nucleotides23; 164 165 // amino acid 3 and 1 letter code definitions 166 private static final Map<String, Character> aminoAcids; 167 168 private static final Set<Element> hBondDonorAcceptors; 169 170 private static final Set<String> NUCLEOTIDE_BACKBONE_ATOMS; 171 private static final Set<String> AMINOACID_BACKBONE_ATOMS; 172 173 static { 174 nucleotides30 = new HashMap<>(); 175 nucleotides30.put("DA", 'A'); 176 nucleotides30.put("DC", 'C'); 177 nucleotides30.put("DG", 'G'); 178 nucleotides30.put("DT", 'T'); 179 nucleotides30.put("DI", 'I'); 180 nucleotides30.put("A", 'A'); 181 nucleotides30.put("G", 'G'); 182 nucleotides30.put("C", 'C'); 183 nucleotides30.put("U", 'U'); 184 nucleotides30.put("I", 'I'); 185 186 // the DNA linkers - the +C , +G, +A +T +U and +I have been replaced 187 // with these: 188 nucleotides30.put("TAF", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine 189 nucleotides30.put("TC1", UNKNOWN_GROUP_LABEL); // Furanosyl 190 nucleotides30.put("TFE", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine 191 nucleotides30.put("TFO", UNKNOWN_GROUP_LABEL); // Tenofovir (3' 192 // terminator) 193 nucleotides30.put("TGP", UNKNOWN_GROUP_LABEL); // Guanine variant 194 nucleotides30.put("THX", UNKNOWN_GROUP_LABEL); // 5' terminator 195 nucleotides30.put("TLC", UNKNOWN_GROUP_LABEL); // Thymine with dicyclic 196 // sugar 197 nucleotides30.put("TLN", UNKNOWN_GROUP_LABEL); // locked Thymine 198 nucleotides30.put("LCG", UNKNOWN_GROUP_LABEL); // locked Guanine 199 nucleotides30.put("TP1", UNKNOWN_GROUP_LABEL); // Thymine peptide 200 // nucleic acid, with 201 // added methyl 202 nucleotides30.put("CP1", UNKNOWN_GROUP_LABEL); // Cytidine peptide 203 // nucleic acid, with 204 // added methyl 205 nucleotides30.put("TPN", UNKNOWN_GROUP_LABEL); // Thymine peptide 206 // nucleic acid 207 nucleotides30.put("CPN", UNKNOWN_GROUP_LABEL); // Cytidine peptide 208 // nucleic acid 209 nucleotides30.put("GPN", UNKNOWN_GROUP_LABEL); // Guanine peptide 210 // nucleic acid 211 nucleotides30.put("APN", UNKNOWN_GROUP_LABEL); // Adenosine peptide 212 // nucleic acid 213 nucleotides30.put("TPC", UNKNOWN_GROUP_LABEL); // Thymine variant 214 215 // store nucleic acids (C, G, A, T, U, and I), and 216 // the modified versions of nucleic acids (+C, +G, +A, +T, +U, and +I), 217 // and 218 nucleotides23 = new HashMap<>(); 219 String[] names = { "C", "G", "A", "T", "U", "I", "+C", "+G", "+A", 220 "+T", "+U", "+I" }; 221 for (String n : names) { 222 nucleotides23.put(n, n.charAt(n.length() - 1)); 223 } 224 225 aminoAcids = new HashMap<>(); 226 aminoAcids.put("GLY", 'G'); 227 aminoAcids.put("ALA", 'A'); 228 aminoAcids.put("VAL", 'V'); 229 aminoAcids.put("LEU", 'L'); 230 aminoAcids.put("ILE", 'I'); 231 aminoAcids.put("PHE", 'F'); 232 aminoAcids.put("TYR", 'Y'); 233 aminoAcids.put("TRP", 'W'); 234 aminoAcids.put("PRO", 'P'); 235 aminoAcids.put("HIS", 'H'); 236 aminoAcids.put("LYS", 'K'); 237 aminoAcids.put("ARG", 'R'); 238 aminoAcids.put("SER", 'S'); 239 aminoAcids.put("THR", 'T'); 240 aminoAcids.put("GLU", 'E'); 241 aminoAcids.put("GLN", 'Q'); 242 aminoAcids.put("ASP", 'D'); 243 aminoAcids.put("ASN", 'N'); 244 aminoAcids.put("CYS", 'C'); 245 aminoAcids.put("MET", 'M'); 246 // MSE is only found as a molecular replacement for MET 247 aminoAcids.put("MSE", 'M'); 248 // 'non-standard', genetically encoded 249 // http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html 250 // IUBMB recommended name is 'SEC' but the wwPDB currently use 'CSE' 251 // likewise 'PYL' (IUBMB) and 'PYH' (PDB) 252 aminoAcids.put("CSE", 'U'); 253 aminoAcids.put("SEC", 'U'); 254 aminoAcids.put("PYH", 'O'); 255 aminoAcids.put("PYL", 'O'); 256 257 hBondDonorAcceptors = new HashSet<>(); 258 hBondDonorAcceptors.add(Element.N); 259 hBondDonorAcceptors.add(Element.O); 260 hBondDonorAcceptors.add(Element.S); 261 262 NUCLEOTIDE_BACKBONE_ATOMS = new HashSet<>(Arrays.asList(C1_ATOM_NAME, C2_ATOM_NAME, C3_ATOM_NAME, C4_ATOM_NAME, O2_ATOM_NAME, O3_ATOM_NAME, O4_ATOM_NAME, O5_ATOM_NAME, OP1_ATOM_NAME, OP2_ATOM_NAME, P_ATOM_NAME)); 263 AMINOACID_BACKBONE_ATOMS = new HashSet<>(Arrays.asList(CA_ATOM_NAME, C_ATOM_NAME, N_ATOM_NAME, O_ATOM_NAME)); 264 265 } 266 267 /** 268 * Count how many Atoms are contained within a Structure object. 269 * 270 * @param s 271 * the structure object 272 * @return the number of Atoms in this Structure 273 */ 274 public static int getNrAtoms(Structure s) { 275 276 int nrAtoms = 0; 277 278 Iterator<Group> iter = new GroupIterator(s); 279 280 while (iter.hasNext()) { 281 Group g = iter.next(); 282 nrAtoms += g.size(); 283 } 284 285 return nrAtoms; 286 } 287 288 /** 289 * Count how many groups are contained within a structure object. 290 * 291 * @param s 292 * the structure object 293 * @return the number of groups in the structure 294 */ 295 public static int getNrGroups(Structure s) { 296 int nrGroups = 0; 297 298 List<Chain> chains = s.getChains(0); 299 for (Chain c : chains) { 300 nrGroups += c.getAtomLength(); 301 } 302 return nrGroups; 303 } 304 305 /** 306 * Returns an array of the requested Atoms from the Structure object. 307 * Iterates over all groups and checks if the requested atoms are in this 308 * group, no matter if this is a {@link AminoAcid} or {@link HetatomImpl} 309 * group. If the group does not contain all requested atoms then no atoms 310 * are added for that group. For structures with more than one model, only 311 * model 0 will be used. 312 * 313 * @param s 314 * the structure to get the atoms from 315 * 316 * @param atomNames 317 * contains the atom names to be used. 318 * @return an Atom[] array 319 */ 320 public static Atom[] getAtomArray(Structure s, String[] atomNames) { 321 List<Chain> chains = s.getModel(0); 322 323 List<Atom> atoms = new ArrayList<>(); 324 325 extractAtoms(atomNames, chains, atoms); 326 return atoms.toArray(new Atom[0]); 327 328 } 329 330 /** 331 * Returns an array of the requested Atoms from the Structure object. In 332 * contrast to {@link #getAtomArray(Structure, String[])} this method 333 * iterates over all chains. Iterates over all chains and groups and checks 334 * if the requested atoms are in this group, no matter if this is a 335 * {@link AminoAcid} or {@link HetatomImpl} group. If the group does not 336 * contain all requested atoms then no atoms are added for that group. For 337 * structures with more than one model, only model 0 will be used. 338 * 339 * @param s 340 * the structure to get the atoms from 341 * 342 * @param atomNames 343 * contains the atom names to be used. 344 * @return an Atom[] array 345 */ 346 public static Atom[] getAtomArrayAllModels(Structure s, 347 String[] atomNames) { 348 349 List<Atom> atoms = new ArrayList<>(); 350 351 for (int i = 0; i < s.nrModels(); i++) { 352 List<Chain> chains = s.getModel(i); 353 extractAtoms(atomNames, chains, atoms); 354 } 355 return atoms.toArray(new Atom[0]); 356 357 } 358 359 /** 360 * Convert all atoms of the structure (all models) into an Atom array 361 * 362 * @param s 363 * input structure 364 * @return all atom array 365 */ 366 public static Atom[] getAllAtomArray(Structure s) { 367 List<Atom> atoms = new ArrayList<>(); 368 369 AtomIterator iter = new AtomIterator(s); 370 while (iter.hasNext()) { 371 Atom a = iter.next(); 372 atoms.add(a); 373 } 374 return atoms.toArray(new Atom[0]); 375 } 376 /** 377 * Convert all atoms of the structure (specified model) into an Atom array 378 * 379 * @param s 380 * input structure 381 * @return all atom array 382 */ 383 public static Atom[] getAllAtomArray(Structure s, int model) { 384 List<Atom> atoms = new ArrayList<>(); 385 386 AtomIterator iter = new AtomIterator(s,model); 387 while (iter.hasNext()) { 388 Atom a = iter.next(); 389 atoms.add(a); 390 } 391 return atoms.toArray(new Atom[0]); 392 393 } 394 395 /** 396 * Returns and array of all atoms of the chain, including 397 * Hydrogens (if present) and all HETATOMs. Waters are not included. 398 * 399 * @param c 400 * input chain 401 * @return all atom array 402 */ 403 public static Atom[] getAllAtomArray(Chain c) { 404 List<Atom> atoms = new ArrayList<>(); 405 406 for (Group g : c.getAtomGroups()) { 407 if (g.isWater()) 408 continue; 409 atoms.addAll(g.getAtoms()); 410 } 411 return atoms.toArray(new Atom[0]); 412 } 413 414 /** 415 * List of groups from the structure not included in ca (e.g. ligands). 416 * 417 * Unaligned groups are searched from all chains referenced in ca, as well 418 * as any chains in the first model of the structure from ca[0], if any. 419 * 420 * @param ca an array of atoms 421 * @return 422 */ 423 public static List<Group> getUnalignedGroups(Atom[] ca) { 424 Set<Chain> chains = new HashSet<>(); 425 Set<Group> caGroups = new HashSet<>(); 426 427 // Create list of all chains in this structure 428 Structure s = null; 429 if (ca.length > 0) { 430 Group g = ca[0].getGroup(); 431 if (g != null) { 432 Chain c = g.getChain(); 433 if (c != null) { 434 s = c.getStructure(); 435 } 436 } 437 } 438 if (s != null) { 439 // Add all chains from the structure 440 chains.addAll(s.getChains(0)); 441 } 442 443 // Add groups and chains from ca 444 for (Atom a : ca) { 445 Group g = a.getGroup(); 446 if (g != null) { 447 caGroups.add(g); 448 449 Chain c = g.getChain(); 450 if (c != null) { 451 chains.add(c); 452 } 453 } 454 } 455 456 // Iterate through all chains, finding groups not in ca 457 List<Group> unadded = new ArrayList<>(); 458 for (Chain c : chains) { 459 for (Group g : c.getAtomGroups()) { 460 if (!caGroups.contains(g)) { 461 unadded.add(g); 462 } 463 } 464 } 465 return unadded; 466 } 467 468 /** 469 * Finds all ligand groups from the target which fall within the cutoff distance 470 * of some atom from the query set. 471 * 472 * @param target Set of groups including the ligands 473 * @param query Atom selection 474 * @param cutoff Distance from query atoms to consider, in angstroms 475 * @return All groups from the target with at least one atom within cutoff of a query atom 476 * @see StructureTools#DEFAULT_LIGAND_PROXIMITY_CUTOFF 477 */ 478 public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[] query, double cutoff) { 479 // Spatial hashing of the reduced structure 480 Grid grid = new Grid(cutoff); 481 grid.addAtoms(query); 482 483 List<Group> ligands = new ArrayList<>(); 484 for(Group g :target ) { 485 // don't worry about waters 486 if(g.isWater()) { 487 continue; 488 } 489 490 if(g.isPolymeric() ) { 491 // Polymers aren't ligands 492 continue; 493 } 494 495 // It is a ligand! 496 497 // Check that it's within cutoff of something in reduced 498 List<Atom> groupAtoms = g.getAtoms(); 499 if( ! grid.hasAnyContact(Calc.atomsToPoints(groupAtoms))) { 500 continue; 501 } 502 503 ligands.add(g); 504 } 505 return ligands; 506 } 507 508 /** 509 * Adds a particular group to a structure. A new chain will be created if necessary. 510 * 511 * <p>When adding multiple groups, pass the return value of one call as the 512 * chainGuess parameter of the next call for efficiency. 513 * <pre> 514 * Chain guess = null; 515 * for(Group g : groups) { 516 * guess = addGroupToStructure(s, g, guess ); 517 * } 518 * </pre> 519 * @param s structure to receive the group 520 * @param g group to add 521 * @param chainGuess (optional) If not null, should be a chain from s. Used 522 * to improve performance when adding many groups from the same chain 523 * @param clone Indicates whether the input group should be cloned before 524 * being added to the new chain 525 * @return the chain g was added to 526 */ 527 public static Chain addGroupToStructure(Structure s, Group g, int model, Chain chainGuess, boolean clone ) { 528 // Find or create the chain 529 String chainId = g.getChainId(); 530 assert !chainId.isEmpty(); 531 Chain chain; 532 if (chainGuess != null && chainGuess.getId().equals(chainId)) { 533 // previously guessed chain 534 chain = chainGuess; 535 } else { 536 // Try to guess 537 chain = s.getChain(chainId, model); 538 if (chain == null) { 539 // no chain found 540 chain = new ChainImpl(); 541 chain.setId(chainId); 542 543 Chain oldChain = g.getChain(); 544 chain.setName(oldChain.getName()); 545 546 EntityInfo oldEntityInfo = oldChain.getEntityInfo(); 547 548 EntityInfo newEntityInfo; 549 if (oldEntityInfo == null) { 550 newEntityInfo = new EntityInfo(); 551 s.addEntityInfo(newEntityInfo); 552 } else { 553 newEntityInfo = s.getEntityById(oldEntityInfo.getMolId()); 554 if (newEntityInfo == null) { 555 newEntityInfo = new EntityInfo(oldEntityInfo); 556 s.addEntityInfo(newEntityInfo); 557 } 558 } 559 newEntityInfo.addChain(chain); 560 chain.setEntityInfo(newEntityInfo); 561 562 // TODO Do the seqres need to be cloned too? -SB 2016-10-7 563 chain.setSeqResGroups(oldChain.getSeqResGroups()); 564 chain.setSeqMisMatches(oldChain.getSeqMisMatches()); 565 566 s.addChain(chain, model); 567 } 568 } 569 570 // Add cloned group 571 if (clone) { 572 g = (Group) g.clone(); 573 } 574 chain.addGroup(g); 575 576 return chain; 577 } 578 579 /** 580 * Add a list of groups to a new structure. Chains will be automatically 581 * created in the new structure as needed. 582 * @param s structure to receive the group 583 * @param groups groups to add 584 * @param model model number 585 * @param clone Indicates whether the input groups should be cloned before 586 * being added to the new chain 587 */ 588 public static void addGroupsToStructure(Structure s, Collection<Group> groups, int model, boolean clone) { 589 Chain chainGuess = null; 590 for(Group g : groups) { 591 chainGuess = addGroupToStructure(s, g, model, chainGuess, clone); 592 } 593 } 594 595 /** 596 * Expand a set of atoms into all groups from the same structure. 597 * 598 * If the structure is set, only the first atom is used (assuming all 599 * atoms come from the same original structure). 600 * If the atoms aren't linked to a structure (for instance, for cloned atoms), 601 * searches all chains of all atoms for groups. 602 * @param atoms Sample of atoms 603 * @return All groups from all chains accessible from the input atoms 604 */ 605 public static Set<Group> getAllGroupsFromSubset(Atom[] atoms) { 606 return getAllGroupsFromSubset(atoms,null); 607 } 608 609 /** 610 * Expand a set of atoms into all groups from the same structure. 611 * 612 * If the structure is set, only the first atom is used (assuming all 613 * atoms come from the same original structure). 614 * If the atoms aren't linked to a structure (for instance, for cloned atoms), 615 * searches all chains of all atoms for groups. 616 * @param atoms Sample of atoms 617 * @param types Type of groups to return (useful for getting only ligands, for instance). 618 * Null gets all groups. 619 * @return All groups from all chains accessible from the input atoms 620 */ 621 public static Set<Group> getAllGroupsFromSubset(Atom[] atoms,GroupType types) { 622 // Get the full structure 623 Structure s = null; 624 if (atoms.length > 0) { 625 Group g = atoms[0].getGroup(); 626 if (g != null) { 627 Chain c = g.getChain(); 628 if (c != null) { 629 s = c.getStructure(); 630 } 631 } 632 } 633 // Collect all groups from the structure 634 Set<Chain> allChains = new HashSet<>(); 635 if( s != null ) { 636 allChains.addAll(s.getChains()); 637 } 638 // In case the structure wasn't set, need to use ca chains too 639 for(Atom a : atoms) { 640 Group g = a.getGroup(); 641 if(g != null) { 642 Chain c = g.getChain(); 643 if( c != null ) { 644 allChains.add(c); 645 } 646 } 647 } 648 649 if(allChains.isEmpty() ) { 650 return Collections.emptySet(); 651 } 652 653 // Extract all ligand groups 654 Set<Group> full = new HashSet<>(); 655 for(Chain c : allChains) { 656 if(types == null) { 657 full.addAll(c.getAtomGroups()); 658 } else { 659 full.addAll(c.getAtomGroups(types)); 660 } 661 } 662 663 return full; 664 } 665 666 667 /** 668 * Returns and array of all non-Hydrogen atoms in the given Structure, 669 * optionally including HET atoms or not. Waters are not included. 670 * 671 * @param s 672 * @param hetAtoms 673 * if true HET atoms are included in array, if false they are not 674 * @return 675 */ 676 public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms) { 677 AtomIterator iter = new AtomIterator(s); 678 return getAllNonHAtomArray(s, hetAtoms, iter); 679 } 680 /** 681 * Returns and array of all non-Hydrogen atoms in the given Structure, 682 * optionally including HET atoms or not. Waters are not included. 683 * 684 * @param s 685 * @param hetAtoms 686 * if true HET atoms are included in array, if false they are not 687 * @param modelNr Model number to draw atoms from 688 * @return 689 */ 690 public static Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, int modelNr) { 691 AtomIterator iter = new AtomIterator(s,modelNr); 692 return getAllNonHAtomArray(s, hetAtoms, iter); 693 } 694 695 private static Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, AtomIterator iter) { 696 List<Atom> atoms = new ArrayList<>(); 697 698 while (iter.hasNext()) { 699 Atom a = iter.next(); 700 if (a.getElement() == Element.H) 701 continue; 702 703 Group g = a.getGroup(); 704 705 if (g.isWater()) 706 continue; 707 708 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 709 continue; 710 711 atoms.add(a); 712 } 713 return atoms.toArray(new Atom[0]); 714 } 715 716 /** 717 * Returns and array of all non-Hydrogen atoms in the given Chain, 718 * optionally including HET atoms or not Waters are not included. 719 * 720 * @param c 721 * @param hetAtoms 722 * if true HET atoms are included in array, if false they are not 723 * @return 724 */ 725 public static Atom[] getAllNonHAtomArray(Chain c, boolean hetAtoms) { 726 List<Atom> atoms = new ArrayList<>(); 727 728 for (Group g : c.getAtomGroups()) { 729 if (g.isWater()) 730 continue; 731 for (Atom a : g.getAtoms()) { 732 733 if (a.getElement() == Element.H) 734 continue; 735 736 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 737 continue; 738 739 atoms.add(a); 740 } 741 } 742 return atoms.toArray(new Atom[0]); 743 } 744 745 /** 746 * Returns and array of all non-Hydrogen atoms coordinates in the given Chain, 747 * optionally including HET atoms or not Waters are not included. 748 * 749 * @param c 750 * @param hetAtoms 751 * if true HET atoms are included in array, if false they are not 752 * @return 753 */ 754 public static Point3d[] getAllNonHCoordsArray(Chain c, boolean hetAtoms) { 755 List<Point3d> atoms = new ArrayList<>(); 756 757 for (Group g : c.getAtomGroups()) { 758 if (g.isWater()) 759 continue; 760 for (Atom a : g.getAtoms()) { 761 762 if (a.getElement() == Element.H) 763 continue; 764 765 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 766 continue; 767 768 atoms.add(a.getCoordsAsPoint3d()); 769 } 770 } 771 return atoms.toArray(new Point3d[0]); 772 } 773 774 /** 775 * Adds to the given atoms list, all atoms of groups that contained all 776 * requested atomNames, i.e. if a group does not contain all of the 777 * requested atom names, its atoms won't be added. 778 * 779 * @param atomNames 780 * @param chains 781 * @param atoms 782 */ 783 private static void extractAtoms(String[] atomNames, List<Chain> chains, List<Atom> atoms) { 784 785 for (Chain c : chains) { 786 787 for (Group g : c.getAtomGroups()) { 788 789 // a temp container for the atoms of this group 790 List<Atom> thisGroupAtoms = new ArrayList<>(); 791 // flag to check if this group contains all the requested atoms. 792 boolean thisGroupAllAtoms = true; 793 for (String atomName : atomNames) { 794 Atom a = g.getAtom(atomName); 795 796 if (a == null) { 797 // this group does not have a required atom, skip it... 798 thisGroupAllAtoms = false; 799 break; 800 } 801 thisGroupAtoms.add(a); 802 } 803 if (thisGroupAllAtoms) { 804 // add the atoms of this group to the array. 805 atoms.addAll(thisGroupAtoms); 806 } 807 } 808 } 809 } 810 811 /** 812 * Returns an array of the requested Atoms from the Chain object. Iterates 813 * over all groups and checks if the requested atoms are in this group, no 814 * matter if this is a AminoAcid or Hetatom group. If the group does not 815 * contain all requested atoms then no atoms are added for that group. 816 * 817 * @param c 818 * the Chain to get the atoms from 819 * 820 * @param atomNames 821 * contains the atom names to be used. 822 * @return an Atom[] array 823 */ 824 public static Atom[] getAtomArray(Chain c, String[] atomNames) { 825 826 List<Atom> atoms = new ArrayList<>(); 827 828 for (Group g : c.getAtomGroups()) { 829 830 // a temp container for the atoms of this group 831 List<Atom> thisGroupAtoms = new ArrayList<>(); 832 // flag to check if this group contains all the requested atoms. 833 boolean thisGroupAllAtoms = true; 834 for (String atomName : atomNames) { 835 Atom a = g.getAtom(atomName); 836 if (a == null) { 837 logger.debug("Group {} ({}) does not have the required atom '{}'", g.getResidueNumber(), g.getPDBName(), atomName); 838 // this group does not have a required atom, skip it... 839 thisGroupAllAtoms = false; 840 break; 841 } 842 thisGroupAtoms.add(a); 843 } 844 845 if (thisGroupAllAtoms) { 846 // add the atoms of this group to the array. 847 atoms.addAll(thisGroupAtoms); 848 } 849 850 } 851 return atoms.toArray(new Atom[0]); 852 853 } 854 855 /** 856 * Returns an Atom array of the C-alpha atoms. Any atom that is a carbon and 857 * has CA name will be returned. 858 * 859 * @param c 860 * the structure object 861 * @return an Atom[] array 862 * @see #getRepresentativeAtomArray(Chain) 863 */ 864 public static Atom[] getAtomCAArray(Chain c) { 865 List<Atom> atoms = new ArrayList<>(); 866 867 for (Group g : c.getAtomGroups()) { 868 if (g.hasAtom(CA_ATOM_NAME) 869 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 870 atoms.add(g.getAtom(CA_ATOM_NAME)); 871 } 872 } 873 874 return atoms.toArray(new Atom[0]); 875 } 876 877 /** 878 * Gets a representative atom for each group that is part of the chain 879 * backbone. Note that modified aminoacids won't be returned as part of the 880 * backbone if the {@link org.biojava.nbio.structure.chem.ReducedChemCompProvider} was used to load the 881 * structure. 882 * 883 * For amino acids, the representative is a CA carbon. For nucleotides, the 884 * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group 885 * types will be ignored. 886 * 887 * @param c 888 * @return representative Atoms of the chain backbone 889 * @since Biojava 4.1.0 890 */ 891 public static Atom[] getRepresentativeAtomArray(Chain c) { 892 List<Atom> atoms = new ArrayList<>(); 893 894 for (Group g : c.getAtomGroups()) { 895 896 switch (g.getType()) { 897 case AMINOACID: 898 if (g.hasAtom(CA_ATOM_NAME) 899 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 900 atoms.add(g.getAtom(CA_ATOM_NAME)); 901 } 902 break; 903 case NUCLEOTIDE: 904 if (g.hasAtom(NUCLEOTIDE_REPRESENTATIVE)) { 905 atoms.add(g.getAtom(NUCLEOTIDE_REPRESENTATIVE)); 906 } 907 break; 908 default: 909 // don't add 910 } 911 } 912 913 return atoms.toArray(new Atom[0]); 914 915 } 916 917 /** 918 * Provides an equivalent copy of Atoms in a new array. Clones everything, 919 * starting with parent groups and chains. The chain will only contain 920 * groups that are part of the input array. 921 * 922 * @param ca 923 * array of representative atoms, e.g. CA atoms 924 * @return Atom array 925 * @since Biojava 4.1.0 926 */ 927 public static Atom[] cloneAtomArray(Atom[] ca) { 928 Atom[] newCA = new Atom[ca.length]; 929 930 List<Chain> model = new ArrayList<>(); 931 int apos = -1; 932 for (Atom a : ca) { 933 apos++; 934 Group parentG = a.getGroup(); 935 Chain parentC = parentG.getChain(); 936 937 Chain newChain = null; 938 for (Chain c : model) { 939 if (c.getName().equals(parentC.getName())) { 940 newChain = c; 941 break; 942 } 943 } 944 if (newChain == null) { 945 newChain = new ChainImpl(); 946 newChain.setId(parentC.getId()); 947 newChain.setName(parentC.getName()); 948 model.add(newChain); 949 } 950 951 Group parentN = (Group) parentG.clone(); 952 953 newCA[apos] = parentN.getAtom(a.getName()); 954 try { 955 // if the group doesn't exist yet, this produces a StructureException 956 newChain.getGroupByPDB(parentN.getResidueNumber()); 957 } catch (StructureException e) { 958 // the group doesn't exist yet in the newChain, let's add it 959 newChain.addGroup(parentN); 960 } 961 962 } 963 return newCA; 964 } 965 966 /** 967 * Clone a set of representative Atoms, but returns the parent groups 968 * 969 * @param ca 970 * Atom array 971 * @return Group array 972 */ 973 public static Group[] cloneGroups(Atom[] ca) { 974 Group[] newGroup = new Group[ca.length]; 975 976 List<Chain> model = new ArrayList<>(); 977 int apos = -1; 978 for (Atom a : ca) { 979 apos++; 980 Group parentG = a.getGroup(); 981 Chain parentC = parentG.getChain(); 982 983 Chain newChain = null; 984 for (Chain c : model) { 985 if (c.getName().equals(parentC.getName())) { 986 newChain = c; 987 break; 988 } 989 } 990 if (newChain == null) { 991 newChain = new ChainImpl(); 992 newChain.setName(parentC.getName()); 993 model.add(newChain); 994 } 995 996 Group ng = (Group) parentG.clone(); 997 newGroup[apos] = ng; 998 newChain.addGroup(ng); 999 } 1000 return newGroup; 1001 } 1002 1003 /** 1004 * Utility method for working with circular permutations. Creates a 1005 * duplicated and cloned set of Calpha atoms from the input array. 1006 * 1007 * @param ca2 1008 * atom array 1009 * @return cloned and duplicated set of input array 1010 */ 1011 public static Atom[] duplicateCA2(Atom[] ca2) { 1012 // we don't want to rotate input atoms, do we? 1013 Atom[] ca2clone = new Atom[ca2.length * 2]; 1014 1015 int pos = 0; 1016 1017 Chain c = null; 1018 String prevChainId = ""; 1019 for (Atom a : ca2) { 1020 Group g = (Group) a.getGroup().clone(); // works because each group 1021 // has only a single atom 1022 1023 if (c == null) { 1024 c = new ChainImpl(); 1025 Chain orig = a.getGroup().getChain(); 1026 c.setId(orig.getId()); 1027 c.setName(orig.getName()); 1028 } else { 1029 Chain orig = a.getGroup().getChain(); 1030 if (!orig.getId().equals(prevChainId)) { 1031 c = new ChainImpl(); 1032 c.setId(orig.getId()); 1033 c.setName(orig.getName()); 1034 } 1035 } 1036 1037 c.addGroup(g); 1038 ca2clone[pos] = g.getAtom(a.getName()); 1039 1040 pos++; 1041 } 1042 1043 // Duplicate ca2! 1044 c = null; 1045 prevChainId = ""; 1046 for (Atom a : ca2) { 1047 Group g = (Group) a.getGroup().clone(); 1048 1049 if (c == null) { 1050 c = new ChainImpl(); 1051 Chain orig = a.getGroup().getChain(); 1052 c.setId(orig.getId()); 1053 c.setName(orig.getName()); 1054 } else { 1055 Chain orig = a.getGroup().getChain(); 1056 if (!orig.getId().equals(prevChainId)) { 1057 c = new ChainImpl(); 1058 c.setId(orig.getId()); 1059 c.setName(orig.getName()); 1060 } 1061 } 1062 1063 c.addGroup(g); 1064 ca2clone[pos] = g.getAtom(a.getName()); 1065 1066 pos++; 1067 } 1068 1069 return ca2clone; 1070 1071 } 1072 1073 /** 1074 * Return an Atom array of the C-alpha atoms. Any atom that is a carbon and 1075 * has CA name will be returned. 1076 * 1077 * @param s 1078 * the structure object 1079 * @return an Atom[] array 1080 * @see #getRepresentativeAtomArray(Structure) 1081 */ 1082 public static Atom[] getAtomCAArray(Structure s) { 1083 1084 List<Atom> atoms = new ArrayList<>(); 1085 1086 for (Chain c : s.getChains()) { 1087 for (Group g : c.getAtomGroups()) { 1088 if (g.hasAtom(CA_ATOM_NAME) 1089 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 1090 atoms.add(g.getAtom(CA_ATOM_NAME)); 1091 } 1092 } 1093 } 1094 1095 return atoms.toArray(new Atom[atoms.size()]); 1096 } 1097 1098 /** 1099 * Gets a representative atom for each group that is part of the chain 1100 * backbone. Note that modified aminoacids won't be returned as part of the 1101 * backbone if the {@link org.biojava.nbio.structure.chem.ReducedChemCompProvider} was used to load the 1102 * structure. 1103 * 1104 * For amino acids, the representative is a CA carbon. For nucleotides, the 1105 * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group 1106 * types will be ignored. 1107 * 1108 * @param s 1109 * Input structure 1110 * @return representative Atoms of the structure backbone 1111 * @since Biojava 4.1.0 1112 */ 1113 public static Atom[] getRepresentativeAtomArray(Structure s) { 1114 1115 List<Atom> atoms = new ArrayList<>(); 1116 1117 for (Chain c : s.getChains()) { 1118 Atom[] chainAtoms = getRepresentativeAtomArray(c); 1119 atoms.addAll(Arrays.asList(chainAtoms)); 1120 } 1121 1122 return atoms.toArray(new Atom[0]); 1123 } 1124 1125 /** 1126 * Return an Atom array of the main chain atoms: CA, C, N, O Any group that 1127 * contains those atoms will be included, be it a standard aminoacid or not 1128 * 1129 * @param s 1130 * the structure object 1131 * @return an Atom[] array 1132 */ 1133 public static Atom[] getBackboneAtomArray(Structure s) { 1134 List<Atom> atoms = new ArrayList<>(); 1135 for (Chain c : s.getChains()) { 1136 for (Group g : c.getAtomGroups()) { 1137 if (g.hasAminoAtoms()) { 1138 if (g.getType() == GroupType.NUCLEOTIDE) { 1139 addNucleotideAndAminoAtoms(atoms, g, NUCLEOTIDE_BACKBONE_ATOMS); 1140 } else { 1141 addNucleotideAndAminoAtoms(atoms, g, AMINOACID_BACKBONE_ATOMS); 1142 } 1143 } 1144 } 1145 } 1146 return atoms.toArray(new Atom[0]); 1147 } 1148 1149 /** 1150 * This method will be used to add the Nucleotide and Amino atoms to the backbone Atom arrays based on the pre-defined Atom names. 1151 * @param atoms 1152 * @param g 1153 * @param atomNames 1154 */ 1155 private static void addNucleotideAndAminoAtoms(List<Atom> atoms, Group g, Set<String> atomNames) { 1156 for (Atom a : g.getAtoms()) { 1157 if (atomNames.contains(a.getName())) { 1158 atoms.add(a); 1159 } 1160 } 1161 } 1162 1163 1164 /** 1165 * Convert three character amino acid codes into single character e.g. 1166 * convert CYS to C. Valid 3-letter codes will be those of the standard 20 1167 * amino acids plus MSE, CSE, SEC, PYH, PYL (see the {@link #aminoAcids} 1168 * map) 1169 * 1170 * @return the 1 letter code, or null if the given 3 letter code does not 1171 * correspond to an amino acid code 1172 * @param groupCode3 1173 * a three character amino acid representation String 1174 * @see #get1LetterCode(String) 1175 */ 1176 public static Character get1LetterCodeAmino(String groupCode3) { 1177 return aminoAcids.get(groupCode3); 1178 } 1179 1180 /** 1181 * Convert a three letter amino acid or nucleotide code into a single 1182 * character code. If the code does not correspond to an amino acid or 1183 * nucleotide, returns {@link #UNKNOWN_GROUP_LABEL}. 1184 * 1185 * Returned null for nucleotides prior to version 4.0.1. 1186 * 1187 * @param groupCode3 1188 * three letter representation 1189 * @return The 1-letter abbreviation 1190 */ 1191 public static Character get1LetterCode(String groupCode3) { 1192 1193 Character code1; 1194 1195 // is it a standard amino acid ? 1196 code1 = get1LetterCodeAmino(groupCode3); 1197 1198 if (code1 == null) { 1199 // hm groupCode3 is not standard 1200 // perhaps it is a nucleotide? 1201 groupCode3 = groupCode3.trim(); 1202 if (isNucleotide(groupCode3)) { 1203 code1 = nucleotides30.get(groupCode3); 1204 if (code1 == null) { 1205 code1 = nucleotides23.get(groupCode3); 1206 } 1207 if (code1 == null) { 1208 code1 = UNKNOWN_GROUP_LABEL; 1209 } 1210 } else { 1211 // does not seem to be so let's assume it is 1212 // nonstandard aminoacid and label it "X" 1213 // logger.warning("unknown group name "+groupCode3 ); 1214 code1 = UNKNOWN_GROUP_LABEL; 1215 } 1216 } 1217 1218 return code1; 1219 1220 } 1221 1222 /** 1223 * Test if the three-letter code of an ATOM entry corresponds to a 1224 * nucleotide or to an aminoacid. 1225 * 1226 * @param groupCode3 1227 * 3-character code for a group. 1228 * 1229 */ 1230 public static boolean isNucleotide(String groupCode3) { 1231 String code = groupCode3.trim(); 1232 return nucleotides30.containsKey(code) 1233 || nucleotides23.containsKey(code); 1234 } 1235 1236 public static String convertAtomsToSeq(Atom[] atoms) { 1237 1238 StringBuilder buf = new StringBuilder(); 1239 Group prevGroup = null; 1240 for (Atom a : atoms) { 1241 Group g = a.getGroup(); 1242 if (prevGroup != null) { 1243 if (prevGroup.equals(g)) { 1244 // we add each group only once. 1245 continue; 1246 } 1247 } 1248 String code3 = g.getPDBName(); 1249 Character code1 = get1LetterCodeAmino(code3); 1250 if (code1 == null) 1251 code1 = UNKNOWN_GROUP_LABEL; 1252 1253 buf.append(code1); 1254 1255 prevGroup = g; 1256 1257 } 1258 return buf.toString(); 1259 } 1260 1261 /** 1262 * Get a group represented by a ResidueNumber. 1263 * 1264 * @param struc 1265 * a {@link Structure} 1266 * @param pdbResNum 1267 * a {@link ResidueNumber} 1268 * @return a group in the structure that is represented by the pdbResNum. 1269 * @throws StructureException 1270 * if the group cannot be found. 1271 */ 1272 public static Group getGroupByPDBResidueNumber(Structure struc, 1273 ResidueNumber pdbResNum) throws StructureException { 1274 if (struc == null || pdbResNum == null) { 1275 throw new IllegalArgumentException("Null argument(s)."); 1276 } 1277 1278 Chain chain = struc.getPolyChainByPDB(pdbResNum.getChainName()); 1279 1280 return chain.getGroupByPDB(pdbResNum); 1281 } 1282 1283 /** 1284 * Returns the set of intra-chain contacts for the given chain for given 1285 * atom names, i.e. the contact map. Uses a spatial hashing algorithm that 1286 * speeds up the calculation without need of full distance matrix. The 1287 * parsing mode {@link FileParsingParameters#setAlignSeqRes(boolean)} needs 1288 * to be set to true for this to work. 1289 * 1290 * @param chain 1291 * @param atomNames 1292 * the array with atom names to be used. Beware: CA will do both 1293 * C-alphas an Calciums if null all non-H atoms of non-hetatoms 1294 * will be used 1295 * @param cutoff 1296 * @return 1297 */ 1298 public static AtomContactSet getAtomsInContact(Chain chain, 1299 String[] atomNames, double cutoff) { 1300 Grid grid = new Grid(cutoff); 1301 1302 Atom[] atoms = null; 1303 if (atomNames == null) { 1304 atoms = getAllNonHAtomArray(chain, false); 1305 } else { 1306 atoms = getAtomArray(chain, atomNames); 1307 } 1308 // If tha 1309 if(atoms.length==0){ 1310 logger.warn("No atoms found for buidling grid!"); 1311 return new AtomContactSet(cutoff); 1312 } 1313 grid.addAtoms(atoms); 1314 1315 return grid.getAtomContacts(); 1316 } 1317 1318 /** 1319 * Returns the set of intra-chain contacts for the given chain for all non-H 1320 * atoms of non-hetatoms, i.e. the contact map. Uses a spatial hashing 1321 * algorithm that speeds up the calculation without need of full distance 1322 * matrix. The parsing mode 1323 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1324 * true for this to work. 1325 * 1326 * @param chain 1327 * @param cutoff 1328 * @return 1329 */ 1330 public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) { 1331 return getAtomsInContact(chain, (String[]) null, cutoff); 1332 } 1333 1334 /** 1335 * Returns the set of intra-chain contacts for the given chain for C-alpha 1336 * atoms (including non-standard aminoacids appearing as HETATM groups), 1337 * i.e. the contact map. Uses a spatial hashing algorithm that speeds up 1338 * the calculation without need of full distance matrix. The parsing mode 1339 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1340 * true for this to work. 1341 * 1342 * @param chain 1343 * @param cutoff 1344 * @return 1345 * @see #getRepresentativeAtomsInContact(Chain, double) 1346 */ 1347 public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) { 1348 Grid grid = new Grid(cutoff); 1349 1350 Atom[] atoms = getAtomCAArray(chain); 1351 1352 grid.addAtoms(atoms); 1353 1354 return grid.getAtomContacts(); 1355 } 1356 1357 /** 1358 * Returns the set of intra-chain contacts for the given chain for C-alpha 1359 * or C3' atoms (including non-standard aminoacids appearing as HETATM 1360 * groups), i.e. the contact map. Uses a spatial hashing algorithm that 1361 * speeds up the calculation without need of full distance matrix. 1362 * 1363 * @param chain 1364 * @param cutoff 1365 * @return 1366 * @since Biojava 4.1.0 1367 */ 1368 public static AtomContactSet getRepresentativeAtomsInContact(Chain chain, 1369 double cutoff) { 1370 Grid grid = new Grid(cutoff); 1371 1372 Atom[] atoms = getRepresentativeAtomArray(chain); 1373 1374 grid.addAtoms(atoms); 1375 1376 return grid.getAtomContacts(); 1377 } 1378 1379 /** 1380 * Returns the set of inter-chain contacts between the two given chains for 1381 * the given atom names. Uses a spatial hashing algorithm that speeds up 1382 * the calculation without need of full distance matrix. The parsing mode 1383 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1384 * true for this to work. 1385 * 1386 * @param chain1 1387 * @param chain2 1388 * @param atomNames 1389 * the array with atom names to be used. For Calphas use {"CA"}, 1390 * if null all non-H atoms will be used. Note HET atoms are 1391 * ignored unless this parameter is null. 1392 * @param cutoff 1393 * @param hetAtoms 1394 * if true HET atoms are included, if false they are not 1395 * @return 1396 */ 1397 public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2, 1398 String[] atomNames, double cutoff, boolean hetAtoms) { 1399 Grid grid = new Grid(cutoff); 1400 Atom[] atoms1 = null; 1401 Atom[] atoms2 = null; 1402 if (atomNames == null) { 1403 atoms1 = getAllNonHAtomArray(chain1, hetAtoms); 1404 atoms2 = getAllNonHAtomArray(chain2, hetAtoms); 1405 } else { 1406 atoms1 = getAtomArray(chain1, atomNames); 1407 atoms2 = getAtomArray(chain2, atomNames); 1408 } 1409 grid.addAtoms(atoms1, atoms2); 1410 1411 return grid.getAtomContacts(); 1412 } 1413 1414 /** 1415 * Returns the set of inter-chain contacts between the two given chains for 1416 * all non-H atoms. Uses a spatial hashing algorithm that speeds up the 1417 * calculation without need of full distance matrix. The parsing mode 1418 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1419 * true for this to work. 1420 * 1421 * @param chain1 1422 * @param chain2 1423 * @param cutoff 1424 * @param hetAtoms 1425 * if true HET atoms are included, if false they are not 1426 * @return 1427 */ 1428 public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2, 1429 double cutoff, boolean hetAtoms) { 1430 return getAtomsInContact(chain1, chain2, null, cutoff, hetAtoms); 1431 } 1432 1433 /** 1434 * Finds Groups in {@code structure} that contain at least one Atom that is 1435 * within {@code radius} Angstroms of {@code centroid}. 1436 * 1437 * @param structure 1438 * The structure from which to find Groups 1439 * @param centroid 1440 * The centroid of the shell 1441 * @param excludeResidues 1442 * A list of ResidueNumbers to exclude 1443 * @param radius 1444 * The radius from {@code centroid}, in Angstroms 1445 * @param includeWater 1446 * Whether to include Groups whose <em>only</em> atoms are water 1447 * @param useAverageDistance 1448 * When set to true, distances are the arithmetic mean (1-norm) 1449 * of the distances of atoms that belong to the group and that 1450 * are within the shell; otherwise, distances are the minimum of 1451 * these values 1452 * @return A map of Groups within (or partially within) the shell, to their 1453 * distances in Angstroms 1454 */ 1455 public static Map<Group, Double> getGroupDistancesWithinShell( 1456 Structure structure, Atom centroid, 1457 Set<ResidueNumber> excludeResidues, double radius, 1458 boolean includeWater, boolean useAverageDistance) { 1459 1460 // for speed, we avoid calculating square roots 1461 radius = radius * radius; 1462 1463 Map<Group, Double> distances = new HashMap<>(); 1464 1465 // we only need this if we're averaging distances 1466 // note that we can't use group.getAtoms().size() because some the 1467 // group's atoms be outside the shell 1468 Map<Group, Integer> atomCounts = new HashMap<>(); 1469 1470 for (Chain chain : structure.getChains()) { 1471 groupLoop: for (Group chainGroup : chain.getAtomGroups()) { 1472 1473 // exclude water 1474 if (!includeWater && chainGroup.isWater()) 1475 continue; 1476 1477 // check blacklist of residue numbers 1478 for (ResidueNumber rn : excludeResidues) { 1479 if (rn.equals(chainGroup.getResidueNumber())) 1480 continue groupLoop; 1481 } 1482 1483 for (Atom testAtom : chainGroup.getAtoms()) { 1484 1485 // use getDistanceFast as we are doing a lot of comparisons 1486 double dist = Calc.getDistanceFast(centroid, testAtom); 1487 1488 // if we're the shell 1489 if (dist <= radius) { 1490 if (!distances.containsKey(chainGroup)) 1491 distances.put(chainGroup, Double.POSITIVE_INFINITY); 1492 if (useAverageDistance) { 1493 // sum the distance; we'll divide by the total 1494 // number later 1495 // here, we CANNOT use fastDistance (distance 1496 // squared) because we want the arithmetic mean 1497 distances.put(chainGroup, distances.get(chainGroup) 1498 + Math.sqrt(dist)); 1499 if (!atomCounts.containsKey(chainGroup)) 1500 atomCounts.put(chainGroup, 0); 1501 atomCounts.put(chainGroup, 1502 atomCounts.get(chainGroup) + 1); 1503 } else { 1504 // take the minimum distance among all atoms of 1505 // chainGroup 1506 // note that we can't break here because we might 1507 // find a smaller distance 1508 if (dist < distances.get(chainGroup)) { 1509 distances.put(chainGroup, dist); 1510 } 1511 } 1512 } 1513 1514 } 1515 } 1516 } 1517 1518 if (useAverageDistance) { 1519 for (Map.Entry<Group, Double> entry : distances.entrySet()) { 1520 int count = atomCounts.get(entry.getKey()); 1521 distances.put(entry.getKey(), entry.getValue() / count); 1522 } 1523 } else { 1524 // in this case we used getDistanceFast 1525 distances.replaceAll((k, v) -> Math.sqrt(v)); 1526 } 1527 1528 return distances; 1529 1530 } 1531 1532 public static Set<Group> getGroupsWithinShell(Structure structure, 1533 Atom atom, Set<ResidueNumber> excludeResidues, double distance, 1534 boolean includeWater) { 1535 1536 // square the distance to use as a comparison against getDistanceFast 1537 // which returns the square of a distance. 1538 distance = distance * distance; 1539 1540 Set<Group> returnSet = new LinkedHashSet<>(); 1541 for (Chain chain : structure.getChains()) { 1542 groupLoop: for (Group chainGroup : chain.getAtomGroups()) { 1543 if (!includeWater && chainGroup.isWater()) 1544 continue; 1545 for (ResidueNumber rn : excludeResidues) { 1546 if (rn.equals(chainGroup.getResidueNumber())) 1547 continue groupLoop; 1548 } 1549 for (Atom atomB : chainGroup.getAtoms()) { 1550 1551 // use getDistanceFast as we are doing a lot of comparisons 1552 double dist = Calc.getDistanceFast(atom, atomB); 1553 if (dist <= distance) { 1554 returnSet.add(chainGroup); 1555 break; 1556 } 1557 1558 } 1559 } 1560 } 1561 return returnSet; 1562 } 1563 1564 /** 1565 * <p> 1566 * Returns a Set of Groups in a structure within the distance specified of a 1567 * given group. 1568 * </p> 1569 * <p> 1570 * Updated 18-Sep-2015 sroughley to return a Set so only a unique set of 1571 * Groups returned 1572 * 1573 * @param structure 1574 * The structure to work with 1575 * @param group 1576 * The 'query' group 1577 * @param distance 1578 * The cutoff distance 1579 * @param includeWater 1580 * Should water residues be included in the output? 1581 * @return {@link LinkedHashSet} of {@link Group}s within at least one atom 1582 * with {@code distance} of at least one atom in {@code group} 1583 */ 1584 public static Set<Group> getGroupsWithinShell(Structure structure, 1585 Group group, double distance, boolean includeWater) { 1586 1587 Set<Group> returnList = new LinkedHashSet<>(); 1588 1589 Set<ResidueNumber> excludeGroups = new HashSet<>(); 1590 excludeGroups.add(group.getResidueNumber()); 1591 for (Atom atom : group.getAtoms()) { 1592 Set<Group> set = getGroupsWithinShell(structure, atom, 1593 excludeGroups, distance, includeWater); 1594 returnList.addAll(set); 1595 } 1596 1597 return returnList; 1598 } 1599 1600 /** 1601 * Remove all models from a Structure and keep only the first 1602 * 1603 * @param s 1604 * original Structure 1605 * @return a structure that contains only the first model 1606 * @since 3.0.5 1607 */ 1608 public static Structure removeModels(Structure s) { 1609 if (s.nrModels() == 1) 1610 return s; 1611 1612 Structure n = new StructureImpl(); 1613 // go through whole substructure and clone ... 1614 1615 // copy structure data 1616 1617 n.setPdbId(s.getPdbId()); 1618 n.setName(s.getName()); 1619 1620 // TODO: do deep copying of data! 1621 n.setPDBHeader(s.getPDBHeader()); 1622 n.setDBRefs(s.getDBRefs()); 1623 1624 n.setSites(s.getSites()); 1625 1626 n.setChains(s.getModel(0)); 1627 1628 return n; 1629 1630 } 1631 1632 /** 1633 * Removes all polymeric and solvent groups from a list of groups 1634 * 1635 */ 1636 public static List<Group> filterLigands(List<Group> allGroups) { 1637 1638 List<Group> groups = new ArrayList<>(); 1639 for (Group g : allGroups) { 1640 1641 if ( g.isPolymeric()) 1642 continue; 1643 1644 if (!g.isWater()) { 1645 groups.add(g); 1646 } 1647 } 1648 1649 return groups; 1650 } 1651 1652 /** 1653 * Short version of {@link #getStructure(String, PDBFileParser, AtomCache)} 1654 * which creates new parsers when needed 1655 * 1656 * @param name 1657 * @return 1658 * @throws IOException 1659 * @throws StructureException 1660 */ 1661 public static Structure getStructure(String name) throws IOException, 1662 StructureException { 1663 return StructureTools.getStructure(name, null, null); 1664 } 1665 1666 /** 1667 * Flexibly get a structure from an input String. The intent of this method 1668 * is to allow any reasonable string which could refer to a structure to be 1669 * correctly parsed. The following are currently supported: 1670 * <ol> 1671 * <li>Filename (if name refers to an existing file) 1672 * <li>PDB ID 1673 * <li>SCOP domains 1674 * <li>PDP domains 1675 * <li>Residue ranges 1676 * <li>Other formats supported by AtomCache 1677 * </ol> 1678 * 1679 * @param name 1680 * Some reference to the protein structure 1681 * @param parser 1682 * A clean PDBFileParser to use if it is a file. If null, a 1683 * PDBFileParser will be instantiated if needed. 1684 * @param cache 1685 * An AtomCache to use if the structure can be fetched from the 1686 * PDB. If null, a AtomCache will be instantiated if needed. 1687 * @return A Structure object 1688 * @throws IOException 1689 * if name is an existing file, but doesn't parse correctly 1690 * @throws StructureException 1691 * if the format is unknown, or if AtomCache throws an 1692 * exception. 1693 */ 1694 public static Structure getStructure(String name, PDBFileParser parser, 1695 AtomCache cache) throws IOException, StructureException { 1696 File f = new File(FileDownloadUtils.expandUserHome(name)); 1697 if (f.exists()) { 1698 if (parser == null) { 1699 parser = new PDBFileParser(); 1700 } 1701 InputStream inStream = new FileInputStream(f); 1702 return parser.parsePDBFile(inStream); 1703 } else { 1704 if (cache == null) { 1705 cache = new AtomCache(); 1706 } 1707 return cache.getStructure(name); 1708 } 1709 } 1710 1711 /** 1712 * Cleans up the structure's alternate location (altloc) groups. All alternate location groups should have all atoms (except 1713 * in the case of microheterogenity) or when a deuterium exists. 1714 * Ensure that all the alt loc groups have all the atoms in the main group. 1715 * @param structure The Structure to be cleaned up 1716 */ 1717 public static void cleanUpAltLocs(Structure structure) { 1718 for (int i =0; i< structure.nrModels() ; i++){ 1719 for (Chain chain : structure.getModel(i)) { 1720 for (Group group : chain.getAtomGroups()) { 1721 for (Group altLocGroup : group.getAltLocs()) { 1722 for ( Atom groupAtom : group.getAtoms()) { 1723 // If this alt loc doesn't have this atom 1724 if (! altLocGroup.hasAtom(groupAtom.getName())) { 1725 // Fix for microheterogenity 1726 if (altLocGroup.getPDBName().equals(group.getPDBName())) { 1727 // If it's a Hydrogen then we check for it's Deuterated brother 1728 if(!hasDeuteratedEquiv(groupAtom, altLocGroup)){ 1729 altLocGroup.addAtom(groupAtom); 1730 } 1731 } 1732 } 1733 } 1734 } 1735 } 1736 } 1737 } 1738 } 1739 1740 /** 1741 * Check to see if an Deuterated atom has a non deuterated brother in the group. 1742 * @param atom the input atom that is putatively deuterium 1743 * @param currentGroup the group the atom is in 1744 * @return true if the atom is deuterated and it's hydrogen equive exists. 1745 */ 1746 public static boolean hasNonDeuteratedEquiv(Atom atom, Group currentGroup) { 1747 // If it's deuterated and has a non-deuterated brother 1748 return atom.getElement() == Element.D && currentGroup.hasAtom(replaceFirstChar(atom.getName(), 'D', 'H')); 1749 } 1750 1751 /** 1752 * Check to see if a Hydrogen has a Deuterated brother in the group. 1753 * @param atom the input atom that is putatively hydorgen 1754 * @param currentGroup the group the atom is in 1755 * @return true if the atom is hydrogen and it's Deuterium equiv exists. 1756 */ 1757 public static boolean hasDeuteratedEquiv(Atom atom, Group currentGroup) { 1758 // If it's hydrogen and has a deuterated brother 1759 return atom.getElement() == Element.H && currentGroup.hasAtom(replaceFirstChar(atom.getName(), 'H', 'D')); 1760 } 1761 1762 private static String replaceFirstChar(String name, char c, char d) { 1763 if(name.charAt(0)==c){ 1764 return name.replaceFirst(String.valueOf(c), String.valueOf(d)); 1765 } 1766 return name; 1767 } 1768 1769 /** 1770 * Remove all atoms but the representative atoms (C alphas or phosphates) from the given structure. 1771 * @param structure the structure 1772 * @since 5.4.0 1773 */ 1774 public static void reduceToRepresentativeAtoms(Structure structure) { 1775 for (int modelIdx = 0; modelIdx<structure.nrModels(); modelIdx++) { 1776 for (Chain c : structure.getPolyChains(modelIdx)) { 1777 for (Group g : c.getAtomGroups()) { 1778 List<Atom> atoms = g.getAtoms(); 1779 if (g.isAminoAcid()) { 1780 atoms.removeIf(a->!a.getName().equals(CA_ATOM_NAME)); 1781 } else if (g.isNucleotide()) { 1782 atoms.removeIf(a->!a.getName().equals(NUCLEOTIDE_REPRESENTATIVE)); 1783 } 1784 // else we keep all other atoms. We are concerned only about aminoacids and nucleotides that make up the bulk of the structures 1785 } 1786 } 1787 } 1788 } 1789}