001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Jan 4, 2006 021 * 022 */ 023package org.biojava.nbio.structure; 024 025import java.io.File; 026import java.io.FileInputStream; 027import java.io.IOException; 028import java.io.InputStream; 029import java.util.ArrayList; 030import java.util.Collection; 031import java.util.Collections; 032import java.util.HashMap; 033import java.util.HashSet; 034import java.util.Iterator; 035import java.util.LinkedHashSet; 036import java.util.List; 037import java.util.Map; 038import java.util.Set; 039 040import javax.vecmath.Point3d; 041 042import org.biojava.nbio.structure.align.util.AtomCache; 043import org.biojava.nbio.structure.contact.AtomContactSet; 044import org.biojava.nbio.structure.contact.Grid; 045import org.biojava.nbio.structure.io.FileParsingParameters; 046import org.biojava.nbio.structure.io.PDBFileParser; 047import org.biojava.nbio.core.util.FileDownloadUtils; 048import org.slf4j.Logger; 049import org.slf4j.LoggerFactory; 050 051/** 052 * A class that provides some tool methods. 053 * 054 * @author Andreas Prlic 055 * @author Jules Jacobsen 056 * @since 1.0 057 */ 058public class StructureTools { 059 060 private static final Logger logger = LoggerFactory 061 .getLogger(StructureTools.class); 062 063 // Amino Acid backbone 064 /** 065 * The atom name of the backbone C-alpha atom. Note that this can be 066 * ambiguous depending on the context since Calcium atoms use the same name 067 * in PDB. 068 */ 069 public static final String CA_ATOM_NAME = "CA"; 070 071 /** 072 * The atom name for the backbone amide nitrogen 073 */ 074 public static final String N_ATOM_NAME = "N"; 075 076 /** 077 * The atom name for the backbone carbonyl 078 */ 079 public static final String C_ATOM_NAME = "C"; 080 081 /** 082 * The atom name for the backbone carbonyl oxygen 083 */ 084 public static final String O_ATOM_NAME = "O"; 085 086 /** 087 * The atom name of the side-chain C-beta atom 088 */ 089 public static final String CB_ATOM_NAME = "CB"; 090 091 // Nucleotide backbone 092 /** 093 * The atom name of the backbone C1' in RNA 094 */ 095 public static final String C1_ATOM_NAME = "C1'"; 096 /** 097 * The atom name of the backbone C2' in RNA 098 */ 099 public static final String C2_ATOM_NAME = "C2'"; 100 /** 101 * The atom name of the backbone C3' in RNA 102 */ 103 public static final String C3_ATOM_NAME = "C3'"; 104 /** 105 * The atom name of the backbone C4' in RNA 106 */ 107 public static final String C4_ATOM_NAME = "C4'"; 108 /** 109 * The atom name of the backbone O2' in RNA 110 */ 111 public static final String O2_ATOM_NAME = "O2'"; 112 /** 113 * The atom name of the backbone O3' in RNA 114 */ 115 public static final String O3_ATOM_NAME = "O3'"; 116 /** 117 * The atom name of the backbone O4' in RNA 118 */ 119 public static final String O4_ATOM_NAME = "O4'"; 120 /** 121 * The atom name of the backbone O4' in RNA 122 */ 123 public static final String O5_ATOM_NAME = "O5'"; 124 /** 125 * The atom name of the backbone O4' in RNA 126 */ 127 public static final String OP1_ATOM_NAME = "OP1"; 128 /** 129 * The atom name of the backbone O4' in RNA 130 */ 131 public static final String OP2_ATOM_NAME = "OP2"; 132 /** 133 * The atom name of the backbone phosphate in RNA 134 */ 135 public static final String P_ATOM_NAME = "P"; 136 137 /** 138 * The atom used as representative for nucleotides, equivalent to 139 * {@link #CA_ATOM_NAME} for proteins 140 */ 141 public static final String NUCLEOTIDE_REPRESENTATIVE = P_ATOM_NAME; 142 143 /** 144 * The character to use for unknown compounds in sequence strings 145 */ 146 public static final char UNKNOWN_GROUP_LABEL = 'X'; 147 148 /** 149 * Below this ratio of aminoacid/nucleotide residues to the sequence total, 150 * we use simple majority of aminoacid/nucleotide residues to decide the 151 * character of the chain (protein/nucleotide) 152 */ 153 public static final double RATIO_RESIDUES_TO_TOTAL = 0.95; 154 155 /** 156 * Threshold for plausible binding of a ligand to the selected substructure 157 */ 158 public static final double DEFAULT_LIGAND_PROXIMITY_CUTOFF = 5; 159 160 // there is a file format change in PDB 3.0 and nucleotides are being 161 // renamed 162 private static final Map<String, Character> nucleotides30; 163 private static final Map<String, Character> nucleotides23; 164 165 // amino acid 3 and 1 letter code definitions 166 private static final Map<String, Character> aminoAcids; 167 168 private static final Set<Element> hBondDonorAcceptors; 169 170 static { 171 nucleotides30 = new HashMap<String, Character>(); 172 nucleotides30.put("DA", 'A'); 173 nucleotides30.put("DC", 'C'); 174 nucleotides30.put("DG", 'G'); 175 nucleotides30.put("DT", 'T'); 176 nucleotides30.put("DI", 'I'); 177 nucleotides30.put("A", 'A'); 178 nucleotides30.put("G", 'G'); 179 nucleotides30.put("C", 'C'); 180 nucleotides30.put("U", 'U'); 181 nucleotides30.put("I", 'I'); 182 183 // the DNA linkers - the +C , +G, +A +T +U and +I have been replaced 184 // with these: 185 nucleotides30.put("TAF", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine 186 nucleotides30.put("TC1", UNKNOWN_GROUP_LABEL); // Furanosyl 187 nucleotides30.put("TFE", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine 188 nucleotides30.put("TFO", UNKNOWN_GROUP_LABEL); // Tenofovir (3' 189 // terminator) 190 nucleotides30.put("TGP", UNKNOWN_GROUP_LABEL); // Guanine variant 191 nucleotides30.put("THX", UNKNOWN_GROUP_LABEL); // 5' terminator 192 nucleotides30.put("TLC", UNKNOWN_GROUP_LABEL); // Thymine with dicyclic 193 // sugar 194 nucleotides30.put("TLN", UNKNOWN_GROUP_LABEL); // locked Thymine 195 nucleotides30.put("LCG", UNKNOWN_GROUP_LABEL); // locked Guanine 196 nucleotides30.put("TP1", UNKNOWN_GROUP_LABEL); // Thymine peptide 197 // nucleic acid, with 198 // added methyl 199 nucleotides30.put("CP1", UNKNOWN_GROUP_LABEL); // Cytidine peptide 200 // nucleic acid, with 201 // added methyl 202 nucleotides30.put("TPN", UNKNOWN_GROUP_LABEL); // Thymine peptide 203 // nucleic acid 204 nucleotides30.put("CPN", UNKNOWN_GROUP_LABEL); // Cytidine peptide 205 // nucleic acid 206 nucleotides30.put("GPN", UNKNOWN_GROUP_LABEL); // Guanine peptide 207 // nucleic acid 208 nucleotides30.put("APN", UNKNOWN_GROUP_LABEL); // Adenosine peptide 209 // nucleic acid 210 nucleotides30.put("TPC", UNKNOWN_GROUP_LABEL); // Thymine variant 211 212 // store nucleic acids (C, G, A, T, U, and I), and 213 // the modified versions of nucleic acids (+C, +G, +A, +T, +U, and +I), 214 // and 215 nucleotides23 = new HashMap<String, Character>(); 216 String[] names = { "C", "G", "A", "T", "U", "I", "+C", "+G", "+A", 217 "+T", "+U", "+I" }; 218 for (String n : names) { 219 nucleotides23.put(n, n.charAt(n.length() - 1)); 220 } 221 222 aminoAcids = new HashMap<String, Character>(); 223 aminoAcids.put("GLY", 'G'); 224 aminoAcids.put("ALA", 'A'); 225 aminoAcids.put("VAL", 'V'); 226 aminoAcids.put("LEU", 'L'); 227 aminoAcids.put("ILE", 'I'); 228 aminoAcids.put("PHE", 'F'); 229 aminoAcids.put("TYR", 'Y'); 230 aminoAcids.put("TRP", 'W'); 231 aminoAcids.put("PRO", 'P'); 232 aminoAcids.put("HIS", 'H'); 233 aminoAcids.put("LYS", 'K'); 234 aminoAcids.put("ARG", 'R'); 235 aminoAcids.put("SER", 'S'); 236 aminoAcids.put("THR", 'T'); 237 aminoAcids.put("GLU", 'E'); 238 aminoAcids.put("GLN", 'Q'); 239 aminoAcids.put("ASP", 'D'); 240 aminoAcids.put("ASN", 'N'); 241 aminoAcids.put("CYS", 'C'); 242 aminoAcids.put("MET", 'M'); 243 // MSE is only found as a molecular replacement for MET 244 aminoAcids.put("MSE", 'M'); 245 // 'non-standard', genetically encoded 246 // http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html 247 // IUBMB recommended name is 'SEC' but the wwPDB currently use 'CSE' 248 // likewise 'PYL' (IUBMB) and 'PYH' (PDB) 249 aminoAcids.put("CSE", 'U'); 250 aminoAcids.put("SEC", 'U'); 251 aminoAcids.put("PYH", 'O'); 252 aminoAcids.put("PYL", 'O'); 253 254 hBondDonorAcceptors = new HashSet<Element>(); 255 hBondDonorAcceptors.add(Element.N); 256 hBondDonorAcceptors.add(Element.O); 257 hBondDonorAcceptors.add(Element.S); 258 259 } 260 261 /** 262 * Count how many Atoms are contained within a Structure object. 263 * 264 * @param s 265 * the structure object 266 * @return the number of Atoms in this Structure 267 */ 268 public static final int getNrAtoms(Structure s) { 269 270 int nrAtoms = 0; 271 272 Iterator<Group> iter = new GroupIterator(s); 273 274 while (iter.hasNext()) { 275 Group g = iter.next(); 276 nrAtoms += g.size(); 277 } 278 279 return nrAtoms; 280 } 281 282 /** 283 * Count how many groups are contained within a structure object. 284 * 285 * @param s 286 * the structure object 287 * @return the number of groups in the structure 288 */ 289 public static final int getNrGroups(Structure s) { 290 int nrGroups = 0; 291 292 List<Chain> chains = s.getChains(0); 293 for (Chain c : chains) { 294 nrGroups += c.getAtomLength(); 295 } 296 return nrGroups; 297 } 298 299 /** 300 * Returns an array of the requested Atoms from the Structure object. 301 * Iterates over all groups and checks if the requested atoms are in this 302 * group, no matter if this is a {@link AminoAcid} or {@link HetatomImpl} 303 * group. If the group does not contain all requested atoms then no atoms 304 * are added for that group. For structures with more than one model, only 305 * model 0 will be used. 306 * 307 * @param s 308 * the structure to get the atoms from 309 * 310 * @param atomNames 311 * contains the atom names to be used. 312 * @return an Atom[] array 313 */ 314 public static final Atom[] getAtomArray(Structure s, String[] atomNames) { 315 List<Chain> chains = s.getModel(0); 316 317 List<Atom> atoms = new ArrayList<Atom>(); 318 319 extractAtoms(atomNames, chains, atoms); 320 return atoms.toArray(new Atom[atoms.size()]); 321 322 } 323 324 /** 325 * Returns an array of the requested Atoms from the Structure object. In 326 * contrast to {@link #getAtomArray(Structure, String[])} this method 327 * iterates over all chains. Iterates over all chains and groups and checks 328 * if the requested atoms are in this group, no matter if this is a 329 * {@link AminoAcid} or {@link HetatomImpl} group. If the group does not 330 * contain all requested atoms then no atoms are added for that group. For 331 * structures with more than one model, only model 0 will be used. 332 * 333 * @param s 334 * the structure to get the atoms from 335 * 336 * @param atomNames 337 * contains the atom names to be used. 338 * @return an Atom[] array 339 */ 340 public static final Atom[] getAtomArrayAllModels(Structure s, 341 String[] atomNames) { 342 343 List<Atom> atoms = new ArrayList<Atom>(); 344 345 for (int i = 0; i < s.nrModels(); i++) { 346 List<Chain> chains = s.getModel(i); 347 extractAtoms(atomNames, chains, atoms); 348 } 349 return atoms.toArray(new Atom[atoms.size()]); 350 351 } 352 353 /** 354 * Convert all atoms of the structure (all models) into an Atom array 355 * 356 * @param s 357 * input structure 358 * @return all atom array 359 */ 360 public static final Atom[] getAllAtomArray(Structure s) { 361 List<Atom> atoms = new ArrayList<Atom>(); 362 363 AtomIterator iter = new AtomIterator(s); 364 while (iter.hasNext()) { 365 Atom a = iter.next(); 366 atoms.add(a); 367 } 368 return atoms.toArray(new Atom[atoms.size()]); 369 } 370 /** 371 * Convert all atoms of the structure (specified model) into an Atom array 372 * 373 * @param s 374 * input structure 375 * @return all atom array 376 */ 377 public static final Atom[] getAllAtomArray(Structure s, int model) { 378 List<Atom> atoms = new ArrayList<Atom>(); 379 380 AtomIterator iter = new AtomIterator(s,model); 381 while (iter.hasNext()) { 382 Atom a = iter.next(); 383 atoms.add(a); 384 } 385 return atoms.toArray(new Atom[atoms.size()]); 386 387 } 388 389 /** 390 * Returns and array of all atoms of the chain, including 391 * Hydrogens (if present) and all HETATOMs. Waters are not included. 392 * 393 * @param c 394 * input chain 395 * @return all atom array 396 */ 397 public static final Atom[] getAllAtomArray(Chain c) { 398 List<Atom> atoms = new ArrayList<Atom>(); 399 400 for (Group g : c.getAtomGroups()) { 401 if (g.isWater()) 402 continue; 403 for (Atom a : g.getAtoms()) { 404 atoms.add(a); 405 } 406 } 407 return atoms.toArray(new Atom[atoms.size()]); 408 } 409 410 /** 411 * List of groups from the structure not included in ca (e.g. ligands). 412 * 413 * Unaligned groups are searched from all chains referenced in ca, as well 414 * as any chains in the first model of the structure from ca[0], if any. 415 * 416 * @param ca an array of atoms 417 * @return 418 */ 419 public static List<Group> getUnalignedGroups(Atom[] ca) { 420 Set<Chain> chains = new HashSet<Chain>(); 421 Set<Group> caGroups = new HashSet<Group>(); 422 423 // Create list of all chains in this structure 424 Structure s = null; 425 if (ca.length > 0) { 426 Group g = ca[0].getGroup(); 427 if (g != null) { 428 Chain c = g.getChain(); 429 if (c != null) { 430 s = c.getStructure(); 431 } 432 } 433 } 434 if (s != null) { 435 // Add all chains from the structure 436 for (Chain c : s.getChains(0)) { 437 chains.add(c); 438 } 439 } 440 441 // Add groups and chains from ca 442 for (Atom a : ca) { 443 Group g = a.getGroup(); 444 if (g != null) { 445 caGroups.add(g); 446 447 Chain c = g.getChain(); 448 if (c != null) { 449 chains.add(c); 450 } 451 } 452 } 453 454 // Iterate through all chains, finding groups not in ca 455 List<Group> unadded = new ArrayList<Group>(); 456 for (Chain c : chains) { 457 for (Group g : c.getAtomGroups()) { 458 if (!caGroups.contains(g)) { 459 unadded.add(g); 460 } 461 } 462 } 463 return unadded; 464 } 465 466 /** 467 * Finds all ligand groups from the target which fall within the cutoff distance 468 * of some atom from the query set. 469 * 470 * @param target Set of groups including the ligands 471 * @param query Atom selection 472 * @param cutoff Distance from query atoms to consider, in angstroms 473 * @return All groups from the target with at least one atom within cutoff of a query atom 474 * @see StructureTools#DEFAULT_LIGAND_PROXIMITY_CUTOFF 475 */ 476 public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[] query, double cutoff) { 477 // Geometric hashing of the reduced structure 478 Grid grid = new Grid(cutoff); 479 grid.addAtoms(query); 480 481 List<Group> ligands = new ArrayList<>(); 482 for(Group g :target ) { 483 // don't worry about waters 484 if(g.isWater()) { 485 continue; 486 } 487 488 if(g.isPolymeric() ) { 489 // Polymers aren't ligands 490 continue; 491 } 492 493 // It is a ligand! 494 495 // Check that it's within cutoff of something in reduced 496 List<Atom> groupAtoms = g.getAtoms(); 497 if( ! grid.hasAnyContact(Calc.atomsToPoints(groupAtoms))) { 498 continue; 499 } 500 501 ligands.add(g); 502 } 503 return ligands; 504 } 505 506 /** 507 * Adds a particular group to a structure. A new chain will be created if necessary. 508 * 509 * <p>When adding multiple groups, pass the return value of one call as the 510 * chainGuess parameter of the next call for efficiency. 511 * <pre> 512 * Chain guess = null; 513 * for(Group g : groups) { 514 * guess = addGroupToStructure(s, g, guess ); 515 * } 516 * </pre> 517 * @param s structure to receive the group 518 * @param g group to add 519 * @param chainGuess (optional) If not null, should be a chain from s. Used 520 * to improve performance when adding many groups from the same chain 521 * @param clone Indicates whether the input group should be cloned before 522 * being added to the new chain 523 * @return the chain g was added to 524 */ 525 public static Chain addGroupToStructure(Structure s, Group g, int model, Chain chainGuess, boolean clone ) { 526 synchronized(s) { 527 // Find or create the chain 528 String chainId = g.getChainId(); 529 assert !chainId.isEmpty(); 530 Chain chain; 531 if(chainGuess != null && chainGuess.getId() == chainId) { 532 // previously guessed chain 533 chain = chainGuess; 534 } else { 535 // Try to guess 536 chain = s.getChain(chainId, model); 537 if(chain == null) { 538 // no chain found 539 chain = new ChainImpl(); 540 chain.setId(chainId); 541 542 Chain oldChain = g.getChain(); 543 chain.setName(oldChain.getName()); 544 545 EntityInfo oldEntityInfo = oldChain.getEntityInfo(); 546 547 EntityInfo newEntityInfo; 548 if(oldEntityInfo == null) { 549 newEntityInfo = new EntityInfo(); 550 s.addEntityInfo(newEntityInfo); 551 } else { 552 newEntityInfo = s.getEntityById(oldEntityInfo.getMolId()); 553 if( newEntityInfo == null ) { 554 newEntityInfo = new EntityInfo(oldEntityInfo); 555 s.addEntityInfo(newEntityInfo); 556 } 557 } 558 newEntityInfo.addChain(chain); 559 chain.setEntityInfo(newEntityInfo); 560 561 // TODO Do the seqres need to be cloned too? -SB 2016-10-7 562 chain.setSeqResGroups(oldChain.getSeqResGroups()); 563 chain.setSeqMisMatches(oldChain.getSeqMisMatches()); 564 565 s.addChain(chain,model); 566 } 567 } 568 569 // Add cloned group 570 if(clone) { 571 g = (Group)g.clone(); 572 } 573 chain.addGroup(g); 574 575 return chain; 576 } 577 } 578 579 /** 580 * Add a list of groups to a new structure. Chains will be automatically 581 * created in the new structure as needed. 582 * @param s structure to receive the group 583 * @param g group to add 584 * @param clone Indicates whether the input groups should be cloned before 585 * being added to the new chain 586 */ 587 public static void addGroupsToStructure(Structure s, Collection<Group> groups, int model, boolean clone) { 588 Chain chainGuess = null; 589 for(Group g : groups) { 590 chainGuess = addGroupToStructure(s, g, model, chainGuess, clone); 591 } 592 } 593 594 /** 595 * Expand a set of atoms into all groups from the same structure. 596 * 597 * If the structure is set, only the first atom is used (assuming all 598 * atoms come from the same original structure). 599 * If the atoms aren't linked to a structure (for instance, for cloned atoms), 600 * searches all chains of all atoms for groups. 601 * @param atoms Sample of atoms 602 * @return All groups from all chains accessible from the input atoms 603 */ 604 public static Set<Group> getAllGroupsFromSubset(Atom[] atoms) { 605 return getAllGroupsFromSubset(atoms,null); 606 } 607 /** 608 * Expand a set of atoms into all groups from the same structure. 609 * 610 * If the structure is set, only the first atom is used (assuming all 611 * atoms come from the same original structure). 612 * If the atoms aren't linked to a structure (for instance, for cloned atoms), 613 * searches all chains of all atoms for groups. 614 * @param atoms Sample of atoms 615 * @param types Type of groups to return (useful for getting only ligands, for instance). 616 * Null gets all groups. 617 * @return All groups from all chains accessible from the input atoms 618 */ 619 public static Set<Group> getAllGroupsFromSubset(Atom[] atoms,GroupType types) { 620 // Get the full structure 621 Structure s = null; 622 if (atoms.length > 0) { 623 Group g = atoms[0].getGroup(); 624 if (g != null) { 625 Chain c = g.getChain(); 626 if (c != null) { 627 s = c.getStructure(); 628 } 629 } 630 } 631 // Collect all groups from the structure 632 Set<Chain> allChains = new HashSet<>(); 633 if( s != null ) { 634 allChains.addAll(s.getChains()); 635 } 636 // In case the structure wasn't set, need to use ca chains too 637 for(Atom a : atoms) { 638 Group g = a.getGroup(); 639 if(g != null) { 640 Chain c = g.getChain(); 641 if( c != null ) { 642 allChains.add(c); 643 } 644 } 645 } 646 647 if(allChains.isEmpty() ) { 648 return Collections.emptySet(); 649 } 650 651 // Extract all ligand groups 652 Set<Group> full = new HashSet<>(); 653 for(Chain c : allChains) { 654 if(types == null) { 655 full.addAll(c.getAtomGroups()); 656 } else { 657 full.addAll(c.getAtomGroups(types)); 658 } 659 } 660 661 return full; 662 } 663 664 665 /** 666 * Returns and array of all non-Hydrogen atoms in the given Structure, 667 * optionally including HET atoms or not. Waters are not included. 668 * 669 * @param s 670 * @param hetAtoms 671 * if true HET atoms are included in array, if false they are not 672 * @return 673 */ 674 public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms) { 675 AtomIterator iter = new AtomIterator(s); 676 return getAllNonHAtomArray(s, hetAtoms, iter); 677 } 678 /** 679 * Returns and array of all non-Hydrogen atoms in the given Structure, 680 * optionally including HET atoms or not. Waters are not included. 681 * 682 * @param s 683 * @param hetAtoms 684 * if true HET atoms are included in array, if false they are not 685 * @param modelNr Model number to draw atoms from 686 * @return 687 */ 688 public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, int modelNr) { 689 AtomIterator iter = new AtomIterator(s,modelNr); 690 return getAllNonHAtomArray(s, hetAtoms, iter); 691 } 692 private static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, AtomIterator iter) { 693 List<Atom> atoms = new ArrayList<Atom>(); 694 695 while (iter.hasNext()) { 696 Atom a = iter.next(); 697 if (a.getElement() == Element.H) 698 continue; 699 700 Group g = a.getGroup(); 701 702 if (g.isWater()) 703 continue; 704 705 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 706 continue; 707 708 atoms.add(a); 709 } 710 return atoms.toArray(new Atom[atoms.size()]); 711 } 712 713 /** 714 * Returns and array of all non-Hydrogen atoms in the given Chain, 715 * optionally including HET atoms or not Waters are not included. 716 * 717 * @param c 718 * @param hetAtoms 719 * if true HET atoms are included in array, if false they are not 720 * @return 721 */ 722 public static final Atom[] getAllNonHAtomArray(Chain c, boolean hetAtoms) { 723 List<Atom> atoms = new ArrayList<Atom>(); 724 725 for (Group g : c.getAtomGroups()) { 726 if (g.isWater()) 727 continue; 728 for (Atom a : g.getAtoms()) { 729 730 if (a.getElement() == Element.H) 731 continue; 732 733 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 734 continue; 735 736 atoms.add(a); 737 } 738 } 739 return atoms.toArray(new Atom[atoms.size()]); 740 } 741 742 /** 743 * Returns and array of all non-Hydrogen atoms coordinates in the given Chain, 744 * optionally including HET atoms or not Waters are not included. 745 * 746 * @param c 747 * @param hetAtoms 748 * if true HET atoms are included in array, if false they are not 749 * @return 750 */ 751 public static final Point3d[] getAllNonHCoordsArray(Chain c, boolean hetAtoms) { 752 List<Point3d> atoms = new ArrayList<Point3d>(); 753 754 for (Group g : c.getAtomGroups()) { 755 if (g.isWater()) 756 continue; 757 for (Atom a : g.getAtoms()) { 758 759 if (a.getElement() == Element.H) 760 continue; 761 762 if (!hetAtoms && g.getType().equals(GroupType.HETATM)) 763 continue; 764 765 atoms.add(a.getCoordsAsPoint3d()); 766 } 767 } 768 return atoms.toArray(new Point3d[atoms.size()]); 769 } 770 771 /** 772 * Adds to the given atoms list, all atoms of groups that contained all 773 * requested atomNames, i.e. if a group does not contain all of the 774 * requested atom names, its atoms won't be added. 775 * 776 * @param atomNames 777 * @param chains 778 * @param atoms 779 */ 780 private static void extractAtoms(String[] atomNames, List<Chain> chains, 781 List<Atom> atoms) { 782 783 for (Chain c : chains) { 784 785 for (Group g : c.getAtomGroups()) { 786 787 // a temp container for the atoms of this group 788 List<Atom> thisGroupAtoms = new ArrayList<Atom>(); 789 // flag to check if this group contains all the requested atoms. 790 boolean thisGroupAllAtoms = true; 791 for (String atomName : atomNames) { 792 Atom a = g.getAtom(atomName); 793 794 if (a == null) { 795 // this group does not have a required atom, skip it... 796 thisGroupAllAtoms = false; 797 break; 798 } 799 thisGroupAtoms.add(a); 800 } 801 if (thisGroupAllAtoms) { 802 // add the atoms of this group to the array. 803 for (Atom a : thisGroupAtoms) { 804 atoms.add(a); 805 } 806 } 807 808 } 809 } 810 } 811 812 /** 813 * Returns an array of the requested Atoms from the Chain object. Iterates 814 * over all groups and checks if the requested atoms are in this group, no 815 * matter if this is a AminoAcid or Hetatom group. If the group does not 816 * contain all requested atoms then no atoms are added for that group. 817 * 818 * @param c 819 * the Chain to get the atoms from 820 * 821 * @param atomNames 822 * contains the atom names to be used. 823 * @return an Atom[] array 824 */ 825 public static final Atom[] getAtomArray(Chain c, String[] atomNames) { 826 827 List<Atom> atoms = new ArrayList<Atom>(); 828 829 for (Group g : c.getAtomGroups()) { 830 831 // a temp container for the atoms of this group 832 List<Atom> thisGroupAtoms = new ArrayList<Atom>(); 833 // flag to check if this group contains all the requested atoms. 834 boolean thisGroupAllAtoms = true; 835 for (String atomName : atomNames) { 836 Atom a = g.getAtom(atomName); 837 if (a == null) { 838 logger.debug("Group " + g.getResidueNumber() + " (" 839 + g.getPDBName() 840 + ") does not have the required atom '" + atomName 841 + "'"); 842 // this group does not have a required atom, skip it... 843 thisGroupAllAtoms = false; 844 break; 845 } 846 thisGroupAtoms.add(a); 847 } 848 849 if (thisGroupAllAtoms) { 850 // add the atoms of this group to the array. 851 for (Atom a : thisGroupAtoms) { 852 atoms.add(a); 853 } 854 } 855 856 } 857 return atoms.toArray(new Atom[atoms.size()]); 858 859 } 860 861 /** 862 * Returns an Atom array of the C-alpha atoms. Any atom that is a carbon and 863 * has CA name will be returned. 864 * 865 * @param c 866 * the structure object 867 * @return an Atom[] array 868 * @see #getRepresentativeAtomArray(Chain) 869 */ 870 public static final Atom[] getAtomCAArray(Chain c) { 871 List<Atom> atoms = new ArrayList<Atom>(); 872 873 for (Group g : c.getAtomGroups()) { 874 if (g.hasAtom(CA_ATOM_NAME) 875 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 876 atoms.add(g.getAtom(CA_ATOM_NAME)); 877 } 878 } 879 880 return atoms.toArray(new Atom[atoms.size()]); 881 } 882 883 /** 884 * Gets a representative atom for each group that is part of the chain 885 * backbone. Note that modified aminoacids won't be returned as part of the 886 * backbone if the {@link org.biojava.nbio.structure.io.mmcif.ReducedChemCompProvider} was used to load the 887 * structure. 888 * 889 * For amino acids, the representative is a CA carbon. For nucleotides, the 890 * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group 891 * types will be ignored. 892 * 893 * @param c 894 * @return representative Atoms of the chain backbone 895 * @since Biojava 4.1.0 896 */ 897 public static final Atom[] getRepresentativeAtomArray(Chain c) { 898 List<Atom> atoms = new ArrayList<Atom>(); 899 900 for (Group g : c.getAtomGroups()) { 901 902 switch (g.getType()) { 903 case AMINOACID: 904 if (g.hasAtom(CA_ATOM_NAME) 905 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 906 atoms.add(g.getAtom(CA_ATOM_NAME)); 907 } 908 break; 909 case NUCLEOTIDE: 910 if (g.hasAtom(NUCLEOTIDE_REPRESENTATIVE)) { 911 atoms.add(g.getAtom(NUCLEOTIDE_REPRESENTATIVE)); 912 } 913 break; 914 default: 915 // don't add 916 } 917 } 918 919 return atoms.toArray(new Atom[atoms.size()]); 920 921 } 922 923 /** 924 * Provides an equivalent copy of Atoms in a new array. Clones everything, 925 * starting with parent groups and chains. The chain will only contain 926 * groups that are part of the input array. 927 * 928 * @param ca 929 * array of representative atoms, e.g. CA atoms 930 * @return Atom array 931 * @since Biojava 4.1.0 932 */ 933 public static final Atom[] cloneAtomArray(Atom[] ca) { 934 Atom[] newCA = new Atom[ca.length]; 935 936 List<Chain> model = new ArrayList<Chain>(); 937 int apos = -1; 938 for (Atom a : ca) { 939 apos++; 940 Group parentG = a.getGroup(); 941 Chain parentC = parentG.getChain(); 942 943 Chain newChain = null; 944 for (Chain c : model) { 945 if (c.getName().equals(parentC.getName())) { 946 newChain = c; 947 break; 948 } 949 } 950 if (newChain == null) { 951 newChain = new ChainImpl(); 952 newChain.setId(parentC.getId()); 953 newChain.setName(parentC.getName()); 954 model.add(newChain); 955 } 956 957 Group parentN = (Group) parentG.clone(); 958 959 newCA[apos] = parentN.getAtom(a.getName()); 960 try { 961 // if the group doesn't exist yet, this produces a StructureException 962 newChain.getGroupByPDB(parentN.getResidueNumber()); 963 } catch (StructureException e) { 964 // the group doesn't exist yet in the newChain, let's add it 965 newChain.addGroup(parentN); 966 } 967 968 } 969 return newCA; 970 } 971 972 /** 973 * Clone a set of representative Atoms, but returns the parent groups 974 * 975 * @param ca 976 * Atom array 977 * @return Group array 978 */ 979 public static Group[] cloneGroups(Atom[] ca) { 980 Group[] newGroup = new Group[ca.length]; 981 982 List<Chain> model = new ArrayList<Chain>(); 983 int apos = -1; 984 for (Atom a : ca) { 985 apos++; 986 Group parentG = a.getGroup(); 987 Chain parentC = parentG.getChain(); 988 989 Chain newChain = null; 990 for (Chain c : model) { 991 if (c.getName().equals(parentC.getName())) { 992 newChain = c; 993 break; 994 } 995 } 996 if (newChain == null) { 997 newChain = new ChainImpl(); 998 newChain.setName(parentC.getName()); 999 model.add(newChain); 1000 } 1001 1002 Group ng = (Group) parentG.clone(); 1003 newGroup[apos] = ng; 1004 newChain.addGroup(ng); 1005 } 1006 return newGroup; 1007 } 1008 1009 /** 1010 * Utility method for working with circular permutations. Creates a 1011 * duplicated and cloned set of Calpha atoms from the input array. 1012 * 1013 * @param ca2 1014 * atom array 1015 * @return cloned and duplicated set of input array 1016 */ 1017 public static Atom[] duplicateCA2(Atom[] ca2) { 1018 // we don't want to rotate input atoms, do we? 1019 Atom[] ca2clone = new Atom[ca2.length * 2]; 1020 1021 int pos = 0; 1022 1023 Chain c = null; 1024 String prevChainId = ""; 1025 for (Atom a : ca2) { 1026 Group g = (Group) a.getGroup().clone(); // works because each group 1027 // has only a single atom 1028 1029 if (c == null) { 1030 c = new ChainImpl(); 1031 Chain orig = a.getGroup().getChain(); 1032 c.setId(orig.getId()); 1033 c.setName(orig.getName()); 1034 } else { 1035 Chain orig = a.getGroup().getChain(); 1036 if (!orig.getId().equals(prevChainId)) { 1037 c = new ChainImpl(); 1038 c.setId(orig.getId()); 1039 c.setName(orig.getName()); 1040 } 1041 } 1042 1043 c.addGroup(g); 1044 ca2clone[pos] = g.getAtom(a.getName()); 1045 1046 pos++; 1047 } 1048 1049 // Duplicate ca2! 1050 c = null; 1051 prevChainId = ""; 1052 for (Atom a : ca2) { 1053 Group g = (Group) a.getGroup().clone(); 1054 1055 if (c == null) { 1056 c = new ChainImpl(); 1057 Chain orig = a.getGroup().getChain(); 1058 c.setId(orig.getId()); 1059 c.setName(orig.getName()); 1060 } else { 1061 Chain orig = a.getGroup().getChain(); 1062 if (!orig.getId().equals(prevChainId)) { 1063 c = new ChainImpl(); 1064 c.setId(orig.getId()); 1065 c.setName(orig.getName()); 1066 } 1067 } 1068 1069 c.addGroup(g); 1070 ca2clone[pos] = g.getAtom(a.getName()); 1071 1072 pos++; 1073 } 1074 1075 return ca2clone; 1076 1077 } 1078 1079 /** 1080 * Return an Atom array of the C-alpha atoms. Any atom that is a carbon and 1081 * has CA name will be returned. 1082 * 1083 * @param s 1084 * the structure object 1085 * @return an Atom[] array 1086 * @see #getRepresentativeAtomArray(Structure) 1087 */ 1088 public static Atom[] getAtomCAArray(Structure s) { 1089 1090 List<Atom> atoms = new ArrayList<Atom>(); 1091 1092 for (Chain c : s.getChains()) { 1093 for (Group g : c.getAtomGroups()) { 1094 if (g.hasAtom(CA_ATOM_NAME) 1095 && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) { 1096 atoms.add(g.getAtom(CA_ATOM_NAME)); 1097 } 1098 } 1099 } 1100 1101 return atoms.toArray(new Atom[atoms.size()]); 1102 } 1103 1104 /** 1105 * Gets a representative atom for each group that is part of the chain 1106 * backbone. Note that modified aminoacids won't be returned as part of the 1107 * backbone if the {@link org.biojava.nbio.structure.io.mmcif.ReducedChemCompProvider} was used to load the 1108 * structure. 1109 * 1110 * For amino acids, the representative is a CA carbon. For nucleotides, the 1111 * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group 1112 * types will be ignored. 1113 * 1114 * @param s 1115 * Input structure 1116 * @return representative Atoms of the structure backbone 1117 * @since Biojava 4.1.0 1118 */ 1119 public static Atom[] getRepresentativeAtomArray(Structure s) { 1120 1121 List<Atom> atoms = new ArrayList<Atom>(); 1122 1123 for (Chain c : s.getChains()) { 1124 Atom[] chainAtoms = getRepresentativeAtomArray(c); 1125 for (Atom a : chainAtoms) { 1126 atoms.add(a); 1127 } 1128 } 1129 1130 return atoms.toArray(new Atom[atoms.size()]); 1131 } 1132 1133 /** 1134 * Return an Atom array of the main chain atoms: CA, C, N, O Any group that 1135 * contains those atoms will be included, be it a standard aminoacid or not 1136 * 1137 * @param s 1138 * the structure object 1139 * @return an Atom[] array 1140 */ 1141 public static Atom[] getBackboneAtomArray(Structure s) { 1142 1143 List<Atom> atoms = new ArrayList<Atom>(); 1144 1145 for (Chain c : s.getChains()) { 1146 for (Group g : c.getAtomGroups()) { 1147 if (g.hasAminoAtoms()) { 1148 // this means we will only take atoms grom groups that have 1149 // complete backbones 1150 for (Atom a : g.getAtoms()) { 1151 switch (g.getType()) { 1152 case NUCLEOTIDE: 1153 // Nucleotide backbone 1154 if (a.getName().equals(C1_ATOM_NAME)) 1155 atoms.add(a); 1156 if (a.getName().equals(C2_ATOM_NAME)) 1157 atoms.add(a); 1158 if (a.getName().equals(C3_ATOM_NAME)) 1159 atoms.add(a); 1160 if (a.getName().equals(C4_ATOM_NAME)) 1161 atoms.add(a); 1162 if (a.getName().equals(O2_ATOM_NAME)) 1163 atoms.add(a); 1164 if (a.getName().equals(O3_ATOM_NAME)) 1165 atoms.add(a); 1166 if (a.getName().equals(O4_ATOM_NAME)) 1167 atoms.add(a); 1168 if (a.getName().equals(O5_ATOM_NAME)) 1169 atoms.add(a); 1170 if (a.getName().equals(OP1_ATOM_NAME)) 1171 atoms.add(a); 1172 if (a.getName().equals(OP2_ATOM_NAME)) 1173 atoms.add(a); 1174 if (a.getName().equals(P_ATOM_NAME)) 1175 atoms.add(a); 1176 // TODO Allow C4* names as well as C4'? -SB 3/2015 1177 break; 1178 case AMINOACID: 1179 default: 1180 // we do it this way instead of with g.getAtom() to 1181 // be sure we always use the same order as original 1182 if (a.getName().equals(CA_ATOM_NAME)) 1183 atoms.add(a); 1184 if (a.getName().equals(C_ATOM_NAME)) 1185 atoms.add(a); 1186 if (a.getName().equals(N_ATOM_NAME)) 1187 atoms.add(a); 1188 if (a.getName().equals(O_ATOM_NAME)) 1189 atoms.add(a); 1190 break; 1191 } 1192 } 1193 } 1194 } 1195 1196 } 1197 1198 return atoms.toArray(new Atom[atoms.size()]); 1199 } 1200 1201 /** 1202 * Convert three character amino acid codes into single character e.g. 1203 * convert CYS to C. Valid 3-letter codes will be those of the standard 20 1204 * amino acids plus MSE, CSE, SEC, PYH, PYL (see the {@link #aminoAcids} 1205 * map) 1206 * 1207 * @return the 1 letter code, or null if the given 3 letter code does not 1208 * correspond to an amino acid code 1209 * @param groupCode3 1210 * a three character amino acid representation String 1211 * @see {@link #get1LetterCode(String)} 1212 */ 1213 public static final Character get1LetterCodeAmino(String groupCode3) { 1214 return aminoAcids.get(groupCode3); 1215 } 1216 1217 /** 1218 * Convert a three letter amino acid or nucleotide code into a single 1219 * character code. If the code does not correspond to an amino acid or 1220 * nucleotide, returns {@link #UNKNOWN_GROUP_LABEL}. 1221 * 1222 * Returned null for nucleotides prior to version 4.0.1. 1223 * 1224 * @param groupCode3 1225 * three letter representation 1226 * @return The 1-letter abbreviation 1227 */ 1228 public static final Character get1LetterCode(String groupCode3) { 1229 1230 Character code1; 1231 1232 // is it a standard amino acid ? 1233 code1 = get1LetterCodeAmino(groupCode3); 1234 1235 if (code1 == null) { 1236 // hm groupCode3 is not standard 1237 // perhaps it is a nucleotide? 1238 groupCode3 = groupCode3.trim(); 1239 if (isNucleotide(groupCode3)) { 1240 code1 = nucleotides30.get(groupCode3); 1241 if (code1 == null) { 1242 code1 = nucleotides23.get(groupCode3); 1243 } 1244 if (code1 == null) { 1245 code1 = UNKNOWN_GROUP_LABEL; 1246 } 1247 } else { 1248 // does not seem to be so let's assume it is 1249 // nonstandard aminoacid and label it "X" 1250 // logger.warning("unknown group name "+groupCode3 ); 1251 code1 = UNKNOWN_GROUP_LABEL; 1252 } 1253 } 1254 1255 return code1; 1256 1257 } 1258 1259 /** 1260 * Test if the three-letter code of an ATOM entry corresponds to a 1261 * nucleotide or to an aminoacid. 1262 * 1263 * @param groupCode3 1264 * 3-character code for a group. 1265 * 1266 */ 1267 public static final boolean isNucleotide(String groupCode3) { 1268 String code = groupCode3.trim(); 1269 return nucleotides30.containsKey(code) 1270 || nucleotides23.containsKey(code); 1271 } 1272 1273 /** 1274 * Reduce a structure to provide a smaller representation . Only takes the 1275 * first model of the structure. If chainName is provided only return a 1276 * structure containing that Chain ID. Converts lower case chain IDs to 1277 * upper case if structure does not contain a chain with that ID. 1278 * 1279 * @param s 1280 * @param chainId 1281 * @return Structure 1282 * @since 3.0 1283 * @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0) 1284 */ 1285 @Deprecated 1286 public static final Structure getReducedStructure(Structure s, 1287 String chainId) throws StructureException { 1288 // since we deal here with structure alignments, 1289 // only use Model 1... 1290 1291 Structure newS = new StructureImpl(); 1292 newS.setPDBCode(s.getPDBCode()); 1293 newS.setPDBHeader(s.getPDBHeader()); 1294 newS.setName(s.getName()); 1295 newS.setSSBonds(s.getSSBonds()); 1296 newS.setDBRefs(s.getDBRefs()); 1297 newS.setSites(s.getSites()); 1298 newS.setBiologicalAssembly(s.isBiologicalAssembly()); 1299 newS.setEntityInfos(s.getEntityInfos()); 1300 newS.setSSBonds(s.getSSBonds()); 1301 newS.setSites(s.getSites()); 1302 1303 if (chainId != null) 1304 chainId = chainId.trim(); 1305 1306 if (chainId == null || chainId.equals("")) { 1307 // only get model 0 1308 List<Chain> model0 = s.getModel(0); 1309 for (Chain c : model0) { 1310 newS.addChain(c); 1311 } 1312 return newS; 1313 1314 } 1315 1316 Chain c = null; 1317 try { 1318 c = s.getChainByPDB(chainId); 1319 } catch (StructureException e) { 1320 logger.warn(e.getMessage() + ". Chain id " + chainId 1321 + " did not match, trying upper case Chain id."); 1322 c = s.getChainByPDB(chainId.toUpperCase()); 1323 1324 } 1325 if (c != null) { 1326 newS.addChain(c); 1327 for (EntityInfo comp : s.getEntityInfos()) { 1328 if (comp.getChainIds() != null 1329 && comp.getChainIds().contains(c.getChainID())) { 1330 // found matching entity info. set description... 1331 newS.getPDBHeader().setDescription( 1332 "Chain " + c.getChainID() + " of " + s.getPDBCode() 1333 + " " + comp.getDescription()); 1334 } 1335 } 1336 } 1337 1338 return newS; 1339 } 1340 1341 public static final String convertAtomsToSeq(Atom[] atoms) { 1342 1343 StringBuilder buf = new StringBuilder(); 1344 Group prevGroup = null; 1345 for (Atom a : atoms) { 1346 Group g = a.getGroup(); 1347 if (prevGroup != null) { 1348 if (prevGroup.equals(g)) { 1349 // we add each group only once. 1350 continue; 1351 } 1352 } 1353 String code3 = g.getPDBName(); 1354 Character code1 = get1LetterCodeAmino(code3); 1355 if (code1 == null) 1356 code1 = UNKNOWN_GROUP_LABEL; 1357 1358 buf.append(code1); 1359 1360 prevGroup = g; 1361 1362 } 1363 return buf.toString(); 1364 } 1365 1366 /** 1367 * Get a group represented by a ResidueNumber. 1368 * 1369 * @param struc 1370 * a {@link Structure} 1371 * @param pdbResNum 1372 * a {@link ResidueNumber} 1373 * @return a group in the structure that is represented by the pdbResNum. 1374 * @throws StructureException 1375 * if the group cannot be found. 1376 */ 1377 public static final Group getGroupByPDBResidueNumber(Structure struc, 1378 ResidueNumber pdbResNum) throws StructureException { 1379 if (struc == null || pdbResNum == null) { 1380 throw new IllegalArgumentException("Null argument(s)."); 1381 } 1382 1383 Chain chain = struc.getPolyChainByPDB(pdbResNum.getChainName()); 1384 1385 return chain.getGroupByPDB(pdbResNum); 1386 } 1387 1388 /** 1389 * Returns the set of intra-chain contacts for the given chain for given 1390 * atom names, i.e. the contact map. Uses a geometric hashing algorithm that 1391 * speeds up the calculation without need of full distance matrix. The 1392 * parsing mode {@link FileParsingParameters#setAlignSeqRes(boolean)} needs 1393 * to be set to true for this to work. 1394 * 1395 * @param chain 1396 * @param atomNames 1397 * the array with atom names to be used. Beware: CA will do both 1398 * C-alphas an Calciums if null all non-H atoms of non-hetatoms 1399 * will be used 1400 * @param cutoff 1401 * @return 1402 */ 1403 public static AtomContactSet getAtomsInContact(Chain chain, 1404 String[] atomNames, double cutoff) { 1405 Grid grid = new Grid(cutoff); 1406 1407 Atom[] atoms = null; 1408 if (atomNames == null) { 1409 atoms = getAllNonHAtomArray(chain, false); 1410 } else { 1411 atoms = getAtomArray(chain, atomNames); 1412 } 1413 // If tha 1414 if(atoms.length==0){ 1415 logger.warn("No atoms found for buidling grid!"); 1416 return new AtomContactSet(cutoff); 1417 } 1418 grid.addAtoms(atoms); 1419 1420 return grid.getAtomContacts(); 1421 } 1422 1423 /** 1424 * Returns the set of intra-chain contacts for the given chain for all non-H 1425 * atoms of non-hetatoms, i.e. the contact map. Uses a geometric hashing 1426 * algorithm that speeds up the calculation without need of full distance 1427 * matrix. The parsing mode 1428 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1429 * true for this to work. 1430 * 1431 * @param chain 1432 * @param cutoff 1433 * @return 1434 */ 1435 public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) { 1436 return getAtomsInContact(chain, (String[]) null, cutoff); 1437 } 1438 1439 /** 1440 * Returns the set of intra-chain contacts for the given chain for C-alpha 1441 * atoms (including non-standard aminoacids appearing as HETATM groups), 1442 * i.e. the contact map. Uses a geometric hashing algorithm that speeds up 1443 * the calculation without need of full distance matrix. The parsing mode 1444 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1445 * true for this to work. 1446 * 1447 * @param chain 1448 * @param cutoff 1449 * @return 1450 * @see {@link #getRepresentativeAtomsInContact(Chain, double)} 1451 */ 1452 public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) { 1453 Grid grid = new Grid(cutoff); 1454 1455 Atom[] atoms = getAtomCAArray(chain); 1456 1457 grid.addAtoms(atoms); 1458 1459 return grid.getAtomContacts(); 1460 } 1461 1462 /** 1463 * Returns the set of intra-chain contacts for the given chain for C-alpha 1464 * or C3' atoms (including non-standard aminoacids appearing as HETATM 1465 * groups), i.e. the contact map. Uses a geometric hashing algorithm that 1466 * speeds up the calculation without need of full distance matrix. 1467 * 1468 * @param chain 1469 * @param cutoff 1470 * @return 1471 * @since Biojava 4.1.0 1472 */ 1473 public static AtomContactSet getRepresentativeAtomsInContact(Chain chain, 1474 double cutoff) { 1475 Grid grid = new Grid(cutoff); 1476 1477 Atom[] atoms = getRepresentativeAtomArray(chain); 1478 1479 grid.addAtoms(atoms); 1480 1481 return grid.getAtomContacts(); 1482 } 1483 1484 /** 1485 * Returns the set of inter-chain contacts between the two given chains for 1486 * the given atom names. Uses a geometric hashing algorithm that speeds up 1487 * the calculation without need of full distance matrix. The parsing mode 1488 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1489 * true for this to work. 1490 * 1491 * @param chain1 1492 * @param chain2 1493 * @param atomNames 1494 * the array with atom names to be used. For Calphas use {"CA"}, 1495 * if null all non-H atoms will be used. Note HET atoms are 1496 * ignored unless this parameter is null. 1497 * @param cutoff 1498 * @param hetAtoms 1499 * if true HET atoms are included, if false they are not 1500 * @return 1501 */ 1502 public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2, 1503 String[] atomNames, double cutoff, boolean hetAtoms) { 1504 Grid grid = new Grid(cutoff); 1505 Atom[] atoms1 = null; 1506 Atom[] atoms2 = null; 1507 if (atomNames == null) { 1508 atoms1 = getAllNonHAtomArray(chain1, hetAtoms); 1509 atoms2 = getAllNonHAtomArray(chain2, hetAtoms); 1510 } else { 1511 atoms1 = getAtomArray(chain1, atomNames); 1512 atoms2 = getAtomArray(chain2, atomNames); 1513 } 1514 grid.addAtoms(atoms1, atoms2); 1515 1516 return grid.getAtomContacts(); 1517 } 1518 1519 /** 1520 * Returns the set of inter-chain contacts between the two given chains for 1521 * all non-H atoms. Uses a geometric hashing algorithm that speeds up the 1522 * calculation without need of full distance matrix. The parsing mode 1523 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to 1524 * true for this to work. 1525 * 1526 * @param chain1 1527 * @param chain2 1528 * @param cutoff 1529 * @param hetAtoms 1530 * if true HET atoms are included, if false they are not 1531 * @return 1532 */ 1533 public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2, 1534 double cutoff, boolean hetAtoms) { 1535 return getAtomsInContact(chain1, chain2, null, cutoff, hetAtoms); 1536 } 1537 1538 /** 1539 * Finds Groups in {@code structure} that contain at least one Atom that is 1540 * within {@code radius} Angstroms of {@code centroid}. 1541 * 1542 * @param structure 1543 * The structure from which to find Groups 1544 * @param centroid 1545 * The centroid of the shell 1546 * @param excludeResidues 1547 * A list of ResidueNumbers to exclude 1548 * @param radius 1549 * The radius from {@code centroid}, in Angstroms 1550 * @param includeWater 1551 * Whether to include Groups whose <em>only</em> atoms are water 1552 * @param useAverageDistance 1553 * When set to true, distances are the arithmetic mean (1-norm) 1554 * of the distances of atoms that belong to the group and that 1555 * are within the shell; otherwise, distances are the minimum of 1556 * these values 1557 * @return A map of Groups within (or partially within) the shell, to their 1558 * distances in Angstroms 1559 */ 1560 public static Map<Group, Double> getGroupDistancesWithinShell( 1561 Structure structure, Atom centroid, 1562 Set<ResidueNumber> excludeResidues, double radius, 1563 boolean includeWater, boolean useAverageDistance) { 1564 1565 // for speed, we avoid calculating square roots 1566 radius = radius * radius; 1567 1568 Map<Group, Double> distances = new HashMap<Group, Double>(); 1569 1570 // we only need this if we're averaging distances 1571 // note that we can't use group.getAtoms().size() because some the 1572 // group's atoms be outside the shell 1573 Map<Group, Integer> atomCounts = new HashMap<Group, Integer>(); 1574 1575 for (Chain chain : structure.getChains()) { 1576 groupLoop: for (Group chainGroup : chain.getAtomGroups()) { 1577 1578 // exclude water 1579 if (!includeWater && chainGroup.isWater()) 1580 continue; 1581 1582 // check blacklist of residue numbers 1583 for (ResidueNumber rn : excludeResidues) { 1584 if (rn.equals(chainGroup.getResidueNumber())) 1585 continue groupLoop; 1586 } 1587 1588 for (Atom testAtom : chainGroup.getAtoms()) { 1589 1590 // use getDistanceFast as we are doing a lot of comparisons 1591 double dist = Calc.getDistanceFast(centroid, testAtom); 1592 1593 // if we're the shell 1594 if (dist <= radius) { 1595 if (!distances.containsKey(chainGroup)) 1596 distances.put(chainGroup, Double.POSITIVE_INFINITY); 1597 if (useAverageDistance) { 1598 // sum the distance; we'll divide by the total 1599 // number later 1600 // here, we CANNOT use fastDistance (distance 1601 // squared) because we want the arithmetic mean 1602 distances.put(chainGroup, distances.get(chainGroup) 1603 + Math.sqrt(dist)); 1604 if (!atomCounts.containsKey(chainGroup)) 1605 atomCounts.put(chainGroup, 0); 1606 atomCounts.put(chainGroup, 1607 atomCounts.get(chainGroup) + 1); 1608 } else { 1609 // take the minimum distance among all atoms of 1610 // chainGroup 1611 // note that we can't break here because we might 1612 // find a smaller distance 1613 if (dist < distances.get(chainGroup)) { 1614 distances.put(chainGroup, dist); 1615 } 1616 } 1617 } 1618 1619 } 1620 } 1621 } 1622 1623 if (useAverageDistance) { 1624 for (Map.Entry<Group, Double> entry : distances.entrySet()) { 1625 int count = atomCounts.get(entry.getKey()); 1626 distances.put(entry.getKey(), entry.getValue() / count); 1627 } 1628 } else { 1629 // in this case we used getDistanceFast 1630 for (Map.Entry<Group, Double> entry : distances.entrySet()) { 1631 distances.put(entry.getKey(), Math.sqrt(entry.getValue())); 1632 } 1633 } 1634 1635 return distances; 1636 1637 } 1638 1639 public static Set<Group> getGroupsWithinShell(Structure structure, 1640 Atom atom, Set<ResidueNumber> excludeResidues, double distance, 1641 boolean includeWater) { 1642 1643 // square the distance to use as a comparison against getDistanceFast 1644 // which returns the square of a distance. 1645 distance = distance * distance; 1646 1647 Set<Group> returnSet = new LinkedHashSet<Group>(); 1648 for (Chain chain : structure.getChains()) { 1649 groupLoop: for (Group chainGroup : chain.getAtomGroups()) { 1650 if (!includeWater && chainGroup.isWater()) 1651 continue; 1652 for (ResidueNumber rn : excludeResidues) { 1653 if (rn.equals(chainGroup.getResidueNumber())) 1654 continue groupLoop; 1655 } 1656 for (Atom atomB : chainGroup.getAtoms()) { 1657 1658 // use getDistanceFast as we are doing a lot of comparisons 1659 double dist = Calc.getDistanceFast(atom, atomB); 1660 if (dist <= distance) { 1661 returnSet.add(chainGroup); 1662 break; 1663 } 1664 1665 } 1666 } 1667 } 1668 return returnSet; 1669 } 1670 1671 /** 1672 * <p> 1673 * Returns a Set of Groups in a structure within the distance specified of a 1674 * given group. 1675 * </p> 1676 * <p> 1677 * Updated 18-Sep-2015 sroughley to return a Set so only a unique set of 1678 * Groups returned 1679 * 1680 * @param structure 1681 * The structure to work with 1682 * @param group 1683 * The 'query' group 1684 * @param distance 1685 * The cutoff distance 1686 * @param includeWater 1687 * Should water residues be included in the output? 1688 * @return {@link LinkedHashSet} of {@link Group}s within at least one atom 1689 * with {@code distance} of at least one atom in {@code group} 1690 */ 1691 public static Set<Group> getGroupsWithinShell(Structure structure, 1692 Group group, double distance, boolean includeWater) { 1693 1694 Set<Group> returnList = new LinkedHashSet<Group>(); 1695 1696 Set<ResidueNumber> excludeGroups = new HashSet<ResidueNumber>(); 1697 excludeGroups.add(group.getResidueNumber()); 1698 for (Atom atom : group.getAtoms()) { 1699 Set<Group> set = getGroupsWithinShell(structure, atom, 1700 excludeGroups, distance, includeWater); 1701 returnList.addAll(set); 1702 } 1703 1704 return returnList; 1705 } 1706 1707 /** 1708 * Remove all models from a Structure and keep only the first 1709 * 1710 * @param s 1711 * original Structure 1712 * @return a structure that contains only the first model 1713 * @since 3.0.5 1714 */ 1715 public static Structure removeModels(Structure s) { 1716 if (s.nrModels() == 1) 1717 return s; 1718 1719 Structure n = new StructureImpl(); 1720 // go through whole substructure and clone ... 1721 1722 // copy structure data 1723 1724 n.setPDBCode(s.getPDBCode()); 1725 n.setName(s.getName()); 1726 1727 // TODO: do deep copying of data! 1728 n.setPDBHeader(s.getPDBHeader()); 1729 n.setDBRefs(s.getDBRefs()); 1730 1731 n.setSites(s.getSites()); 1732 1733 n.setChains(s.getModel(0)); 1734 1735 return n; 1736 1737 } 1738 1739 /** 1740 * Removes all polymeric and solvent groups from a list of groups 1741 * 1742 */ 1743 public static List<Group> filterLigands(List<Group> allGroups) { 1744 1745 List<Group> groups = new ArrayList<Group>(); 1746 for (Group g : allGroups) { 1747 1748 if ( g.isPolymeric()) 1749 continue; 1750 1751 if (!g.isWater()) { 1752 groups.add(g); 1753 } 1754 } 1755 1756 return groups; 1757 } 1758 1759 /** 1760 * Short version of {@link #getStructure(String, PDBFileParser, AtomCache)} 1761 * which creates new parsers when needed 1762 * 1763 * @param name 1764 * @return 1765 * @throws IOException 1766 * @throws StructureException 1767 */ 1768 public static Structure getStructure(String name) throws IOException, 1769 StructureException { 1770 return StructureTools.getStructure(name, null, null); 1771 } 1772 1773 /** 1774 * Flexibly get a structure from an input String. The intent of this method 1775 * is to allow any reasonable string which could refer to a structure to be 1776 * correctly parsed. The following are currently supported: 1777 * <ol> 1778 * <li>Filename (if name refers to an existing file) 1779 * <li>PDB ID 1780 * <li>SCOP domains 1781 * <li>PDP domains 1782 * <li>Residue ranges 1783 * <li>Other formats supported by AtomCache 1784 * </ol> 1785 * 1786 * @param name 1787 * Some reference to the protein structure 1788 * @param parser 1789 * A clean PDBFileParser to use if it is a file. If null, a 1790 * PDBFileParser will be instantiated if needed. 1791 * @param cache 1792 * An AtomCache to use if the structure can be fetched from the 1793 * PDB. If null, a AtomCache will be instantiated if needed. 1794 * @return A Structure object 1795 * @throws IOException 1796 * if name is an existing file, but doesn't parse correctly 1797 * @throws StructureException 1798 * if the format is unknown, or if AtomCache throws an 1799 * exception. 1800 */ 1801 public static Structure getStructure(String name, PDBFileParser parser, 1802 AtomCache cache) throws IOException, StructureException { 1803 File f = new File(FileDownloadUtils.expandUserHome(name)); 1804 if (f.exists()) { 1805 if (parser == null) { 1806 parser = new PDBFileParser(); 1807 } 1808 InputStream inStream = new FileInputStream(f); 1809 return parser.parsePDBFile(inStream); 1810 } else { 1811 if (cache == null) { 1812 cache = new AtomCache(); 1813 } 1814 return cache.getStructure(name); 1815 } 1816 } 1817 1818 /** 1819 * @deprecated use {@link Chain#isProtein()} instead. 1820 */ 1821 @Deprecated 1822 public static boolean isProtein(Chain c) { 1823 1824 return c.isProtein(); 1825 } 1826 1827 /** 1828 * @deprecated use {@link Chain#isNucleicAcid()} instead. 1829 */ 1830 @Deprecated 1831 public static boolean isNucleicAcid(Chain c) { 1832 return c.isNucleicAcid(); 1833 } 1834 1835 /** 1836 * @deprecated use {@link Chain#getPredominantGroupType()} instead. 1837 */ 1838 @Deprecated 1839 public static GroupType getPredominantGroupType(Chain c) { 1840 return c.getPredominantGroupType(); 1841 } 1842 1843 /** 1844 * @deprecated use {@link Chain#isWaterOnly()} instead. 1845 */ 1846 @Deprecated 1847 public static boolean isChainWaterOnly(Chain c) { 1848 return c.isWaterOnly(); 1849 } 1850 1851 /** 1852 * @deprecated use {@link Chain#isPureNonPolymer()} instead. 1853 */ 1854 @Deprecated 1855 public static boolean isChainPureNonPolymer(Chain c) { 1856 1857 return c.isPureNonPolymer(); 1858 } 1859 1860 /** 1861 * Cleans up the structure's alternate location (altloc) groups. All alternate location groups should have all atoms (except 1862 * in the case of microheterogenity) or when a deuterium exists. 1863 * Ensure that all the alt loc groups have all the atoms in the main group. 1864 * @param structure The Structure to be cleaned up 1865 */ 1866 public static void cleanUpAltLocs(Structure structure) { 1867 for (int i =0; i< structure.nrModels() ; i++){ 1868 for (Chain chain : structure.getModel(i)) { 1869 for (Group group : chain.getAtomGroups()) { 1870 for (Group altLocGroup : group.getAltLocs()) { 1871 for ( Atom groupAtom : group.getAtoms()) { 1872 // If this alt loc doesn't have this atom 1873 if (! altLocGroup.hasAtom(groupAtom.getName())) { 1874 // Fix for microheterogenity 1875 if (altLocGroup.getPDBName().equals(group.getPDBName())) { 1876 // If it's a Hydrogen then we check for it's Deuterated brother 1877 if(!hasDeuteratedEquiv(groupAtom, altLocGroup)){ 1878 altLocGroup.addAtom(groupAtom); 1879 } 1880 } 1881 } 1882 } 1883 } 1884 } 1885 } 1886 } 1887 } 1888 1889 /** 1890 * Check to see if an Deuterated atom has a non deuterated brother in the group. 1891 * @param atom the input atom that is putatively deuterium 1892 * @param currentGroup the group the atom is in 1893 * @return true if the atom is deuterated and it's hydrogen equive exists. 1894 */ 1895 public static boolean hasNonDeuteratedEquiv(Atom atom, Group currentGroup) { 1896 if(atom.getElement()==Element.D && currentGroup.hasAtom(replaceFirstChar(atom.getName(),'D', 'H'))) { 1897 // If it's deuterated and has a non-deuterated brother 1898 return true; 1899 } 1900 return false; 1901 } 1902 1903 /** 1904 * Check to see if a Hydrogen has a Deuterated brother in the group. 1905 * @param atom the input atom that is putatively hydorgen 1906 * @param currentGroup the group the atom is in 1907 * @return true if the atom is hydrogen and it's Deuterium equiv exists. 1908 */ 1909 public static boolean hasDeuteratedEquiv(Atom atom, Group currentGroup) { 1910 if(atom.getElement()==Element.H && currentGroup.hasAtom(replaceFirstChar(atom.getName(),'H', 'D'))) { 1911 // If it's hydrogen and has a deuterated brother 1912 return true; 1913 } 1914 return false; 1915 } 1916 1917 private static String replaceFirstChar(String name, char c, char d) { 1918 if(name.charAt(0)==c){ 1919 return name.replaceFirst(String.valueOf(c), String.valueOf(d)); 1920 } 1921 return name; 1922 } 1923 1924}