001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * 021 */ 022package org.biojava.nbio.structure.basepairs; 023 024import org.biojava.nbio.structure.Structure; 025import org.biojava.nbio.structure.Chain; 026import org.biojava.nbio.structure.Group; 027import org.biojava.nbio.structure.Atom; 028import org.biojava.nbio.structure.contact.Pair; 029import org.biojava.nbio.structure.geometry.SuperPosition; 030import org.biojava.nbio.structure.geometry.SuperPositionQCP; 031import org.biojava.nbio.structure.io.PDBFileReader; 032 033import javax.vecmath.Matrix4d; 034import javax.vecmath.Point3d; 035import java.io.ByteArrayInputStream; 036import java.io.IOException; 037import java.io.Serializable; 038import java.util.List; 039import java.util.Arrays; 040import java.util.ArrayList; 041import java.util.Map; 042import java.util.HashMap; 043 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046 047import static java.lang.Math.sin; 048import static java.lang.Math.cos; 049import static java.lang.Math.atan2; 050import static java.lang.Math.acos; 051import static java.lang.Math.PI; 052 053/** 054 * This module calculates the el Hassan-Calladine Base Pairing and Base-pair Step Parameters for any nucleic 055 * acid containing structure that has the information about the core base-pair rings. 056 * Citation: https://www.ncbi.nlm.nih.gov/pubmed/11601858 057 * 058 * The method that is usually overridden is findPairs(), this base implementation is used for a large-scale 059 * analysis of the most proper helical regions in almost 4000 protein-DNA structures, almost 060 * 2000 structures containing only DNA, or almost 1300 structures containing only RNA. (as of 7/2017). 061 * Those who study tertiary structures for RNA folding should use the TertiaryBasePairParameters, 062 * because this base class is only looking for base pairs between separate strands that exactly line up. 063 * To relax the lining up policy and allow for non-canonical base pairs, use the MismatchedBasePairParameters 064 * class, which will not consider intra-strand base pairing. 065 * 066 * @author Luke Czapla 067 * @since 5.0.0 068 * 069 */ 070public class BasePairParameters implements Serializable { 071 072 private static final long serialVersionUID = 6214502385L; 073 private static Logger log = LoggerFactory.getLogger(BasePairParameters.class); 074 075 // See URL http://ndbserver.rutgers.edu/ndbmodule/archives/reports/tsukuba/Table1.html 076 // and the paper cited at the top of this class (also as Table 1). 077 // These are hard-coded to avoid problems with resource paths. 078 public static final String[] STANDARD_BASES = new String[] { 079 "SEQRES 1 A 1 A\n" + 080 "ATOM 2 N9 A A 1 -1.291 4.498 0.000\n" + 081 "ATOM 3 C8 A A 1 0.024 4.897 0.000\n" + 082 "ATOM 4 N7 A A 1 0.877 3.902 0.000\n" + 083 "ATOM 5 C5 A A 1 0.071 2.771 0.000\n" + 084 "ATOM 6 C6 A A 1 0.369 1.398 0.000\n" + 085 "ATOM 8 N1 A A 1 -0.668 0.532 0.000\n" + 086 "ATOM 9 C2 A A 1 -1.912 1.023 0.000\n" + 087 "ATOM 10 N3 A A 1 -2.320 2.290 0.000\n" + 088 "ATOM 11 C4 A A 1 -1.267 3.124 0.000\n" + 089 "END", 090 "SEQRES 1 A 1 G\n" + 091 "ATOM 2 N9 G A 1 -1.289 4.551 0.000\n" + 092 "ATOM 3 C8 G A 1 0.023 4.962 0.000\n" + 093 "ATOM 4 N7 G A 1 0.870 3.969 0.000\n" + 094 "ATOM 5 C5 G A 1 0.071 2.833 0.000\n" + 095 "ATOM 6 C6 G A 1 0.424 1.460 0.000\n" + 096 "ATOM 8 N1 G A 1 -0.700 0.641 0.000\n" + 097 "ATOM 9 C2 G A 1 -1.999 1.087 0.000\n" + 098 "ATOM 11 N3 G A 1 -2.342 2.364 0.001\n" + 099 "ATOM 12 C4 G A 1 -1.265 3.177 0.000\n" + 100 "END", 101 "SEQRES 1 A 1 T\n" + 102 "ATOM 2 N1 T A 1 -1.284 4.500 0.000\n" + 103 "ATOM 3 C2 T A 1 -1.462 3.135 0.000\n" + 104 "ATOM 5 N3 T A 1 -0.298 2.407 0.000\n" + 105 "ATOM 6 C4 T A 1 0.994 2.897 0.000\n" + 106 "ATOM 8 C5 T A 1 1.106 4.338 0.000\n" + 107 "ATOM 10 C6 T A 1 -0.024 5.057 0.000\n" + 108 "END", 109 "SEQRES 1 A 1 C\n" + 110 "ATOM 2 N1 C A 1 -1.285 4.542 0.000\n" + 111 "ATOM 3 C2 C A 1 -1.472 3.158 0.000\n" + 112 "ATOM 5 N3 C A 1 -0.391 2.344 0.000\n" + 113 "ATOM 6 C4 C A 1 0.837 2.868 0.000\n" + 114 "ATOM 8 C5 C A 1 1.056 4.275 0.000\n" + 115 "ATOM 9 C6 C A 1 -0.023 5.068 0.000\n" + 116 "END", 117 "SEQRES 1 A 1 U\n" + 118 "ATOM 2 N1 U A 1 -1.284 4.500 0.000\n" + 119 "ATOM 3 C2 U A 1 -1.462 3.131 0.000\n" + 120 "ATOM 5 N3 U A 1 -0.302 2.397 0.000\n" + 121 "ATOM 6 C4 U A 1 0.989 2.884 0.000\n" + 122 "ATOM 8 C5 U A 1 1.089 4.311 0.000\n" + 123 "ATOM 9 C6 U A 1 -0.024 5.053 0.000\n" 124 }; 125 126 // this is also hard-coded data about standard WC base pairs for both DNA and RNA 127 protected static final String[] BASE_LIST_DNA = {"A", "G", "T", "C"}; 128 protected static final String[] BASE_LIST_RNA = {"A", "G", "U", "C"}; 129 protected static final Map<String, Integer> BASE_MAP; 130 // private static List<String> RNAspecific = Arrays.asList("U", "URA"), 131 // DNAspecific = Arrays.asList("DC", "C", "CYT"); 132 protected static final Map<Integer, List<String>> RING_MAP; 133 static { 134 BASE_MAP = new HashMap<>(); 135 BASE_MAP.put("DA", 0); BASE_MAP.put("ADE", 0); BASE_MAP.put("A", 0); 136 BASE_MAP.put("DG", 1); BASE_MAP.put("GUA", 1); BASE_MAP.put("G", 1); 137 BASE_MAP.put("DT", 2); BASE_MAP.put("THY", 2); BASE_MAP.put("T", 2); BASE_MAP.put("U", 2); BASE_MAP.put("URA", 2); 138 BASE_MAP.put("DC", 3); BASE_MAP.put("CYT", 3); BASE_MAP.put("C", 3); 139 140 RING_MAP = new HashMap<>(); 141 RING_MAP.put(0, Arrays.asList("C8", "C2", "N3", "C4", "C5", "C6", "N7", "N1", "N9")); 142 RING_MAP.put(1, Arrays.asList("C8", "C2", "N3", "C4", "C5", "C6", "N7", "N1", "N9")); 143 RING_MAP.put(2, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1")); 144 RING_MAP.put(3, Arrays.asList("C6", "C2", "N3", "C4", "C5", "N1")); 145 } 146 147 protected Structure structure; 148 protected boolean canonical = true; 149 protected boolean useRNA = false; 150 protected boolean nonredundant = false; 151 protected double[] pairParameters; 152 153 // this is the main data that the user wants to get back out from the procedure. 154 protected String pairSequence = ""; 155 protected double[][] pairingParameters; 156 protected double[][] stepParameters; 157 protected List<String> pairingNames = new ArrayList<>(); 158 protected List<Matrix4d> referenceFrames = new ArrayList<>(); 159 160 161 /** 162 * This constructor takes a Structure object, finds base pair and base-pair step parameters 163 * for double-helical regions within the structure. 164 * @param structure The already-loaded structure to analyze. 165 * @param useRNA whether to look for canonical RNA pairs. By default (false) it analyzes DNA. 166 * @param removeDups whether to only look for base-pair parameters for each unique sequence in 167 * the structure (if set to <i>true</i>) 168 * @param canonical Whether to consider only Watson-Crick base pairs 169 */ 170 public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups, boolean canonical) { 171 this.structure = structure; 172 this.useRNA = useRNA; 173 this.canonical = canonical; 174 this.nonredundant = removeDups; 175 176 } 177 178 /** 179 * This constructor takes a Structure object, whether to use RNA, and whether to remove duplicate sequences. 180 * @param structure The already-loaded structure to analyze. 181 * @param useRNA if true, the RNA standard bases will be used. Otherwise, if false, it will work on standard DNA bases. 182 * @param removeDups if true, duplicate sequences will not be considered. This is for the analysis of X-ray structures from 183 * RCSB, where there may be identical or similar units. 184 */ 185 public BasePairParameters(Structure structure, boolean useRNA, boolean removeDups) { 186 this(structure, useRNA, removeDups, false); 187 } 188 189 /** 190 * This constructor takes a Structure object, and whether to use the RNA standard bases. 191 * @param structure The already-loaded structure to analyze. 192 * @param useRNA if true, the RNA standard bases will be used. Otherwise, if false, it will work on standard DNA bases. 193 */ 194 public BasePairParameters(Structure structure, boolean useRNA) { 195 this(structure, useRNA, false, false); 196 } 197 198 /** 199 * This constructor takes a Structure object, finds base pair and base-pair step parameters 200 * for double-helical regions within the structure for only canonical DNA pairs. 201 * @param structure The already-loaded structure to analyze. 202 */ 203 public BasePairParameters(Structure structure) { 204 this(structure, false, false, true); 205 } 206 207 208 /** 209 * This method is the main function call to extract all step parameters, pairing parameters, and sequence 210 * information from the Structure object provided to the constructor. 211 * @return This same object with the populated data, convenient for output 212 * (e.g. <i>log.info(new BasePairParameters(structure).analyze());</i>) 213 */ 214 public BasePairParameters analyze() { 215 if (structure == null) { 216 pairingParameters = null; 217 stepParameters = null; 218 return this; 219 } 220 List<Chain> nucleics = this.getNucleicChains(nonredundant); 221 List<Pair<Group>> pairs = this.findPairs(nucleics); 222 this.pairingParameters = new double[pairs.size()][6]; 223 this.stepParameters = new double[pairs.size()][6]; 224 Matrix4d lastStep; 225 Matrix4d currentStep = null; 226 for (int i = 0; i < pairs.size(); i++) { 227 lastStep = currentStep; 228 currentStep = this.basePairReferenceFrame(pairs.get(i)); 229 referenceFrames.add((Matrix4d)currentStep.clone()); 230 for (int j = 0; j < 6; j++) pairingParameters[i][j] = pairParameters[j]; 231 if (i != 0) { 232 lastStep.invert(); 233 lastStep.mul(currentStep); 234 double[] sparms = calculateTp(lastStep); 235 for (int j = 0; j < 6; j++) stepParameters[i][j] = sparms[j]; 236 } 237 } 238 return this; 239 } 240 241 242 243 /** 244 * This method returns the total number of base pairs that were found, used after the call to analyze(). 245 * @return An integer value, number of base pairs 246 */ 247 public int getLength() { 248 if (structure == null || pairParameters == null) throw new IllegalArgumentException("This structure is not analyzed or not initialized."); 249 return pairingParameters.length; 250 } 251 252 253 /** 254 * This method reports all the pair parameters, in the order of: 255 * buckle, propeller, opening (in degrees), shear, stagger, stretch (in Å). 256 * @return A double[][] with length equal to number of base pairs for rows, and 6 columns 257 */ 258 public double[][] getPairingParameters() { 259 return pairingParameters; 260 } 261 262 /** 263 * This method reports all the base-pair step parameters, in the order of: 264 * tilt, roll, twist (in degrees), shift, slide, rise (in Å). 265 * @return A double[][] with length equal to number of base pairs (the first row 0 has no step 266 * and therefore is six zeroes), and 6 columns. 267 */ 268 public double[][] getStepParameters() { 269 return stepParameters; 270 } 271 272 273 /** 274 * This method returns the primary strand's sequence where parameters were found. 275 * There are spaces in the string anywhere there was a break in the helix or when 276 * it goes from one helix to another helix in the structure. (the "step" is still returned) 277 * @return String of primary sequence with spaces between gaps and new helices. 278 */ 279 public String getPairSequence() { 280 return pairSequence; 281 } 282 283 284 /** 285 * This method returns the names of the pairs in terms of A, G, T/U, and C for each base pair group in the 286 * list. The first character is the leading strand base and the second character is the complementary base 287 * @return 288 */ 289 public List<String> getPairingNames() { 290 return pairingNames; 291 } 292 293 public List<Matrix4d> getReferenceFrames() { 294 return referenceFrames; 295 } 296 297 /** 298 * This method is an internal test that the base pair specified is within a valid range. If not, it throws an exception 299 * with a message. 300 * @param bp The index of the base pair or base-pair step to return. 301 */ 302 private void checkArgument(int bp) { 303 if (bp < 0 || bp >= getPairingParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 304 } 305 306 /** 307 * This method returns the buckle in degrees for the given base pair 308 * @param bp the number of the base pair (starting with 0) 309 * @return the value as a double (in degrees) 310 */ 311 public Double getBuckle(int bp) { 312 checkArgument(bp); 313 return pairingParameters[bp][0]; 314 } 315 316 /** 317 * This method returns the propeller ("propeller-twist") in degrees for the given base pair 318 * @param bp the number of the base pair (starting with 0) 319 * @return the value as a double (in degrees) 320 */ 321 public Double getPropeller(int bp) { 322 checkArgument(bp); 323 return pairingParameters[bp][1]; 324 } 325 326 /** 327 * This method returns the opening in degrees for the given base pair 328 * @param bp the number of the base pair (starting with 0) 329 * @return the value as a double (in degrees) 330 */ 331 public Double getOpening(int bp) { 332 checkArgument(bp); 333 return pairingParameters[bp][2]; 334 } 335 336 /** 337 * This method returns the shear in Å for the given base pair 338 * @param bp the number of the base pair (starting with 0) 339 * @return the value as a double (in Å) 340 */ 341 public Double getShear(int bp) { 342 checkArgument(bp); 343 return pairingParameters[bp][3]; 344 } 345 346 /** 347 * This method returns the stretch in Å for the given base pair 348 * @param bp the number of the base pair (starting with 0) 349 * @return the value as a double (in Å) 350 */ 351 public Double getStretch(int bp) { 352 checkArgument(bp); 353 return pairingParameters[bp][4]; 354 } 355 356 /** 357 * This method returns the stagger in Å for the given base pair 358 * @param bp the number of the base pair (starting with 0) 359 * @return the value as a double (in Å) 360 */ 361 public Double getStagger(int bp) { 362 checkArgument(bp); 363 return pairingParameters[bp][5]; 364 } 365 366 /** 367 * This method returns the tilt for the given base pair, relative to the one before it. 368 * @param bp the number of the base pair (starting with 0) 369 * @return the value as a double (in degrees) 370 */ 371 public Double getTilt(int bp) { 372 checkArgument(bp); 373 return stepParameters[bp][0]; 374 } 375 376 /** 377 * This method returns the roll for the given base pair, relative to the one before it. 378 * @param bp the number of the base pair (starting with 0) 379 * @return the value as a double (in degrees) 380 */ 381 public Double getRoll(int bp) { 382 if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 383 return stepParameters[bp][1]; 384 } 385 386 /** 387 * This method returns the twist for the given base pair, relative to the one before it. 388 * @param bp the number of the base pair (starting with 0) 389 * @return the value as a double (in degrees) 390 */ 391 public Double getTwist(int bp) { 392 if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 393 return stepParameters[bp][2]; 394 } 395 396 /** 397 * Return the shift for the given base pair, relative to the one before it. 398 * @param bp the number of the base pair (starting with 0) 399 * @return the value as a double (in Å) 400 */ 401 public Double getShift(int bp) { 402 if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 403 return stepParameters[bp][3]; 404 } 405 406 /** 407 * This method returns the slide for the given base pair, relative to the one before it. 408 * @param bp the number of the base pair (starting with 0) 409 * @return the value as a double (in Å) 410 */ 411 public Double getSlide(int bp) { 412 if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 413 return stepParameters[bp][4]; 414 } 415 416 /** 417 * This method returns the rise for the given base pair, relative to the one before it. 418 * @param bp the number of the base pair (starting with 0) 419 * @return the value as a double (in Å) 420 */ 421 public Double getRise(int bp) { 422 if (bp < 0 || bp >= getStepParameters().length) throw new IllegalArgumentException("Base pair number is out of range."); 423 return stepParameters[bp][5]; 424 } 425 426 427 /** 428 * This method reports all the nucleic acid chains and has an option to remove duplicates if you 429 * are considering an analysis of only unique DNA or RNA helices in the Structure. 430 * @param removeDups If true, it will ignore duplicate chains 431 * @return A list of all the nucleic acid chains in order of the Structure 432 */ 433 public List<Chain> getNucleicChains(boolean removeDups) { 434 if (structure == null) return new ArrayList<>(); 435 List<Chain> chains = structure.getChains(); 436 List<Chain> result = new ArrayList<>(); 437 for (Chain c: chains) { 438 if (c.isNucleicAcid()) { 439 result.add(c); 440 } 441 } 442 if (removeDups) for (int i = 0; i < result.size(); i++) { 443 for (int j = i+2; j < result.size(); j++) { 444 // remove duplicate sequences (structures with two or more identical units) 445 if (result.get(i).getAtomSequence().equals(result.get(j).getAtomSequence())) { 446 result.remove(j); 447 } 448 } 449 } 450 return result; 451 } 452 453 /** 454 * This method performs a search for base pairs in the structure. The criteria is alignment of 455 * sequences and the canonical base pairs of DNA or RNA. Use MismatchedBasePairParameters 456 * or TertiaryBasePairParameters for finding higher-order associations. 457 * @param chains The list of chains already found to be nucleic acids 458 * @return The list of corresponding Watson-Crick groups as pairs, as a Pair of nucleic acid Groups 459 */ 460 public List<Pair<Group>> findPairs(List<Chain> chains) { 461 List<Pair<Group>> result = new ArrayList<>(); 462 for (int i = 0; i < chains.size(); i++) { 463 Chain c = chains.get(i); 464 for (int j = i+1; j < chains.size(); j++) { 465 String complement = complement(chains.get(j).getAtomSequence(), useRNA); 466 String match = longestCommonSubstring(c.getAtomSequence(), complement); 467 if (log.isDebugEnabled()) { 468 log.debug(c.getAtomSequence() + " " + chains.get(j).getAtomSequence() + " " + match); 469 } 470 int index1 = c.getAtomSequence().indexOf(match); 471 int index2 = complement.length() - complement.indexOf(match) - 1; 472 for (int k = 0; k < match.length(); k++) { 473 Group g1 = c.getAtomGroup(index1+k); 474 Group g2 = chains.get(j).getAtomGroup(index2-k); 475 Integer type1 = BASE_MAP.get(g1.getPDBName()); 476 Integer type2 = BASE_MAP.get(g2.getPDBName()); 477 if (type1 == null || type2 == null) { 478 if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 479 continue; 480 } 481 Atom a1 = g1.getAtom(RING_MAP.get(type1).get(0)); 482 Atom a2 = g2.getAtom(RING_MAP.get(type2).get(0)); 483 484 if (a1 == null) { 485 log.info("Error processing " + g1.getPDBName()); 486 if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 487 continue; 488 } 489 if (a2 == null) { 490 log.info("Error processing " + g2.getPDBName()); 491 if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 492 continue; 493 } 494 495 double dx = a1.getX()-a2.getX(); 496 double dy = a1.getY()-a2.getY(); 497 double dz = a1.getZ()-a2.getZ(); 498 double distance = Math.sqrt(dx*dx+dy*dy+dz*dz); 499 //log.info("C8-C6 Distance (Å): " + distance); 500 // could be a base pair 501 if (Math.abs(distance-10.0) < 4.0) { 502 boolean valid = true; 503 for (String atomname : RING_MAP.get(type1)) { 504 Atom a = g1.getAtom(atomname); 505 if (a == null) valid = false; 506 } 507 if (valid) for (String atomname: RING_MAP.get(type2)) { 508 Atom a = g2.getAtom(atomname); 509 if (a == null) valid = false; 510 } 511 if (valid) { 512 result.add(new Pair<Group>(g1, g2)); 513 pairingNames.add((useRNA ? BASE_LIST_RNA[type1]+ BASE_LIST_RNA[type2] : BASE_LIST_DNA[type1]+ BASE_LIST_DNA[type2])); 514 pairSequence += c.getAtomSequence().charAt(index1 + k); 515 } else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 516 } else if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 517 } 518 if (pairSequence.length() != 0 && pairSequence.charAt(pairSequence.length()-1) != ' ') pairSequence += ' '; 519 } 520 //log.info(); 521 } 522 log.info("Matched: " + pairSequence); 523 return result; 524 } 525 526 527 /** 528 * This method calculates the central frame (4x4 transformation matrix) of a single base pair. 529 * @param pair An array of the two groups that make a hypothetical pair 530 * @return The middle frame of the center of the base-pair formed 531 */ 532 public Matrix4d basePairReferenceFrame(Pair<Group> pair) { 533 Integer type1 = BASE_MAP.get(pair.getFirst().getPDBName()); 534 Integer type2 = BASE_MAP.get(pair.getSecond().getPDBName()); 535 SuperPosition sp = new SuperPositionQCP(true); 536 if (type1 == null || type2 == null) return null; 537 PDBFileReader pdbFileReader = new PDBFileReader(); 538 Structure s1, s2; 539 try { 540 s1 = pdbFileReader.getStructure(new ByteArrayInputStream(STANDARD_BASES[type1].getBytes())); 541 s2 = pdbFileReader.getStructure(new ByteArrayInputStream(STANDARD_BASES[type2].getBytes())); 542 } catch (IOException e) { 543 e.printStackTrace(); 544 return null; 545 } 546 Group std1 = s1.getChain("A").getAtomGroup(0); 547 Group std2 = s2.getChain("A").getAtomGroup(0); 548 549 Point3d[] pointref = new Point3d[std1.getAtoms().size()]; 550 Point3d[] pointact = new Point3d[std1.getAtoms().size()]; 551 int count = 0; 552 553 for (Atom a : std1.getAtoms()) { 554 if (pair.getFirst().getAtom(a.getName()) == null) return null; 555 pointref[count] = a.getCoordsAsPoint3d(); 556 pointact[count] = pair.getFirst().getAtom(a.getName()).getCoordsAsPoint3d(); 557 count++; 558 } 559 assert count == std1.getAtoms().size(); 560 Matrix4d ref1 = (Matrix4d)sp.superposeAndTransform(pointact, pointref).clone(); 561 562 pointref = new Point3d[std2.getAtoms().size()]; 563 pointact = new Point3d[std2.getAtoms().size()]; 564 565 count = 0; 566 for (Atom a : std2.getAtoms()) { 567 if (pair.getSecond().getAtom(a.getName()) == null) return null; 568 pointref[count] = a.getCoordsAsPoint3d(); 569 pointact[count] = pair.getSecond().getAtom(a.getName()).getCoordsAsPoint3d(); 570 count++; 571 } 572 assert count == std2.getAtoms().size(); 573 574 Matrix4d temp = (Matrix4d)ref1.clone(); 575 Matrix4d temp2 = (Matrix4d)temp.clone(); 576 Matrix4d ref2 = sp.superposeAndTransform(pointact, pointref); 577 578 double[][] v = new double[3][4]; 579 double[] y3 = new double[4]; 580 double[] z3 = new double[4]; 581 ref2.getColumn(1, y3); 582 ref2.getColumn(2, z3); 583 double[] z31 = new double[4]; 584 ref1.getColumn(2, z31); 585 if (z3[0]*z31[0]+z3[1]*z31[1]+z3[2]*z31[2] < 0.0) { 586 for (int i = 0; i < 3; i++) { 587 y3[i] *= -1.0; 588 z3[i] *= -1.0; 589 } 590 } 591 ref2.setColumn(1, y3); 592 ref2.setColumn(2, z3); 593 594 temp.add(ref2); 595 temp.mul(0.5); 596 double[] x3 = new double[4]; 597 temp.getColumn(0, x3); 598 temp.getColumn(1, y3); 599 temp.getColumn(2, z3); 600 x3 = removeComponent(x3, z3); 601 x3 = removeComponent(x3, y3); 602 y3 = removeComponent(y3, z3); 603 temp.setColumn(0, x3); 604 temp.setColumn(1, y3); 605 temp.setColumn(2, z3); 606 607 // normalize the short, long, and normal axes 608 for (int i = 0; i < 3; i++) { 609 temp.getColumn(i, v[i]); 610 double r = Math.sqrt(v[i][0] * v[i][0] + v[i][1] * v[i][1] + v[i][2] * v[i][2]); 611 for (int j = 0; j < 3; j++) { 612 v[i][j] /= r; 613 } 614 temp.setColumn(i, v[i]); 615 } 616 617 // calculate pairing parameters: buckle, propeller, opening, shear, stretch, stagger 618 temp2.invert(); 619 temp2.mul(ref2); 620 pairParameters = calculateTp(temp2); 621 for (int i = 0; i < 6; i++) pairParameters[i] *= -1; 622 623 // return the central frame of the base pair 624 return temp; 625 626 } 627 628 629 @Override 630 public String toString() { 631 if (getPairingParameters() == null) return "No data"; 632 StringBuilder result = new StringBuilder(10000); 633 result.append(pairingParameters.length + " base pairs\n"); 634 result.append("bp: buckle propeller opening shear stretch stagger tilt roll twist shift slide rise\n"); 635 for (int i = 0; i < pairingParameters.length; i++) { 636 result.append(pairingNames.get(i)+": "); 637 for (int j = 0; j < 6; j++) 638 result.append(String.format("%5.4f", pairingParameters[i][j]) + " "); 639 for (int j = 0; j < 6; j++) 640 result.append(String.format("%5.4f", stepParameters[i][j]) + " "); 641 result.append("\n"); 642 } 643 return result.toString(); 644 } 645 646 647 // The following methods are just helper classes for the rapid analyze of base-pair geometry. 648 /** 649 * This method calculates pairing and step parameters from 4x4 transformation matrices (used internally) 650 * that comes out as a Matrix4d. 651 * @param input the 4x4 matrix representing the transformation from strand II -> strand I or pair i to pair i+1 652 * @return Six parameters as double[6] 653 */ 654 public static double[] calculateTp(Matrix4d input) { 655 656 double[][] A = new double[4][4]; 657 for (int i = 0; i < 4; i++) for (int j = 0; j < 4; j++) { 658 A[i][j] = input.getElement(i, j); 659 } 660 double[] M = new double[6]; 661 662 double cosgamma, gamma, phi, omega, sgcp, omega2_minus_phi, 663 sm, cm, sp, cp, sg, cg; 664 665 cosgamma = A[2][2]; 666 if (cosgamma > 1.0) cosgamma = 1.0; 667 else if (cosgamma < -1.0) cosgamma = -1.0; 668 669 gamma = acos(cosgamma); 670 671 sgcp = A[1][1]*A[0][2]-A[0][1]*A[1][2]; 672 673 if (gamma == 0.0) omega = -atan2(A[0][1],A[1][1]); 674 else omega = atan2(A[2][1]*A[0][2]+sgcp*A[1][2],sgcp*A[0][2]-A[2][1]*A[1][2]); 675 676 omega2_minus_phi = atan2(A[1][2],A[0][2]); 677 678 phi = omega/2.0 - omega2_minus_phi; 679 680 M[0] = gamma*sin(phi)*180.0/PI; 681 M[1] = gamma*cos(phi)*180.0/PI; 682 M[2] = omega*180.0/PI; 683 684 sm = sin(omega/2.0-phi); 685 cm = cos(omega/2.0-phi); 686 sp = sin(phi); 687 cp = cos(phi); 688 sg = sin(gamma/2.0); 689 cg = cos(gamma/2.0); 690 691 M[3] = (cm*cg*cp-sm*sp)*A[0][3]+(sm*cg*cp+cm*sp)*A[1][3]-sg*cp*A[2][3]; 692 M[4] = (-cm*cg*sp-sm*cp)*A[0][3]+(-sm*cg*sp+cm*cp)*A[1][3]+sg*sp*A[2][3]; 693 M[5] = (cm*sg)*A[0][3]+(sm*sg)*A[1][3]+cg*A[2][3]; 694 695 return M; 696 697 } 698 699 /** 700 * This method returns the complement of a base. (used internally) 701 * @param base The letter of the base 702 * @param RNA Whether it is RNA (if false, it is DNA) 703 * @return The character representing the complement of the base 704 */ 705 protected static char complementBase(char base, boolean RNA) { 706 if (base == 'A' && RNA) return 'U'; 707 if (base == 'A') return 'T'; 708 if (base == 'T' && !RNA) return 'A'; 709 if (base == 'U' && RNA) return 'A'; 710 if (base == 'C') return 'G'; 711 if (base == 'G') return 'C'; 712 return ' '; 713 } 714 715 /** 716 * Simple helper method for quickly checking the complement of a sequence, see also DNASequence nad RNASequence classes 717 * for more extensively useful functions not used in this narrow context of structural biology of base pairs. (Used internally) 718 */ 719 private static String complement(String sequence, boolean RNA) { 720 String result = ""; 721 for (int i = sequence.length() - 1; i >= 0; i--) { 722 result += complementBase(sequence.charAt(i), RNA); 723 } 724 return result; 725 } 726 727 /** 728 * This does a 3D Vector cross product of two vectors as double arrays. (used internally) 729 * 730 * @param a An array of length 3 or 4 (4th component is ignored) 731 * @param b An array of length 3 or 4 (4th component is ignored) 732 * @return The cross product of the vectors (just the first three components 733 */ 734 @SuppressWarnings("unused") 735 private static double[] cross(double[] a, double[] b) { 736 assert a.length >= 3 && b.length >= 3; 737 double[] result = new double[4]; 738 result[0] = a[1]*b[2]-a[2]*b[1]; 739 result[1] = a[2]*b[0]-a[0]*b[2]; 740 result[2] = a[0]*b[1]-a[1]*b[0]; 741 return result; 742 } 743 744 /** 745 * This method removes any component of vector a that is along vector b. (used internally) 746 * @param a The array (vector) to remove component from 747 * @param b The component array (vector) to remove from the first 748 * @return The original array a with any component along b removed from it. 749 */ 750 private static double[] removeComponent(double[] a, double[] b) { 751 double dot = 0; 752 double[] result = new double[4]; 753 for (int i = 0; i < 3; i++) { 754 dot += a[i]*b[i]; 755 } 756 for (int i = 0; i < 3; i++) { 757 result[i] = a[i]-dot*b[i]; 758 } 759 return result; 760 761 } 762 763 /** 764 * This method finds the longest common substring between two strings. (used internally) 765 * @param s1 The first string 766 * @param s2 The second string 767 * @return The substring itself 768 */ 769 private static String longestCommonSubstring(String s1, String s2) { 770 int start = 0; 771 int max = 0; 772 for (int i = 0; i < s1.length(); i++) { 773 for (int j = 0; j < s2.length(); j++) { 774 int x = 0; 775 while (s1.charAt(i + x) == s2.charAt(j + x)) { 776 x++; 777 if (((i + x) >= s1.length()) || ((j + x) >= s2.length())) break; 778 } 779 if (x > max) { 780 max = x; 781 start = i; 782 } 783 } 784 } 785 return s1.substring(start, (start + max)); 786 } 787 788 /** 789 * This returns true if a is the complement of b, false otherwise. (used internally) 790 * @param a First letter 791 * @param b Potential matching letter 792 * @param RNA Whether it is RNA (if false, DNA rules are used) 793 * @return True if the bases are complementary. 794 */ 795 protected static boolean match(char a, char b, boolean RNA) { 796 if (a == 'A' && b == 'T' && !RNA) return true; 797 if (a == 'A' && b == 'U' && RNA) return true; 798 if (a == 'T' && b == 'A' && !RNA) return true; 799 if (a == 'U' && b == 'A' && RNA) return true; 800 if (a == 'G' && b == 'C') return true; 801 if (a == 'C' && b == 'G') return true; 802 return false; 803 } 804}