001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.io.mmtf; 022 023import java.text.DateFormat; 024import java.text.SimpleDateFormat; 025import java.util.ArrayList; 026import java.util.Date; 027import java.util.HashSet; 028import java.util.LinkedHashMap; 029import java.util.List; 030import java.util.Map; 031import java.util.Map.Entry; 032import java.util.Set; 033 034import javax.vecmath.Matrix4d; 035 036import org.biojava.nbio.structure.AminoAcid; 037import org.biojava.nbio.structure.AminoAcidImpl; 038import org.biojava.nbio.structure.Atom; 039import org.biojava.nbio.structure.Bond; 040import org.biojava.nbio.structure.Chain; 041import org.biojava.nbio.structure.ExperimentalTechnique; 042import org.biojava.nbio.structure.Group; 043import org.biojava.nbio.structure.GroupType; 044import org.biojava.nbio.structure.NucleotideImpl; 045import org.biojava.nbio.structure.PDBCrystallographicInfo; 046import org.biojava.nbio.structure.Structure; 047import org.biojava.nbio.structure.StructureException; 048import org.biojava.nbio.structure.StructureIO; 049import org.biojava.nbio.structure.align.util.AtomCache; 050import org.biojava.nbio.structure.io.FileParsingParameters; 051import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; 052import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider; 053import org.biojava.nbio.structure.io.mmcif.chem.ChemCompTools; 054import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 055import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; 056import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; 057import org.biojava.nbio.structure.secstruc.DSSPParser; 058import org.biojava.nbio.structure.secstruc.SecStrucCalc; 059import org.biojava.nbio.structure.secstruc.SecStrucState; 060import org.biojava.nbio.structure.secstruc.SecStrucType; 061import org.biojava.nbio.structure.xtal.CrystalCell; 062import org.biojava.nbio.structure.xtal.SpaceGroup; 063import org.rcsb.mmtf.dataholders.DsspType; 064import org.rcsb.mmtf.utils.CodecUtils; 065import org.slf4j.Logger; 066import org.slf4j.LoggerFactory; 067 068/** 069 * A utils class of functions needed for Biojava to read and write to mmtf. 070 * @author Anthony Bradley 071 * 072 */ 073public class MmtfUtils { 074 075 private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class); 076 077 /** 078 * Set up the configuration parameters for BioJava. 079 */ 080 public static AtomCache setUpBioJava() { 081 // Set up the atom cache etc 082 AtomCache cache = new AtomCache(); 083 cache.setUseMmCif(true); 084 FileParsingParameters params = cache.getFileParsingParams(); 085 params.setCreateAtomBonds(true); 086 params.setAlignSeqRes(true); 087 params.setParseBioAssembly(true); 088 DownloadChemCompProvider cc = new DownloadChemCompProvider(); 089 ChemCompGroupFactory.setChemCompProvider(cc); 090 cc.checkDoFirstInstall(); 091 cache.setFileParsingParams(params); 092 StructureIO.setAtomCache(cache); 093 return cache; 094 } 095 096 /** 097 * Set up the configuration parameters for BioJava. 098 * @param extraUrl the string describing the URL (or file path) from which 099 * to get missing CCD entries. 100 */ 101 public static AtomCache setUpBioJava(String extraUrl) { 102 // Set up the atom cache etc 103 AtomCache cache = new AtomCache(); 104 cache.setUseMmCif(true); 105 FileParsingParameters params = cache.getFileParsingParams(); 106 params.setCreateAtomBonds(true); 107 params.setAlignSeqRes(true); 108 params.setParseBioAssembly(true); 109 DownloadChemCompProvider.serverBaseUrl = extraUrl; 110 DownloadChemCompProvider.useDefaultUrlLayout = false; 111 DownloadChemCompProvider cc = new DownloadChemCompProvider(); 112 ChemCompGroupFactory.setChemCompProvider(cc); 113 cc.checkDoFirstInstall(); 114 cache.setFileParsingParams(params); 115 StructureIO.setAtomCache(cache); 116 return cache; 117 } 118 119 120 /** 121 * This sets all microheterogeneous groups 122 * (previously alternate location groups) as separate groups. 123 * This is required because mmtf groups cannot have multiple HET codes. 124 * @param bioJavaStruct 125 */ 126 public static void fixMicroheterogenity(Structure bioJavaStruct) { 127 // Loop through the models 128 for (int i=0; i<bioJavaStruct.nrModels(); i++){ 129 // Then the chains 130 List<Chain> chains = bioJavaStruct.getModel(i); 131 for (Chain c : chains) { 132 // Build a new list of groups 133 List<Group> outGroups = new ArrayList<>(); 134 for (Group g : c.getAtomGroups()) { 135 List<Group> removeList = new ArrayList<>(); 136 for (Group altLoc : g.getAltLocs()) { 137 // Check if they are not equal -> microheterogenity 138 if(! altLoc.getPDBName().equals(g.getPDBName())) { 139 // Now add this group to the main list 140 removeList.add(altLoc); 141 } 142 } 143 // Add this group 144 outGroups.add(g); 145 // Remove any microhet alt locs 146 g.getAltLocs().removeAll(removeList); 147 // Add these microhet alt locs 148 outGroups.addAll(removeList); 149 } 150 c.setAtomGroups(outGroups); 151 } 152 } 153 } 154 155 156 /** 157 * Generate the secondary structure for a Biojava structure object. 158 * @param bioJavaStruct the Biojava structure for which it is to be calculate. 159 */ 160 public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) { 161 SecStrucCalc ssp = new SecStrucCalc(); 162 163 try{ 164 ssp.calculate(bioJavaStruct, true); 165 } 166 catch(StructureException e) { 167 LOGGER.warn("Could not calculate secondary structure (error {}). Will try to get a DSSP file from the RCSB web server instead.", e.getMessage()); 168 169 try { 170 DSSPParser.fetch(bioJavaStruct.getPDBCode(), bioJavaStruct, true); //download from PDB the DSSP result 171 } catch(Exception bige){ 172 LOGGER.warn("Could not get a DSSP file from RCSB web server. There will not be secondary structure assignment for this structure ({}). Error: {}", bioJavaStruct.getPDBCode(), bige.getMessage()); 173 } 174 } 175 } 176 177 /** 178 * Get the string representation of a space group. 179 * @param spaceGroup the input SpaceGroup object 180 * @return the space group as a string. 181 */ 182 public static String getSpaceGroupAsString(SpaceGroup spaceGroup) { 183 if(spaceGroup==null){ 184 return "NA"; 185 } 186 else{ 187 return spaceGroup.getShortSymbol(); 188 } 189 } 190 191 /** 192 * Get the length six array of the unit cell information. 193 * @param xtalInfo the input PDBCrystallographicInfo object 194 * @return the length six float array 195 */ 196 public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) { 197 CrystalCell xtalCell = xtalInfo.getCrystalCell(); 198 if(xtalCell==null){ 199 return null; 200 }else{ 201 float[] inputUnitCell = new float[6]; 202 inputUnitCell[0] = (float) xtalCell.getA(); 203 inputUnitCell[1] = (float) xtalCell.getB(); 204 inputUnitCell[2] = (float) xtalCell.getC(); 205 inputUnitCell[3] = (float) xtalCell.getAlpha(); 206 inputUnitCell[4] = (float) xtalCell.getBeta(); 207 inputUnitCell[5] = (float) xtalCell.getGamma(); 208 return inputUnitCell; 209 } 210 } 211 212 /** 213 * Converts the set of experimental techniques to an array of strings. 214 * @param experimentalTechniques the input set of experimental techniques 215 * @return the array of strings describing the methods used. 216 */ 217 public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) { 218 if(experimentalTechniques==null){ 219 return new String[0]; 220 } 221 String[] outArray = new String[experimentalTechniques.size()]; 222 int index = 0; 223 for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) { 224 outArray[index] = experimentalTechnique.getName(); 225 index++; 226 } 227 return outArray; 228 } 229 230 /** 231 * Covert a Date object to ISO time format. 232 * @param inputDate The input date object 233 * @return the time in ISO time format 234 */ 235 public static String dateToIsoString(Date inputDate) { 236 DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd"); 237 return dateStringFormat.format(inputDate); 238 } 239 240 /** 241 * Convert a bioassembly information into a map of transform, chainindices it relates to. 242 * @param bioassemblyInfo the bioassembly info object for this structure 243 * @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to. 244 * @return the bioassembly information (as primitive types). 245 */ 246 public static Map<double[], int[]> getTransformMap(BioAssemblyInfo bioassemblyInfo, Map<String, Integer> chainIdToIndexMap) { 247 Map<Matrix4d, List<Integer>> matMap = new LinkedHashMap<>(); 248 List<BiologicalAssemblyTransformation> transforms = bioassemblyInfo.getTransforms(); 249 for (BiologicalAssemblyTransformation transformation : transforms) { 250 Matrix4d transMatrix = transformation.getTransformationMatrix(); 251 String transChainId = transformation.getChainId(); 252 if (!chainIdToIndexMap.containsKey(transChainId)){ 253 continue; 254 } 255 int chainIndex = chainIdToIndexMap.get(transformation.getChainId()); 256 if(matMap.containsKey(transMatrix)){ 257 matMap.get(transMatrix).add(chainIndex); 258 } 259 else{ 260 List<Integer> chainIdList = new ArrayList<>(); 261 chainIdList.add(chainIndex); 262 matMap.put(transMatrix, chainIdList); 263 } 264 } 265 266 Map<double[], int[]> outMap = new LinkedHashMap<>(); 267 for (Entry<Matrix4d, List<Integer>> entry : matMap.entrySet()) { 268 outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue())); 269 } 270 return outMap; 271 } 272 273 /** 274 * Convert a four-d matrix to a double array. Row-packed. 275 * @param transformationMatrix the input matrix4d object 276 * @return the double array (16 long). 277 */ 278 public static double[] convertToDoubleArray(Matrix4d transformationMatrix) { 279 // Initialise the output array 280 double[] outArray = new double[16]; 281 // Iterate over the matrix 282 for(int i=0; i<4; i++){ 283 for(int j=0; j<4; j++){ 284 // Now set this element 285 outArray[i*4+j] = transformationMatrix.getElement(i,j); 286 } 287 } 288 return outArray; 289 } 290 291 /** 292 * Count the total number of groups in the structure 293 * @param structure the input structure 294 * @return the total number of groups 295 */ 296 public static int getNumGroups(Structure structure) { 297 int count = 0; 298 for(int i=0; i<structure.nrModels(); i++) { 299 for(Chain chain : structure.getChains(i)){ 300 count+= chain.getAtomGroups().size(); 301 } 302 } 303 return count; 304 } 305 306 307 /** 308 * Get a list of atoms for a group. Only add each atom once. 309 * @param inputGroup the Biojava Group to consider 310 * @return the atoms for the input Biojava Group 311 */ 312 public static List<Atom> getAtomsForGroup(Group inputGroup) { 313 Set<Atom> uniqueAtoms = new HashSet<Atom>(); 314 List<Atom> theseAtoms = new ArrayList<Atom>(); 315 for(Atom a: inputGroup.getAtoms()){ 316 theseAtoms.add(a); 317 uniqueAtoms.add(a); 318 } 319 List<Group> altLocs = inputGroup.getAltLocs(); 320 for(Group thisG: altLocs){ 321 for(Atom a: thisG.getAtoms()){ 322 if(uniqueAtoms.contains(a)){ 323 continue; 324 } 325 theseAtoms.add(a); 326 } 327 } 328 return theseAtoms; 329 } 330 331 /** 332 * Find the number of bonds in a group 333 * @param atomsInGroup the list of atoms in the group 334 * @return the number of atoms in the group 335 */ 336 public static int getNumBondsInGroup(List<Atom> atomsInGroup) { 337 int bondCounter = 0; 338 for(Atom atom : atomsInGroup) { 339 if(atom.getBonds()==null){ 340 continue; 341 } 342 for(Bond bond : atom.getBonds()) { 343 // Now set the bonding information. 344 Atom other = bond.getOther(atom); 345 // If both atoms are in the group 346 if (atomsInGroup.indexOf(other)!=-1){ 347 Integer firstBondIndex = atomsInGroup.indexOf(atom); 348 Integer secondBondIndex = atomsInGroup.indexOf(other); 349 // Don't add the same bond twice 350 if (firstBondIndex<secondBondIndex){ 351 bondCounter++; 352 } 353 } 354 } 355 } 356 return bondCounter; 357 } 358 359 /** 360 * Get the secondary structure as defined by DSSP. 361 * @param group the input group to be calculated 362 * @return the integer index of the group type. 363 */ 364 public static int getSecStructType(Group group) { 365 SecStrucState props = (SecStrucState) group.getProperty("secstruc"); 366 if(props==null){ 367 return DsspType.NULL_ENTRY.getDsspIndex(); 368 } 369 return DsspType.dsspTypeFromString(props.getType().name).getDsspIndex(); 370 } 371 372 /** 373 * Get the secondary structure as defined by DSSP. 374 * @param group the input group to be calculated 375 * @param dsspIndex integer index of the group type. 376 */ 377 public static void setSecStructType(Group group, int dsspIndex) { 378 SecStrucType secStrucType = getSecStructTypeFromDsspIndex(dsspIndex); 379 SecStrucState secStrucState = new SecStrucState(group, "MMTF_ASSIGNED", secStrucType); 380 if(secStrucType!=null){ 381 group.setProperty("secstruc", secStrucState); 382 } 383 } 384 385 386 /** 387 * Set the DSSP type based on a numerical index. 388 * @param dsspIndex the integer index of the type to set 389 * @return the instance of the SecStrucType object holding this secondary 390 * structure type. 391 */ 392 public static SecStrucType getSecStructTypeFromDsspIndex(int dsspIndex) { 393 String dsspType = DsspType.dsspTypeFromInt(dsspIndex).getDsspType(); 394 for(SecStrucType secStrucType : SecStrucType.values()) 395 { 396 if(dsspType.equals(secStrucType.name)) 397 { 398 return secStrucType; 399 } 400 } 401 // Return a null entry. 402 return null; 403 } 404 405 /** 406 * Get summary information for the structure. 407 * @param structure the structure for which to get the information. 408 */ 409 public static MmtfSummaryDataBean getStructureInfo(Structure structure) { 410 MmtfSummaryDataBean mmtfSummaryDataBean = new MmtfSummaryDataBean(); 411 // Get all the atoms 412 List<Atom> theseAtoms = new ArrayList<>(); 413 List<Chain> allChains = new ArrayList<>(); 414 Map<String, Integer> chainIdToIndexMap = new LinkedHashMap<>(); 415 int chainCounter = 0; 416 int bondCount = 0; 417 mmtfSummaryDataBean.setAllAtoms(theseAtoms); 418 mmtfSummaryDataBean.setAllChains(allChains); 419 mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap); 420 for (int i=0; i<structure.nrModels(); i++){ 421 List<Chain> chains = structure.getModel(i); 422 allChains.addAll(chains); 423 for (Chain chain : chains) { 424 String idOne = chain.getId(); 425 if (!chainIdToIndexMap.containsKey(idOne)) { 426 chainIdToIndexMap.put(idOne, chainCounter); 427 } 428 chainCounter++; 429 for (Group g : chain.getAtomGroups()) { 430 for(Atom atom: getAtomsForGroup(g)){ 431 theseAtoms.add(atom); 432 // If both atoms are in the group 433 if (atom.getBonds()!=null){ 434 bondCount+=atom.getBonds().size(); 435 } 436 } 437 } 438 } 439 } 440 // Assumes all bonds are referenced twice 441 mmtfSummaryDataBean.setNumBonds(bondCount/2); 442 return mmtfSummaryDataBean; 443 444 } 445 446 /** 447 * Get a list of N 4*4 matrices from a single list of doubles of length 16*N. 448 * @param ncsOperMatrixList the input list of doubles 449 * @return the list of 4*4 matrics 450 */ 451 public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) { 452 if(ncsOperMatrixList==null){ 453 return null; 454 } 455 int numMats = ncsOperMatrixList.length; 456 if(numMats==0){ 457 return null; 458 } 459 if(numMats==1 && ncsOperMatrixList[0].length==0){ 460 return null; 461 } 462 Matrix4d[] outList = new Matrix4d[numMats]; 463 for(int i=0; i<numMats; i++){ 464 outList[i] = new Matrix4d(ncsOperMatrixList[i]); 465 } 466 return outList; 467 } 468 469 /** 470 * Get a list of length N*16 of a list of Matrix4d*N. 471 * @param ncsOperators the {@link Matrix4d} list 472 * @return the list of length N*16 of the list of matrices 473 */ 474 public static double[][] getNcsAsArray(Matrix4d[] ncsOperators) { 475 if(ncsOperators==null){ 476 return new double[0][0]; 477 } 478 double[][] outList = new double[ncsOperators.length][16]; 479 for(int i=0; i<ncsOperators.length;i++){ 480 outList[i] = convertToDoubleArray(ncsOperators[i]); 481 } 482 return outList; 483 } 484 485 /** 486 * Insert the group in the given position in the sequence. 487 * @param chain the chain to add the seq res group to 488 * @param group the group to add 489 * @param sequenceIndexId the index to add it in 490 */ 491 public static void insertSeqResGroup(Chain chain, Group group, int sequenceIndexId) { 492 List<Group> seqResGroups = chain.getSeqResGroups(); 493 addGroupAtId(seqResGroups, group, sequenceIndexId); 494 } 495 496 /** 497 * Add the missing groups to the SeqResGroups. 498 * @param modelChain the chain to add the information for 499 * @param sequence the sequence of the construct 500 */ 501 public static void addSeqRes(Chain modelChain, String sequence) { 502 503 List<Group> seqResGroups = modelChain.getSeqResGroups(); 504 GroupType chainType = getChainType(modelChain.getAtomGroups()); 505 506 for(int i=0; i<sequence.length(); i++){ 507 508 char singleLetterCode = sequence.charAt(i); 509 Group group = null; 510 if (seqResGroups.size() > i) { 511 group=seqResGroups.get(i); 512 } 513 if(group!=null){ 514 continue; 515 } 516 517 group = getSeqResGroup(singleLetterCode, chainType); 518 addGroupAtId(seqResGroups, group, i); 519 } 520 } 521 522 private static GroupType getChainType(List<Group> groups) { 523 for(Group group : groups) { 524 if(group!=null && group.getType()!=GroupType.HETATM){ 525 return group.getType(); 526 } 527 } 528 return GroupType.HETATM; 529 } 530 531 private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) { 532 while(seqResGroups.size()<=sequenceIndexId){ 533 seqResGroups.add(null); 534 } 535 if(sequenceIndexId>=0){ 536 seqResGroups.set(sequenceIndexId, group); 537 } 538 } 539 540 private static Group getSeqResGroup(char singleLetterCode, GroupType type) { 541 542 if(type==GroupType.AMINOACID){ 543 String threeLetter = ChemCompTools.getAminoThreeLetter(singleLetterCode); 544 if (threeLetter == null) return null; 545 ChemComp chemComp = ChemCompGroupFactory.getChemComp(threeLetter); 546 547 AminoAcidImpl a = new AminoAcidImpl(); 548 a.setRecordType(AminoAcid.SEQRESRECORD); 549 a.setAminoType(singleLetterCode); 550 a.setPDBName(threeLetter); 551 a.setChemComp(chemComp); 552 return a; 553 554 } else if (type==GroupType.NUCLEOTIDE) { 555 String twoLetter = ChemCompTools.getDNATwoLetter(singleLetterCode); 556 if (twoLetter == null) return null; 557 ChemComp chemComp = ChemCompGroupFactory.getChemComp(twoLetter); 558 559 NucleotideImpl n = new NucleotideImpl(); 560 n.setPDBName(twoLetter); 561 n.setChemComp(chemComp); 562 return n; 563 } 564 else{ 565 return null; 566 } 567 } 568}