/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io.mmtf;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.vecmath.Matrix4d;

import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Bond;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.ExperimentalTechnique;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.NucleotideImpl;
import org.biojava.nbio.structure.PDBCrystallographicInfo;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.secstruc.DSSPParser;
import org.biojava.nbio.structure.secstruc.SecStrucCalc;
import org.biojava.nbio.structure.secstruc.SecStrucState;
import org.biojava.nbio.structure.secstruc.SecStrucType;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.rcsb.mmtf.dataholders.DsspType;
import org.rcsb.mmtf.utils.CodecUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A utils class of functions needed for Biojava to read and write to mmtf.
 * @author Anthony Bradley
 *
 */
public class MmtfUtils {

    private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class);

    /**
     * Set up the configuration parameters for BioJava.
     * Creates an {@link AtomCache} that uses mmCIF, enables bond creation,
     * SEQRES alignment and bioassembly parsing on its parsing parameters,
     * installs a {@link DownloadChemCompProvider} as the chemical component
     * provider and registers the cache with {@link StructureIO}.
     * @return the configured atom cache
     */
    public static AtomCache setUpBioJava() {
        // Set up the atom cache etc
        AtomCache cache = new AtomCache();
        cache.setUseMmCif(true);
        FileParsingParameters params = cache.getFileParsingParams();
        params.setCreateAtomBonds(true);
        params.setAlignSeqRes(true);
        params.setParseBioAssembly(true);
        DownloadChemCompProvider cc = new DownloadChemCompProvider();
        ChemCompGroupFactory.setChemCompProvider(cc);
        cc.checkDoFirstInstall();
        cache.setFileParsingParams(params);
        StructureIO.setAtomCache(cache);
        return cache;
    }

    /**
     * Set up the configuration parameters for BioJava.
     * Same configuration as {@link #setUpBioJava()}, but first points the
     * static {@link DownloadChemCompProvider} settings at the given location
     * and switches off the default URL layout.
     * @param extraUrl the string describing the URL (or file path) from which
     * to get missing CCD entries.
     * @return the configured atom cache
     */
    public static AtomCache setUpBioJava(String extraUrl) {
        // Set up the atom cache etc
        AtomCache cache = new AtomCache();
        cache.setUseMmCif(true);
        FileParsingParameters params = cache.getFileParsingParams();
        params.setCreateAtomBonds(true);
        params.setAlignSeqRes(true);
        params.setParseBioAssembly(true);
        // These are static settings: they are assigned before the provider is created.
        DownloadChemCompProvider.serverBaseUrl = extraUrl;
        DownloadChemCompProvider.useDefaultUrlLayout = false;
        DownloadChemCompProvider cc = new DownloadChemCompProvider();
        ChemCompGroupFactory.setChemCompProvider(cc);
        cc.checkDoFirstInstall();
        cache.setFileParsingParams(params);
        StructureIO.setAtomCache(cache);
        return cache;
    }


    /**
     * This sets all microheterogeneous groups
     * (previously alternate location groups) as separate groups.
     * This is required because mmtf groups cannot have multiple HET codes.
     * @param bioJavaStruct the structure whose chains are rewritten in place
     */
    public static void fixMicroheterogenity(Structure bioJavaStruct) {
        // Loop through the models
        for (int modelIndex = 0; modelIndex < bioJavaStruct.nrModels(); modelIndex++) {
            // Then the chains
            for (Chain chain : bioJavaStruct.getModel(modelIndex)) {
                // Build a new list of groups
                List<Group> outGroups = new ArrayList<>();
                for (Group group : chain.getAtomGroups()) {
                    // Alt locs whose name differs from the parent are microheterogeneity
                    List<Group> microHets = new ArrayList<>();
                    for (Group altLoc : group.getAltLocs()) {
                        if (!altLoc.getPDBName().equals(group.getPDBName())) {
                            microHets.add(altLoc);
                        }
                    }
                    // Keep the parent, detach the microhet alt locs from it and
                    // append them right after it as groups of their own.
                    outGroups.add(group);
                    group.getAltLocs().removeAll(microHets);
                    outGroups.addAll(microHets);
                }
                chain.setAtomGroups(outGroups);
            }
        }
    }


    /**
     * Generate the secondary structure for a Biojava structure object.
     * If the calculation throws a {@link StructureException}, a DSSP file is
     * fetched for the structure's PDB code instead; if that fails too, the
     * failure is logged and the method returns without an assignment.
     * @param bioJavaStruct the Biojava structure for which it is to be calculated.
     */
    public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) {
        SecStrucCalc ssp = new SecStrucCalc();

        try {
            ssp.calculate(bioJavaStruct, true);
        }
        catch (StructureException e) {
            LOGGER.warn("Could not calculate secondary structure (error {}). Will try to get a DSSP file from the RCSB web server instead.", e.getMessage());

            try {
                DSSPParser.fetch(bioJavaStruct.getPDBCode(), bioJavaStruct, true); //download from PDB the DSSP result
            } catch (Exception bige) {
                // Best effort: the structure is simply left without secondary structure.
                LOGGER.warn("Could not get a DSSP file from RCSB web server. There will not be secondary structure assignment for this structure ({}). Error: {}", bioJavaStruct.getPDBCode(), bige.getMessage());
            }
        }
    }

    /**
     * Get the string representation of a space group.
     * @param spaceGroup the input SpaceGroup object
     * @return the short symbol of the space group, or "NA" if it is null.
     */
    public static String getSpaceGroupAsString(SpaceGroup spaceGroup) {
        if (spaceGroup == null) {
            return "NA";
        }
        return spaceGroup.getShortSymbol();
    }

    /**
     * Get the length six array of the unit cell information.
     * @param xtalInfo the input PDBCrystallographicInfo object
     * @return the length six float array (a, b, c, alpha, beta, gamma), or
     * null if the input or its crystal cell is null
     */
    public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) {
        // A missing info object is treated like a missing cell.
        if (xtalInfo == null) {
            return null;
        }
        CrystalCell xtalCell = xtalInfo.getCrystalCell();
        if (xtalCell == null) {
            return null;
        }
        return new float[] {
                (float) xtalCell.getA(),
                (float) xtalCell.getB(),
                (float) xtalCell.getC(),
                (float) xtalCell.getAlpha(),
                (float) xtalCell.getBeta(),
                (float) xtalCell.getGamma()
        };
    }

    /**
     * Converts the set of experimental techniques to an array of strings.
     * @param experimentalTechniques the input set of experimental techniques
     * @return the array of strings describing the methods used (empty if the
     * input is null).
     */
    public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) {
        if (experimentalTechniques == null) {
            return new String[0];
        }
        String[] outArray = new String[experimentalTechniques.size()];
        int index = 0;
        for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) {
            outArray[index++] = experimentalTechnique.getName();
        }
        return outArray;
    }

    /**
     * Convert a Date object to ISO date format (yyyy-MM-dd).
     * @param inputDate The input date object
     * @return the date in ISO format
     */
    public static String dateToIsoString(Date inputDate) {
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd");
        return dateStringFormat.format(inputDate);
    }

    /**
     * Convert a bioassembly information into a map of transform, chainindices it relates to.
     * Transformations whose chain id is not in the index map are skipped.
     * Note that the keys of the returned map are arrays, which hash by
     * identity: iterate over the entries rather than looking keys up by content.
     * @param bioassemblyInfo the bioassembly info object for this structure
     * @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to.
     * @return the bioassembly information (as primitive types).
     */
    public static Map<double[], int[]> getTransformMap(BioAssemblyInfo bioassemblyInfo, Map<String, Integer> chainIdToIndexMap) {
        // Group the chain indices by the transformation matrix applied to them
        Map<Matrix4d, List<Integer>> matMap = new HashMap<>();
        for (BiologicalAssemblyTransformation transformation : bioassemblyInfo.getTransforms()) {
            // Single lookup: an absent (or null) mapping means the chain is not indexed
            Integer chainIndex = chainIdToIndexMap.get(transformation.getChainId());
            if (chainIndex == null) {
                continue;
            }
            Matrix4d transMatrix = transformation.getTransformationMatrix();
            List<Integer> chainIndices = matMap.get(transMatrix);
            if (chainIndices == null) {
                chainIndices = new ArrayList<>();
                matMap.put(transMatrix, chainIndices);
            }
            chainIndices.add(chainIndex);
        }
        // Flatten to primitive arrays
        Map<double[], int[]> outMap = new HashMap<>();
        for (Entry<Matrix4d, List<Integer>> entry : matMap.entrySet()) {
            outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue()));
        }
        return outMap;
    }

    /**
     * Convert a four-d matrix to a double array. Row-packed.
     * @param transformationMatrix the input matrix4d object
     * @return the double array (16 long).
     */
    public static double[] convertToDoubleArray(Matrix4d transformationMatrix) {
        double[] outArray = new double[16];
        for (int row = 0; row < 4; row++) {
            for (int col = 0; col < 4; col++) {
                // Row-major packing: element (row, col) goes to row*4 + col
                outArray[row * 4 + col] = transformationMatrix.getElement(row, col);
            }
        }
        return outArray;
    }

    /**
     * Count the total number of groups in the structure
     * @param structure the input structure
     * @return the total number of atom groups summed over all chains of all models
     */
    public static int getNumGroups(Structure structure) {
        int count = 0;
        for (int i = 0; i < structure.nrModels(); i++) {
            for (Chain chain : structure.getChains(i)) {
                count += chain.getAtomGroups().size();
            }
        }
        return count;
    }


    /**
     * Get a list of atoms for a group. Only add each atom once.
     * The atoms of the group itself come first, followed by those atoms of
     * its alternate location groups that have not been seen before.
     * @param inputGroup the Biojava Group to consider
     * @return the atoms for the input Biojava Group
     */
    public static List<Atom> getAtomsForGroup(Group inputGroup) {
        Set<Atom> uniqueAtoms = new HashSet<Atom>();
        List<Atom> theseAtoms = new ArrayList<Atom>();
        for (Atom a : inputGroup.getAtoms()) {
            theseAtoms.add(a);
            uniqueAtoms.add(a);
        }
        for (Group altLoc : inputGroup.getAltLocs()) {
            for (Atom a : altLoc.getAtoms()) {
                // Set.add returns false when the atom was already recorded, which
                // also de-duplicates atoms shared between several alt loc groups.
                if (uniqueAtoms.add(a)) {
                    theseAtoms.add(a);
                }
            }
        }
        return theseAtoms;
    }

    /**
     * Find the number of bonds in a group
     * @param atomsInGroup the list of atoms in the group
     * @return the number of bonds whose two atoms are both in the group
     */
    public static int getNumBondsInGroup(List<Atom> atomsInGroup) {
        int bondCounter = 0;
        for (Atom atom : atomsInGroup) {
            List<Bond> bonds = atom.getBonds();
            if (bonds == null) {
                continue;
            }
            // Loop-invariant: position of this atom in the group
            int atomIndex = atomsInGroup.indexOf(atom);
            for (Bond bond : bonds) {
                int otherIndex = atomsInGroup.indexOf(bond.getOther(atom));
                // otherIndex is -1 when the partner is outside the group; since
                // atomIndex >= 0 this single test both excludes that case and
                // counts each bond only once (from its lower-indexed atom).
                if (atomIndex < otherIndex) {
                    bondCounter++;
                }
            }
        }
        return bondCounter;
    }

    /**
     * Get the secondary structure as defined by DSSP.
     * Reads the "secstruc" property of the group.
     * @param group the input group to be calculated
     * @return the integer index of the group type (the null-entry index if the
     * group has no "secstruc" property).
     */
    public static int getSecStructType(Group group) {
        SecStrucState props = (SecStrucState) group.getProperty("secstruc");
        if (props == null) {
            return DsspType.NULL_ENTRY.getDsspIndex();
        }
        return DsspType.dsspTypeFromString(props.getType().name).getDsspIndex();
    }

    /**
     * Set the secondary structure of a group from a DSSP index.
     * Stores a {@link SecStrucState} under the "secstruc" property; nothing is
     * set when the index does not map to a {@link SecStrucType}.
     * @param group the group to annotate
     * @param dsspIndex the integer index of the DSSP type.
     */
    public static void setSecStructType(Group group, int dsspIndex) {
        SecStrucType secStrucType = getSecStructTypeFromDsspIndex(dsspIndex);
        if (secStrucType == null) {
            return;
        }
        group.setProperty("secstruc", new SecStrucState(group, "MMTF_ASSIGNED", secStrucType));
    }


    /**
     * Get the secondary structure type corresponding to a numerical DSSP index.
     * @param dsspIndex the integer index of the type to look up
     * @return the instance of the SecStrucType object holding this secondary
     * structure type, or null if no type has a matching name.
     */
    public static SecStrucType getSecStructTypeFromDsspIndex(int dsspIndex) {
        String dsspType = DsspType.dsspTypeFromInt(dsspIndex).getDsspType();
        if (dsspType == null) {
            return null;
        }
        for (SecStrucType secStrucType : SecStrucType.values()) {
            // Compare string contents, not references
            if (dsspType.equals(secStrucType.name)) {
                return secStrucType;
            }
        }
        // Return a null entry.
        return null;
    }

    /**
     * Get summary information for the structure.
     * Collects all chains (over all models), all atoms (including alt loc
     * atoms, see {@link #getAtomsForGroup(Group)}), a chain id to chain index
     * map (first occurrence of each id wins) and the number of bonds.
     * @param structure the structure for which to get the information.
     * @return the bean holding the summary data
     */
    public static MmtfSummaryDataBean getStructureInfo(Structure structure) {
        MmtfSummaryDataBean mmtfSummaryDataBean = new MmtfSummaryDataBean();
        // Get all the atoms
        List<Atom> theseAtoms = new ArrayList<>();
        List<Chain> allChains = new ArrayList<>();
        Map<String, Integer> chainIdToIndexMap = new HashMap<>();
        int chainCounter = 0;
        int bondCount = 0;
        mmtfSummaryDataBean.setAllAtoms(theseAtoms);
        mmtfSummaryDataBean.setAllChains(allChains);
        mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap);
        for (int i = 0; i < structure.nrModels(); i++) {
            List<Chain> chains = structure.getModel(i);
            allChains.addAll(chains);
            for (Chain chain : chains) {
                String idOne = chain.getId();
                if (!chainIdToIndexMap.containsKey(idOne)) {
                    chainIdToIndexMap.put(idOne, chainCounter);
                }
                // The counter advances for every chain, also for repeated ids
                chainCounter++;
                for (Group g : chain.getAtomGroups()) {
                    for (Atom atom : getAtomsForGroup(g)) {
                        theseAtoms.add(atom);
                        // Tally every bond reference held by this atom
                        if (atom.getBonds() != null) {
                            bondCount += atom.getBonds().size();
                        }
                    }
                }
            }
        }
        // Assumes all bonds are referenced twice
        mmtfSummaryDataBean.setNumBonds(bondCount / 2);
        return mmtfSummaryDataBean;

    }

    /**
     * Get an array of N 4*4 matrices from an array of N double arrays of length 16.
     * @param ncsOperMatrixList the input arrays of doubles
     * @return the array of 4*4 matrices, or null if the input is null, empty,
     * or consists of a single empty array
     */
    public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) {
        if (ncsOperMatrixList == null) {
            return null;
        }
        int numMats = ncsOperMatrixList.length;
        if (numMats == 0) {
            return null;
        }
        if (numMats == 1 && ncsOperMatrixList[0].length == 0) {
            return null;
        }
        Matrix4d[] outList = new Matrix4d[numMats];
        for (int i = 0; i < numMats; i++) {
            outList[i] = new Matrix4d(ncsOperMatrixList[i]);
        }
        return outList;
    }

    /**
     * Get an array of N double arrays of length 16 from an array of N Matrix4d.
     * @param ncsOperators the {@link Matrix4d} array
     * @return one row-packed array of length 16 per matrix (an empty array if
     * the input is null)
     */
    public static double[][] getNcsAsArray(Matrix4d[] ncsOperators) {
        if (ncsOperators == null) {
            return new double[0][0];
        }
        // Only the outer array is allocated here; each row comes from the conversion.
        double[][] outList = new double[ncsOperators.length][];
        for (int i = 0; i < ncsOperators.length; i++) {
            outList[i] = convertToDoubleArray(ncsOperators[i]);
        }
        return outList;
    }

    /**
     * Insert the group in the given position in the sequence.
     * @param chain the chain to add the seq res group to
     * @param group the group to add
     * @param sequenceIndexId the index to add it in
     */
    public static void insertSeqResGroup(Chain chain, Group group, int sequenceIndexId) {
        addGroupAtId(chain.getSeqResGroups(), group, sequenceIndexId);
    }

    /**
     * Add the missing groups to the SeqResGroups.
     * Every sequence position that has no (non-null) SEQRES group yet gets a
     * new group built from the one letter code at that position.
     * @param modelChain the chain to add the information for
     * @param sequence the sequence of the construct
     */
    public static void addSeqRes(Chain modelChain, String sequence) {
        List<Group> seqResGroups = modelChain.getSeqResGroups();
        GroupType chainType = getChainType(modelChain.getAtomGroups());
        for (int i = 0; i < sequence.length(); i++) {
            // Positions that already hold a group are left untouched
            if (i < seqResGroups.size() && seqResGroups.get(i) != null) {
                continue;
            }
            Group group = getSeqResGroup(modelChain, sequence.charAt(i), chainType);
            // Pads the list with nulls if needed and stores the group at index i
            addGroupAtId(seqResGroups, group, i);
        }
    }

    /**
     * Determine the type of a chain from its groups: the type of the first
     * non-null group that is not a HETATM, or HETATM if there is none.
     */
    private static GroupType getChainType(List<Group> groups) {
        for (Group group : groups) {
            if (group != null && group.getType() != GroupType.HETATM) {
                return group.getType();
            }
        }
        return GroupType.HETATM;
    }

    /**
     * Store an element at the given index, padding the list with nulls until
     * the index exists. A negative index leaves the list unchanged.
     */
    private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) {
        while (seqResGroups.size() <= sequenceIndexId) {
            seqResGroups.add(null);
        }
        if (sequenceIndexId >= 0) {
            seqResGroups.set(sequenceIndexId, group);
        }
    }

    /**
     * Build a SEQRES group for a one letter code: an amino acid or a
     * nucleotide depending on the chain type, or null for any other type.
     */
    private static Group getSeqResGroup(Chain modelChain, char singleLetterCode, GroupType type) {
        if (type == GroupType.AMINOACID) {
            AminoAcidImpl a = new AminoAcidImpl();
            a.setRecordType(AminoAcid.SEQRESRECORD);
            a.setAminoType(singleLetterCode);
            ChemComp chemComp = new ChemComp();
            chemComp.setOne_letter_code("" + singleLetterCode);
            a.setChemComp(chemComp);
            return a;
        }
        if (type == GroupType.NUCLEOTIDE) {
            NucleotideImpl n = new NucleotideImpl();
            ChemComp chemComp = new ChemComp();
            chemComp.setOne_letter_code("" + singleLetterCode);
            n.setChemComp(chemComp);
            return n;
        }
        return null;
    }
}