001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.io.mmtf; 022 023import java.io.Serializable; 024import java.text.ParseException; 025import java.text.SimpleDateFormat; 026import java.util.ArrayList; 027import java.util.Collections; 028import java.util.Date; 029import java.util.HashMap; 030import java.util.List; 031import java.util.Map; 032 033import javax.vecmath.Matrix4d; 034 035import org.biojava.nbio.structure.AminoAcid; 036import org.biojava.nbio.structure.AminoAcidImpl; 037import org.biojava.nbio.structure.Atom; 038import org.biojava.nbio.structure.AtomImpl; 039import org.biojava.nbio.structure.BondImpl; 040import org.biojava.nbio.structure.Chain; 041import org.biojava.nbio.structure.ChainImpl; 042import org.biojava.nbio.structure.Element; 043import org.biojava.nbio.structure.EntityInfo; 044import org.biojava.nbio.structure.EntityType; 045import org.biojava.nbio.structure.Group; 046import org.biojava.nbio.structure.HetatomImpl; 047import org.biojava.nbio.structure.NucleotideImpl; 048import org.biojava.nbio.structure.PDBCrystallographicInfo; 049import org.biojava.nbio.structure.PDBHeader; 050import org.biojava.nbio.structure.Structure; 051import org.biojava.nbio.structure.StructureImpl; 052import org.biojava.nbio.structure.StructureTools; 053import org.biojava.nbio.structure.io.mmcif.chem.PolymerType; 054import org.biojava.nbio.structure.io.mmcif.chem.ResidueType; 055import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 056import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; 057import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; 058import org.biojava.nbio.structure.xtal.CrystalCell; 059import org.biojava.nbio.structure.xtal.SpaceGroup; 060import org.rcsb.mmtf.api.StructureAdapterInterface; 061import org.rcsb.mmtf.dataholders.MmtfStructure; 062import org.slf4j.Logger; 063import org.slf4j.LoggerFactory; 064 065 066/** 067 * A biojava specific structure inflator for MMTF. 068 * Should be ported to biojava code. 069 * 070 * @author Anthony Bradley 071 * @since 5.0 072 * 073 */ 074public class MmtfStructureReader implements StructureAdapterInterface, Serializable { 075 076 /** The Constant serialVersionUID. */ 077 private static final long serialVersionUID = 6772030485225130853L; 078 079 /** The logger */ 080 private static final Logger logger = LoggerFactory.getLogger(MmtfStructureReader.class); 081 082 /** The structure. */ 083 private Structure structure; 084 085 /** The model number. */ 086 private int modelNumber; 087 088 /** The chain. */ 089 private Chain chain; 090 091 /** The group. */ 092 private Group group; 093 094 /** The atoms in a group. */ 095 private List<Atom> atomsInGroup; 096 097 /** All the atoms. */ 098 private Atom[] allAtoms; 099 private int atomCounter; 100 101 /** The list of EntityInformation */ 102 private List<EntityInfo> entityInfoList; 103 104 /** All the chains */ 105 private List<Chain> chainList; 106 107 /** All the chains as a list of maps */ 108 private List<Map<String,Chain>> chainMap; 109 110 private List<double[]> transformList; 111 112 private int bioassIndex; 113 114 private Map<String,String> chainSequenceMap; 115 116 /** 117 * Instantiates a new bio java structure decoder. 118 */ 119 public MmtfStructureReader() { 120 structure = new StructureImpl(); 121 modelNumber = 0; 122 entityInfoList = new ArrayList<>(); 123 chainList = new ArrayList<>(); 124 chainMap = new ArrayList<>(); 125 transformList = new ArrayList<>(); 126 chainSequenceMap = new HashMap<>(); 127 } 128 129 /** 130 * Gets the structure. 131 * 132 * @return the structure 133 */ 134 public Structure getStructure() { 135 return structure; 136 } 137 138 @Override 139 public void finalizeStructure() { 140 // Number the remaining ones 141 int counter =0; 142 // Add the entity info 143 for (EntityInfo entityInfo : entityInfoList) { 144 counter++; 145 entityInfo.setMolId(counter); 146 } 147 structure.setEntityInfos(entityInfoList); 148 // Add the actual chains 149 for(int i=0; i<chainMap.size(); i++) { 150 // Now add the chain information 151 Map<String, Chain> modelChainMap = chainMap.get(i); 152 for(Chain modelChain : modelChainMap.values()){ 153 structure.addChain(modelChain, i); 154 String sequence = chainSequenceMap.get(modelChain.getId()); 155 if (sequence == null) { 156 logger.warn("Sequence is null for chain with asym_id {}. Most likely the chain is non-polymeric. Will not add seqres groups for it.", modelChain.getId()); 157 continue; 158 } 159 MmtfUtils.addSeqRes(modelChain, sequence); 160 } 161 } 162 StructureTools.cleanUpAltLocs(structure); 163 } 164 165 @Override 166 public void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups, 167 int totalNumChains, int totalNumModels, String modelId) { 168 structure.setPDBCode(modelId); 169 allAtoms = new Atom[totalNumAtoms]; 170 } 171 172 173 /* (non-Javadoc) 174 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#setModelInfo(int, int) 175 */ 176 @Override 177 public void setModelInfo(int inputModelNumber, 178 int chainCount) { 179 modelNumber = inputModelNumber; 180 structure.addModel(new ArrayList<Chain>(chainCount)); 181 chainMap.add(new HashMap<>()); 182 } 183 184 /* (non-Javadoc) 185 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface 186 * #setChainInfo(java.lang.String, int) 187 */ 188 @Override 189 public void setChainInfo(String chainId, String chainName, int groupCount) { 190 // First check to see if the chain exists 191 Map<String, Chain> modelChainMap = chainMap.get(modelNumber); 192 if(modelChainMap.containsKey(chainId)){ 193 chain = modelChainMap.get(chainId); 194 } 195 // If we need to set a new chain do this 196 else{ 197 chain = new ChainImpl(); 198 chain.setId(chainId.trim()); 199 chain.setName(chainName); 200 chain.setAtomGroups(new ArrayList<>(groupCount)); 201 modelChainMap.put(chainId, chain); 202 chainList.add(chain); 203 } 204 } 205 206 207 /* (non-Javadoc) 208 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface 209 * #setGroupInfo(java.lang.String, int, char, int, int) 210 */ 211 @Override 212 public void setGroupInfo(String groupName, int groupNumber, 213 char insertionCode, String chemCompType, int atomCount, int bondCount, 214 char singleLetterCode, int sequenceIndexId, int secStructType) { 215 // Get the polymer type 216 ResidueType residueType = ResidueType.getResidueTypeFromString(chemCompType); 217 int polymerType = getGroupTypIndicator(residueType.polymerType); 218 switch (polymerType) { 219 case 1: 220 AminoAcid aa = new AminoAcidImpl(); 221 // Now set the one letter code 222 aa.setAminoType(singleLetterCode); 223 group = aa; 224 break; 225 case 2: 226 group = new NucleotideImpl(); 227 break; 228 default: 229 group = new HetatomImpl(); 230 break; 231 } 232 atomsInGroup = new ArrayList<>(); 233 ChemComp chemComp = new ChemComp(); 234 chemComp.setOne_letter_code(String.valueOf(singleLetterCode)); 235 chemComp.setType(chemCompType.toUpperCase()); 236 chemComp.setResidueType(residueType); 237 chemComp.setPolymerType(residueType.polymerType); 238 group.setChemComp(chemComp); 239 group.setPDBName(groupName); 240 if (insertionCode == MmtfStructure.UNAVAILABLE_CHAR_VALUE) { 241 group.setResidueNumber(chain.getName().trim(), groupNumber, null); 242 } else { 243 group.setResidueNumber(chain.getName().trim(), 244 groupNumber, insertionCode); 245 } 246 group.setAtoms(new ArrayList<>(atomCount)); 247 if (polymerType==1 || polymerType==2) { 248 MmtfUtils.insertSeqResGroup(chain, group, sequenceIndexId); 249 } 250 if (atomCount > 0) { 251 chain.addGroup(group); 252 } 253 MmtfUtils.setSecStructType(group, secStructType); 254 } 255 256 /** 257 * 258 * @return 259 */ 260 private Group getGroupWithSameResNumButDiffPDBName() { 261 // If this chain already has this group number 262 for (Group g : chain.getAtomGroups() ) { 263 if (g.getResidueNumber().equals(group.getResidueNumber())) { 264 if( ! g.getPDBName().equals(group.getPDBName() )){ 265 return g; 266 } 267 } 268 } 269 return null; 270 } 271 272 /* (non-Javadoc) 273 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface# 274 * setAtomInfo(java.lang.String, int, char, float, float, 275 * float, float, float, java.lang.String, int) 276 */ 277 @Override 278 public void setAtomInfo(String atomName, 279 int serialNumber, char alternativeLocationId, float x, 280 float y, float z, float occupancy, 281 float temperatureFactor, 282 String element, int charge) { 283 Atom atom = new AtomImpl(); 284 Group altGroup = null; 285 atom.setPDBserial(serialNumber); 286 atom.setName(atomName.trim()); 287 atom.setElement(Element.valueOfIgnoreCase(element)); 288 if(alternativeLocationId==MmtfStructure.UNAVAILABLE_CHAR_VALUE){ 289 alternativeLocationId = ' '; 290 } 291 if (alternativeLocationId != ' ') { 292 // Get the altGroup 293 altGroup = getCorrectAltLocGroup(alternativeLocationId); 294 atom.setAltLoc(alternativeLocationId); 295 } else { 296 atom.setAltLoc(Character.valueOf(' ')); 297 } 298 atom.setX(x); 299 atom.setY(y); 300 atom.setZ(z); 301 atom.setOccupancy(occupancy); 302 atom.setTempFactor(temperatureFactor); 303 atom.setCharge((short) charge); 304 if (altGroup == null) { 305 group.addAtom(atom); 306 } else { 307 altGroup.setChain(chain); 308 altGroup.addAtom(atom); 309 } 310 311 // IF the main group doesn't have this atom 312 if (!group.hasAtom(atom.getName())) { 313 314 // If it's not a microheterogenity case 315 if (group.getPDBName().equals(atom.getGroup().getPDBName())) { 316 // And it's not a deuterated case. 'nanoheterogenity' 317 if(!StructureTools.hasNonDeuteratedEquiv(atom,group)){ 318 group.addAtom(atom); 319 } 320 } 321 } 322 atomsInGroup.add(atom); 323 allAtoms[atomCounter] = atom; 324 atomCounter++; 325 } 326 327 /* (non-Javadoc) 328 * @see org.rcsb.mmtf.decoder.StructureDecoderInter 329 * face#setGroupBonds(int, int, int) 330 */ 331 @Override 332 public void setGroupBond(int indOne, int indTwo, int bondOrder) { 333 334 // Get the atoms 335 Atom atomOne = atomsInGroup.get(indOne); 336 Atom atomTwo = atomsInGroup.get(indTwo); 337 338 // set the new bond 339 new BondImpl(atomOne, atomTwo, bondOrder); 340 341 } 342 343 /* (non-Javadoc) 344 * @see org.rcsb.mmtf.decoder.StructureDecoder 345 * Interface#setInterGroupBonds(int, int, int) 346 */ 347 @Override 348 public void setInterGroupBond(int indOne, int indTwo, int bondOrder) { 349 350 // Get the atoms 351 Atom atomOne = allAtoms[indOne]; 352 Atom atomTwo = allAtoms[indTwo]; 353 354 // set the new bond (this 355 new BondImpl(atomOne, atomTwo, bondOrder); 356 } 357 358 359 /** 360 * Generates Alternate location groups. 361 * 362 * @param altLoc the alt loc 363 * @return the correct alt loc group 364 */ 365 private Group getCorrectAltLocGroup(Character altLoc) { 366 // see if we know this altLoc already; 367 List<Atom> atoms = group.getAtoms(); 368 if (atoms.size() > 0) { 369 Atom a1 = atoms.get(0); 370 // we are just adding atoms to the current group 371 // probably there is a second group following later... 372 if (a1.getAltLoc().equals(altLoc)) { 373 return group; } 374 } 375 376 // Get the altLocGroup 377 Group altLocgroup = group.getAltLocGroup(altLoc); 378 if (altLocgroup != null) { 379 return altLocgroup; 380 } 381 // If the group already exists (microheterogenity). 382 Group oldGroup = getGroupWithSameResNumButDiffPDBName(); 383 if (oldGroup!= null){ 384 Group altLocG = group; 385 group = oldGroup; 386 group.addAltLoc(altLocG); 387 chain.getAtomGroups().remove(altLocG); 388 return altLocG; 389 } 390 // no matching altLoc group found. 391 // build it up. 392 if (group.getAtoms().size() == 0) { 393 return group; 394 } 395 Group altLocG = (Group) group.clone(); 396 // drop atoms from cloned group... 397 // https://redmine.open-bio.org/issues/3307 398 altLocG.setAtoms(new ArrayList<Atom>()); 399 altLocG.getAltLocs().clear(); 400 group.addAltLoc(altLocG); 401 return altLocG; 402 403 } 404 405 406 /* (non-Javadoc) 407 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface# 408 * setXtalInfo(java.lang.String, java.util.List) 409 */ 410 @Override 411 public void setXtalInfo(String spaceGroupString, float[] unitCell, double[][] ncsOperMatrixList) { 412 // Now set the xtalographic information 413 PDBCrystallographicInfo pci = new PDBCrystallographicInfo(); 414 SpaceGroup spaceGroup = SpaceGroup.parseSpaceGroup(spaceGroupString); 415 pci.setSpaceGroup(spaceGroup); 416 if (unitCell.length > 0) { 417 CrystalCell cell = new CrystalCell(unitCell[0], unitCell[1], 418 unitCell[2], unitCell[3], unitCell[4], unitCell[5]); 419 pci.setCrystalCell(cell); 420 structure.setCrystallographicInfo(pci); 421 } 422 423 pci.setNcsOperators(MmtfUtils.getNcsAsMatrix4d(ncsOperMatrixList)); 424 } 425 426 427 /** 428 * Get the type of group (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0) 429 * @param polymerType 430 * @return The type of group. (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0) 431 */ 432 private int getGroupTypIndicator(PolymerType polymerType) { 433 if(PolymerType.PROTEIN_ONLY.contains(polymerType)){ 434 return 1; 435 } 436 if(PolymerType.POLYNUCLEOTIDE_ONLY.contains(polymerType)){ 437 return 2; 438 } 439 return 0; 440 } 441 442 443 @Override 444 public void setBioAssemblyTrans(int bioAssemblyId, int[] inputChainIndices, double[] inputTransform, String name) { 445 // Biojava uses this as a one indexed id. 446 bioAssemblyId++; 447 if(bioassIndex!=bioAssemblyId){ 448 transformList = new ArrayList<>(); 449 bioassIndex = bioAssemblyId; 450 } 451 PDBHeader pdbHeader = structure.getPDBHeader(); 452 // Get the bioassembly data 453 Map<Integer, BioAssemblyInfo> bioAssemblies = pdbHeader.getBioAssemblies(); 454 // Get the bioassembly itself (if it exists 455 BioAssemblyInfo bioAssInfo; 456 if (bioAssemblies.containsKey(bioAssemblyId)){ 457 bioAssInfo = bioAssemblies.get(bioAssemblyId); 458 } 459 else{ 460 bioAssInfo = new BioAssemblyInfo(); 461 bioAssInfo.setTransforms(new ArrayList<BiologicalAssemblyTransformation>()); 462 bioAssemblies.put(bioAssemblyId, bioAssInfo); 463 bioAssInfo.setId(bioAssemblyId); 464 } 465 466 for(int currChainIndex : inputChainIndices){ 467 BiologicalAssemblyTransformation bioAssTrans = new BiologicalAssemblyTransformation(); 468 Integer transId = transformList.indexOf(inputTransform)+1; 469 if(transId==0){ 470 transformList.add(inputTransform); 471 transId = transformList.indexOf(inputTransform)+1; 472 } 473 bioAssTrans.setId(transId.toString()); 474 // If it actually has an index - if it doesn't it is because the chain has no density. 475 if (currChainIndex!=-1){ 476 bioAssTrans.setChainId(chainList.get(currChainIndex).getId()); 477 } 478 else { 479 continue; 480 } 481 // Now set matrix 482 Matrix4d mat4d = new Matrix4d(inputTransform); 483 bioAssTrans.setTransformationMatrix(mat4d); 484 // Now add this 485 bioAssInfo.getTransforms().add(bioAssTrans); 486 // sort transformations into a unique order 487 Collections.sort(bioAssInfo.getTransforms()); 488 } 489 } 490 491 @Override 492 public void setEntityInfo(int[] chainIndices, String sequence, String description, String type) { 493 // First get the chains 494 EntityInfo entityInfo = new EntityInfo(); 495 entityInfo.setDescription(description); 496 entityInfo.setType(EntityType.entityTypeFromString(type)); 497 List<Chain> chains = new ArrayList<>(); 498 // Now loop through the chain ids and make a list of them 499 for( int index : chainIndices) { 500 chains.add(chainList.get(index)); 501 chainList.get(index).setEntityInfo(entityInfo); 502 chainSequenceMap.put(chainList.get(index).getId(), sequence); 503 } 504 entityInfo.setChains(chains); 505 entityInfoList.add(entityInfo); 506 } 507 508 @Override 509 public void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate, 510 String releaseDate, String[] experimentalMethods) { 511 SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); 512 // Get the pdb header 513 PDBHeader pdbHeader = structure.getPDBHeader(); 514 pdbHeader.setTitle(title); 515 pdbHeader.setResolution(resolution); 516 pdbHeader.setRfree(rFree); 517 pdbHeader.setRwork(rWork); 518 // Now loop through the techniques and add them in 519 if (experimentalMethods!=null) { 520 for (String techniqueStr : experimentalMethods) { 521 pdbHeader.setExperimentalTechnique(techniqueStr); 522 } 523 } 524 // Set the dates 525 if(depositionDate!=null){ 526 try { 527 Date depDate = formatter.parse(depositionDate); 528 pdbHeader.setDepDate(depDate); 529 } catch (ParseException e) { 530 logger.warn("Could not parse date string '{}', depositon date will be unavailable", depositionDate); 531 } 532 } 533 else{ 534 pdbHeader.setDepDate(new Date(0)); 535 } 536 if(releaseDate!=null){ 537 try { 538 Date relDate = formatter.parse(releaseDate); 539 pdbHeader.setRelDate(relDate); 540 } catch (ParseException e) { 541 logger.warn("Could not parse date string '{}', release date will be unavailable", releaseDate); 542 } 543 } 544 else{ 545 pdbHeader.setRelDate(new Date(0)); 546 } 547 } 548}