001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.io.mmtf; 022 023import java.io.Serializable; 024import java.text.ParseException; 025import java.text.SimpleDateFormat; 026import java.util.ArrayList; 027import java.util.Date; 028import java.util.HashMap; 029import java.util.List; 030import java.util.Map; 031 032import javax.vecmath.Matrix4d; 033 034import org.biojava.nbio.structure.AminoAcid; 035import org.biojava.nbio.structure.AminoAcidImpl; 036import org.biojava.nbio.structure.Atom; 037import org.biojava.nbio.structure.AtomImpl; 038import org.biojava.nbio.structure.BondImpl; 039import org.biojava.nbio.structure.Chain; 040import org.biojava.nbio.structure.ChainImpl; 041import org.biojava.nbio.structure.Element; 042import org.biojava.nbio.structure.EntityInfo; 043import org.biojava.nbio.structure.EntityType; 044import org.biojava.nbio.structure.Group; 045import org.biojava.nbio.structure.HetatomImpl; 046import org.biojava.nbio.structure.NucleotideImpl; 047import org.biojava.nbio.structure.PDBCrystallographicInfo; 048import org.biojava.nbio.structure.PDBHeader; 049import org.biojava.nbio.structure.Structure; 050import org.biojava.nbio.structure.StructureImpl; 051import org.biojava.nbio.structure.StructureTools; 052import org.biojava.nbio.structure.io.mmcif.chem.PolymerType; 053import org.biojava.nbio.structure.io.mmcif.chem.ResidueType; 054import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 055import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; 056import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; 057import org.biojava.nbio.structure.xtal.CrystalCell; 058import org.biojava.nbio.structure.xtal.SpaceGroup; 059import org.rcsb.mmtf.api.StructureAdapterInterface; 060import org.rcsb.mmtf.dataholders.MmtfStructure; 061import org.slf4j.Logger; 062import org.slf4j.LoggerFactory; 063 064 065/** 066 * A biojava specific structure inflator for MMTF. 067 * Should be ported to biojava code. 068 * 069 * @author Anthony Bradley 070 */ 071public class MmtfStructureReader implements StructureAdapterInterface, Serializable { 072 073 /** The Constant serialVersionUID. */ 074 private static final long serialVersionUID = 6772030485225130853L; 075 076 /** The logger */ 077 private static final Logger logger = LoggerFactory.getLogger(MmtfStructureReader.class); 078 079 /** The structure. */ 080 private Structure structure; 081 082 /** The model number. */ 083 private int modelNumber; 084 085 /** The chain. */ 086 private Chain chain; 087 088 /** The group. */ 089 private Group group; 090 091 /** The atoms in a group. */ 092 private List<Atom> atomsInGroup; 093 094 /** All the atoms. */ 095 private Atom[] allAtoms; 096 private int atomCounter; 097 098 /** The list of EntityInformation */ 099 private List<EntityInfo> entityInfoList; 100 101 /** All the chains */ 102 private List<Chain> chainList; 103 104 /** All the chains as a list of maps */ 105 private List<Map<String,Chain>> chainMap; 106 107 private List<double[]> transformList; 108 109 private int bioassIndex; 110 111 private Map<String,String> chainSequenceMap; 112 113 /** 114 * Instantiates a new bio java structure decoder. 115 */ 116 public MmtfStructureReader() { 117 structure = new StructureImpl(); 118 modelNumber = 0; 119 entityInfoList = new ArrayList<>(); 120 chainList = new ArrayList<>(); 121 chainMap = new ArrayList<>(); 122 transformList = new ArrayList<>(); 123 chainSequenceMap = new HashMap<>(); 124 } 125 126 /** 127 * Gets the structure. 128 * 129 * @return the structure 130 */ 131 public Structure getStructure() { 132 return structure; 133 } 134 135 @Override 136 public void finalizeStructure() { 137 // Number the remaining ones 138 int counter =0; 139 // Add the entity info 140 for (EntityInfo entityInfo : entityInfoList) { 141 counter++; 142 entityInfo.setMolId(counter); 143 } 144 structure.setEntityInfos(entityInfoList); 145 // Add the actual chains 146 for(int i=0; i<chainMap.size(); i++) { 147 // Now add the chain information 148 Map<String, Chain> modelChainMap = chainMap.get(i); 149 for(Chain modelChain : modelChainMap.values()){ 150 structure.addChain(modelChain, i); 151 String sequence = chainSequenceMap.get(modelChain.getId()); 152 if (sequence == null) { 153 logger.warn("Sequence is null for chain with asym_id {}. Most likely the chain is non-polymeric. Will not add seqres groups for it.", modelChain.getId()); 154 continue; 155 } 156 MmtfUtils.addSeqRes(modelChain, sequence); 157 } 158 } 159 StructureTools.cleanUpAltLocs(structure); 160 } 161 162 @Override 163 public void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups, 164 int totalNumChains, int totalNumModels, String modelId) { 165 structure.setPDBCode(modelId); 166 allAtoms = new Atom[totalNumAtoms]; 167 } 168 169 170 /* (non-Javadoc) 171 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#setModelInfo(int, int) 172 */ 173 @Override 174 public void setModelInfo(int inputModelNumber, 175 int chainCount) { 176 modelNumber = inputModelNumber; 177 structure.addModel(new ArrayList<Chain>(chainCount)); 178 chainMap.add(new HashMap<>()); 179 } 180 181 /* (non-Javadoc) 182 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface 183 * #setChainInfo(java.lang.String, int) 184 */ 185 @Override 186 public void setChainInfo(String chainId, String chainName, int groupCount) { 187 // First check to see if the chain exists 188 Map<String, Chain> modelChainMap = chainMap.get(modelNumber); 189 if(modelChainMap.containsKey(chainId)){ 190 chain = modelChainMap.get(chainId); 191 } 192 // If we need to set a new chain do this 193 else{ 194 chain = new ChainImpl(); 195 chain.setId(chainId.trim()); 196 chain.setName(chainName); 197 chain.setAtomGroups(new ArrayList<>(groupCount)); 198 modelChainMap.put(chainId, chain); 199 chainList.add(chain); 200 } 201 } 202 203 204 /* (non-Javadoc) 205 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface 206 * #setGroupInfo(java.lang.String, int, char, int, int) 207 */ 208 @Override 209 public void setGroupInfo(String groupName, int groupNumber, 210 char insertionCode, String chemCompType, int atomCount, int bondCount, 211 char singleLetterCode, int sequenceIndexId, int secStructType) { 212 // Get the polymer type 213 ResidueType residueType = ResidueType.getResidueTypeFromString(chemCompType); 214 int polymerType = getGroupTypIndicator(residueType.polymerType); 215 switch (polymerType) { 216 case 1: 217 AminoAcid aa = new AminoAcidImpl(); 218 // Now set the one letter code 219 aa.setAminoType(singleLetterCode); 220 group = aa; 221 break; 222 case 2: 223 group = new NucleotideImpl(); 224 break; 225 default: 226 group = new HetatomImpl(); 227 break; 228 } 229 atomsInGroup = new ArrayList<Atom>(); 230 ChemComp chemComp = new ChemComp(); 231 chemComp.setOne_letter_code(String.valueOf(singleLetterCode)); 232 chemComp.setType(chemCompType.toUpperCase()); 233 chemComp.setResidueType(residueType); 234 chemComp.setPolymerType(residueType.polymerType); 235 group.setChemComp(chemComp); 236 group.setPDBName(groupName); 237 if (insertionCode == MmtfStructure.UNAVAILABLE_CHAR_VALUE) { 238 group.setResidueNumber(chain.getName().trim(), groupNumber, null); 239 } else { 240 group.setResidueNumber(chain.getName().trim(), 241 groupNumber, insertionCode); 242 } 243 group.setAtoms(new ArrayList<Atom>(atomCount)); 244 if (polymerType==1 || polymerType==2) { 245 MmtfUtils.insertSeqResGroup(chain, group, sequenceIndexId); 246 } 247 if (atomCount > 0) { 248 chain.addGroup(group); 249 } 250 MmtfUtils.setSecStructType(group, secStructType); 251 } 252 253 /** 254 * 255 * @return 256 */ 257 private Group getGroupWithSameResNumButDiffPDBName() { 258 // If this chain already has this group number 259 for (Group g : chain.getAtomGroups() ) { 260 if (g.getResidueNumber().equals(group.getResidueNumber())) { 261 if( ! g.getPDBName().equals(group.getPDBName() )){ 262 return g; 263 } 264 } 265 } 266 return null; 267 } 268 269 /* (non-Javadoc) 270 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface# 271 * setAtomInfo(java.lang.String, int, char, float, float, 272 * float, float, float, java.lang.String, int) 273 */ 274 @Override 275 public void setAtomInfo(String atomName, 276 int serialNumber, char alternativeLocationId, float x, 277 float y, float z, float occupancy, 278 float temperatureFactor, 279 String element, int charge) { 280 Atom atom = new AtomImpl(); 281 Group altGroup = null; 282 atom.setPDBserial(serialNumber); 283 atom.setName(atomName.trim()); 284 atom.setElement(Element.valueOfIgnoreCase(element)); 285 if(alternativeLocationId==MmtfStructure.UNAVAILABLE_CHAR_VALUE){ 286 alternativeLocationId = ' '; 287 } 288 if (alternativeLocationId != ' ') { 289 // Get the altGroup 290 altGroup = getCorrectAltLocGroup(alternativeLocationId); 291 atom.setAltLoc(alternativeLocationId); 292 } else { 293 atom.setAltLoc(Character.valueOf(' ')); 294 } 295 atom.setX(x); 296 atom.setY(y); 297 atom.setZ(z); 298 atom.setOccupancy(occupancy); 299 atom.setTempFactor(temperatureFactor); 300 atom.setCharge((short) charge); 301 if (altGroup == null) { 302 group.addAtom(atom); 303 } else { 304 altGroup.setChain(chain); 305 altGroup.addAtom(atom); 306 } 307 308 // IF the main group doesn't have this atom 309 if (!group.hasAtom(atom.getName())) { 310 311 // If it's not a microheterogenity case 312 if (group.getPDBName().equals(atom.getGroup().getPDBName())) { 313 // And it's not a deuterated case. 'nanoheterogenity' 314 if(!StructureTools.hasNonDeuteratedEquiv(atom,group)){ 315 group.addAtom(atom); 316 } 317 } 318 } 319 atomsInGroup.add(atom); 320 allAtoms[atomCounter] = atom; 321 atomCounter++; 322 } 323 324 /* (non-Javadoc) 325 * @see org.rcsb.mmtf.decoder.StructureDecoderInter 326 * face#setGroupBonds(int, int, int) 327 */ 328 @Override 329 public void setGroupBond(int indOne, 330 int indTwo, int bondOrder) { 331 // Get the atom 332 Atom atomOne = atomsInGroup.get(indOne); 333 Atom atomTwo = atomsInGroup.get(indTwo); 334 // set the new bond 335 @SuppressWarnings("unused") 336 BondImpl bond = new BondImpl(atomOne, atomTwo, bondOrder); 337 } 338 339 /* (non-Javadoc) 340 * @see org.rcsb.mmtf.decoder.StructureDecoder 341 * Interface#setInterGroupBonds(int, int, int) 342 */ 343 @Override 344 public void setInterGroupBond(int indOne, 345 int indTwo, int bondOrder) { 346 // Get the atom 347 Atom atomOne = allAtoms[indOne]; 348 Atom atomTwo = allAtoms[indTwo]; 349 // set the new bond 350 @SuppressWarnings("unused") 351 BondImpl bond = new BondImpl(atomOne, atomTwo, bondOrder); 352 } 353 354 355 /** 356 * Generates Alternate location groups. 357 * 358 * @param altLoc the alt loc 359 * @return the correct alt loc group 360 */ 361 private Group getCorrectAltLocGroup(Character altLoc) { 362 // see if we know this altLoc already; 363 List<Atom> atoms = group.getAtoms(); 364 if (atoms.size() > 0) { 365 Atom a1 = atoms.get(0); 366 // we are just adding atoms to the current group 367 // probably there is a second group following later... 368 if (a1.getAltLoc().equals(altLoc)) { 369 return group; } 370 } 371 372 // Get the altLocGroup 373 Group altLocgroup = group.getAltLocGroup(altLoc); 374 if (altLocgroup != null) { 375 return altLocgroup; 376 } 377 // If the group already exists (microheterogenity). 378 Group oldGroup = getGroupWithSameResNumButDiffPDBName(); 379 if (oldGroup!= null){ 380 Group altLocG = group; 381 group = oldGroup; 382 group.addAltLoc(altLocG); 383 chain.getAtomGroups().remove(altLocG); 384 return altLocG; 385 } 386 // no matching altLoc group found. 387 // build it up. 388 if (group.getAtoms().size() == 0) { 389 return group; 390 } 391 Group altLocG = (Group) group.clone(); 392 // drop atoms from cloned group... 393 // https://redmine.open-bio.org/issues/3307 394 altLocG.setAtoms(new ArrayList<Atom>()); 395 altLocG.getAltLocs().clear(); 396 group.addAltLoc(altLocG); 397 return altLocG; 398 399 } 400 401 402 /* (non-Javadoc) 403 * @see org.rcsb.mmtf.decoder.StructureDecoderInterface# 404 * setXtalInfo(java.lang.String, java.util.List) 405 */ 406 @Override 407 public void setXtalInfo(String spaceGroupString, float[] unitCell, double[][] ncsOperMatrixList) { 408 // Now set the xtalographic information 409 PDBCrystallographicInfo pci = new PDBCrystallographicInfo(); 410 SpaceGroup spaceGroup = SpaceGroup.parseSpaceGroup(spaceGroupString); 411 pci.setSpaceGroup(spaceGroup); 412 if (unitCell.length > 0) { 413 CrystalCell cell = new CrystalCell(unitCell[0], unitCell[1], 414 unitCell[2], unitCell[3], unitCell[4], unitCell[5]); 415 pci.setCrystalCell(cell); 416 structure.setCrystallographicInfo(pci); 417 } 418 419 pci.setNcsOperators(MmtfUtils.getNcsAsMatrix4d(ncsOperMatrixList)); 420 } 421 422 423 /** 424 * Get the type of group (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0) 425 * @param currentGroup 426 * @return The type of group. (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0) 427 */ 428 private int getGroupTypIndicator(PolymerType polymerType) { 429 if(PolymerType.PROTEIN_ONLY.contains(polymerType)){ 430 return 1; 431 } 432 if(PolymerType.POLYNUCLEOTIDE_ONLY.contains(polymerType)){ 433 return 2; 434 } 435 return 0; 436 } 437 438 439 @Override 440 public void setBioAssemblyTrans(int bioAssemblyId, int[] inputChainIndices, double[] inputTransform, String name) { 441 // Biojava uses this as a one indexed id. 442 bioAssemblyId++; 443 if(bioassIndex!=bioAssemblyId){ 444 transformList = new ArrayList<>(); 445 bioassIndex = bioAssemblyId; 446 } 447 PDBHeader pdbHeader = structure.getPDBHeader(); 448 // Get the bioassembly data 449 Map<Integer, BioAssemblyInfo> bioAssemblies = pdbHeader.getBioAssemblies(); 450 // Get the bioassembly itself (if it exists 451 BioAssemblyInfo bioAssInfo; 452 if (bioAssemblies.containsKey(bioAssemblyId)){ 453 bioAssInfo = bioAssemblies.get(bioAssemblyId); 454 } 455 else{ 456 bioAssInfo = new BioAssemblyInfo(); 457 bioAssInfo.setTransforms(new ArrayList<BiologicalAssemblyTransformation>()); 458 bioAssemblies.put(bioAssemblyId, bioAssInfo); 459 bioAssInfo.setId(bioAssemblyId); 460 } 461 462 for(int currChainIndex : inputChainIndices){ 463 BiologicalAssemblyTransformation bioAssTrans = new BiologicalAssemblyTransformation(); 464 Integer transId = transformList.indexOf(inputTransform)+1; 465 if(transId==0){ 466 transformList.add(inputTransform); 467 transId = transformList.indexOf(inputTransform)+1; 468 } 469 bioAssTrans.setId(transId.toString()); 470 // If it actually has an index - if it doesn't it is because the chain has no density. 471 if (currChainIndex!=-1){ 472 bioAssTrans.setChainId(chainList.get(currChainIndex).getId()); 473 } 474 else { 475 continue; 476 } 477 // Now set matrix 478 Matrix4d mat4d = new Matrix4d(inputTransform); 479 bioAssTrans.setTransformationMatrix(mat4d); 480 // Now add this 481 bioAssInfo.getTransforms().add(bioAssTrans); 482 } 483 } 484 485 @Override 486 public void setEntityInfo(int[] chainIndices, String sequence, String description, String type) { 487 // First get the chains 488 EntityInfo entityInfo = new EntityInfo(); 489 entityInfo.setDescription(description); 490 entityInfo.setType(EntityType.entityTypeFromString(type)); 491 List<Chain> chains = new ArrayList<>(); 492 // Now loop through the chain ids and make a list of them 493 for( int index : chainIndices) { 494 chains.add(chainList.get(index)); 495 chainList.get(index).setEntityInfo(entityInfo); 496 chainSequenceMap.put(chainList.get(index).getId(), sequence); 497 } 498 entityInfo.setChains(chains); 499 entityInfoList.add(entityInfo); 500 } 501 502 @Override 503 public void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate, 504 String releaseDate, String[] experimentalMethods) { 505 SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); 506 // Get the pdb header 507 PDBHeader pdbHeader = structure.getPDBHeader(); 508 pdbHeader.setTitle(title); 509 pdbHeader.setResolution(resolution); 510 pdbHeader.setRfree(rFree); 511 pdbHeader.setRwork(rWork); 512 // Now loop through the techniques and add them in 513 if (experimentalMethods!=null) { 514 for (String techniqueStr : experimentalMethods) { 515 pdbHeader.setExperimentalTechnique(techniqueStr); 516 } 517 } 518 // Set the dates 519 if(depositionDate!=null){ 520 try { 521 Date depDate = formatter.parse(depositionDate); 522 pdbHeader.setDepDate(depDate); 523 } catch (ParseException e) { 524 logger.warn("Could not parse date string '{}', depositon date will be unavailable", depositionDate); 525 } 526 } 527 else{ 528 pdbHeader.setDepDate(new Date(0)); 529 } 530 if(releaseDate!=null){ 531 try { 532 Date relDate = formatter.parse(releaseDate); 533 pdbHeader.setRelDate(relDate); 534 } catch (ParseException e) { 535 logger.warn("Could not parse date string '{}', release date will be unavailable", releaseDate); 536 } 537 } 538 else{ 539 pdbHeader.setRelDate(new Date(0)); 540 } 541 } 542}