package org.biojava.nbio.structure.io.cif;

import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.rcsb.cif.CifBuilder;
import org.rcsb.cif.model.Category;
import org.rcsb.cif.model.CifFile;
import org.rcsb.cif.model.FloatColumnBuilder;
import org.rcsb.cif.model.IntColumnBuilder;
import org.rcsb.cif.model.StrColumnBuilder;
import org.rcsb.cif.schema.StandardSchemata;
import org.rcsb.cif.schema.mm.MmCifBlockBuilder;
import org.rcsb.cif.schema.mm.MmCifCategoryBuilder;
import org.rcsb.cif.schema.mm.MmCifFileBuilder;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.stream.Collector;
import java.util.stream.Collectors;

/**
 * Convert a BioJava object to a CifFile.
 * @author Sebastian Bittrich
 * @since 5.3.0
 */
public abstract class AbstractCifFileSupplier<S> implements CifFileSupplier<S> {
    /**
     * Assembles a {@link CifFile} with a single block from a structure and its already-wrapped atoms.
     * <p>
     * The block is named after the structure's PDB id (or the empty string if the structure has none) and
     * receives, in this order: the struct keywords, the atom site (only if it is defined and has rows), the
     * cell (only if a crystal cell is present), the symmetry (only if a space group is present), and the
     * entity / entity poly categories (only if the structure's entity info list is not {@code null}).
     *
     * @param structure the structure providing header, crystallographic and entity information
     * @param wrappedAtoms the atoms to write to the atom site category, in output order
     * @return the assembled CifFile
     */
    protected CifFile getInternal(Structure structure, List<WrappedAtom> wrappedAtoms) {
        // gather everything that is written below before starting to build the block

        // cell
        CrystalCell crystalCell = structure.getPDBHeader().getCrystallographicInfo().getCrystalCell();
        // symmetry
        SpaceGroup spaceGroup = structure.getPDBHeader().getCrystallographicInfo().getSpaceGroup();
        // atom_site
        Category atomSite = wrappedAtoms.stream().collect(toAtomSite());
        // entity information
        List<EntityInfo> entityInfos = structure.getEntityInfos();

        MmCifBlockBuilder blockBuilder = CifBuilder.enterFile(StandardSchemata.MMCIF)
                .enterBlock(structure.getPdbId() == null ? "" : structure.getPdbId().getId());

        blockBuilder.enterStructKeywords().enterText()
                .add(String.join(", ", structure.getPDBHeader().getKeywords()))
                .leaveColumn().leaveCategory();

        if (atomSite.isDefined() && atomSite.getRowCount() > 0) {
            // set atom site
            blockBuilder.addCategory(atomSite);
        }

        if (crystalCell != null) {
            // set cell category
            blockBuilder.enterCell()
                    .enterLengthA()
                    .add(crystalCell.getA())
                    .leaveColumn()

                    .enterLengthB()
                    .add(crystalCell.getB())
                    .leaveColumn()

                    .enterLengthC()
                    .add(crystalCell.getC())
                    .leaveColumn()

                    .enterAngleAlpha()
                    .add(crystalCell.getAlpha())
                    .leaveColumn()

                    .enterAngleBeta()
                    .add(crystalCell.getBeta())
                    .leaveColumn()

                    .enterAngleGamma()
                    .add(crystalCell.getGamma())
                    .leaveColumn()
                    .leaveCategory();
        }

        if (spaceGroup != null) {
            // set symmetry category
            blockBuilder.enterSymmetry()
                    .enterSpaceGroupNameH_M()
                    .add(spaceGroup.getShortSymbol())
                    .leaveColumn()
                    .leaveCategory();
        }

        if (entityInfos != null) {
            String[] entityIds = new String[entityInfos.size()];
            String[] entityTypes = new String[entityInfos.size()];
            String[] entityDescriptions = new String[entityInfos.size()];

            for (int i = 0; i < entityInfos.size(); i++) {
                EntityInfo e = entityInfos.get(i);
                entityIds[i] = Integer.toString(e.getMolId());
                entityTypes[i] = e.getType().getEntityType();
                entityDescriptions[i] = e.getDescription() == null ? "?" : e.getDescription();
            }

            // select the polymer entities once; ids and sequences are derived from the same list so that
            // both columns are guaranteed to have matching rows
            List<EntityInfo> polymers = entityInfos.stream()
                    .filter(e -> e.getType() == EntityType.POLYMER)
                    .collect(Collectors.toList());
            String[] polyEntityIds = polymers.stream()
                    .map(e -> Integer.toString(e.getMolId()))
                    .toArray(String[]::new);
            String[] polyEntitySeqs = polymers.stream()
                    .map(e -> polymerSequence(e))
                    .toArray(String[]::new);

            blockBuilder.enterEntity()
                    .enterId()
                    .add(entityIds)
                    .leaveColumn()

                    .enterType()
                    .add(entityTypes)
                    .leaveColumn()

                    .enterPdbxDescription()
                    .add(entityDescriptions)
                    .leaveColumn()

                    .leaveCategory();

            blockBuilder.enterEntityPoly()
                    .enterEntityId()
                    .add(polyEntityIds)
                    .leaveColumn()

                    .enterPdbxSeqOneLetterCodeCan()
                    .add(polyEntitySeqs)
                    .leaveColumn()

                    .leaveCategory();
        }

        return blockBuilder.leaveBlock().leaveFile();
    }

    /**
     * Returns the SEQRES sequence of the first chain of an entity.
     *
     * @param entity the (polymer) entity
     * @return the sequence of the entity's first chain, or {@code "?"} if the entity has no chains
     */
    private static String polymerSequence(EntityInfo entity) {
        List<Chain> chains = entity.getChains();
        if (chains == null || chains.isEmpty()) {
            // an entity without chains has no sequence to report; write it as unknown instead of failing
            return "?";
        }
        return chains.get(0).getSeqResSequence();
    }

    /**
     * Wraps all atoms of a chain (including those of alt loc groups) and appends them to the given list.
     *
     * @param chain the chain whose atom groups are processed
     * @param model the model number to record for every atom of this chain
     * @param wrappedAtoms the list the wrapped atoms are appended to, group by group
     */
    protected void handleChain(Chain chain, int model, List<WrappedAtom> wrappedAtoms) {
        final String chainName = chain.getName();
        final String chainId = chain.getId();
        for (Group group : chain.getAtomGroups()) {
            // The alt locs can have duplicates, since at parsing time we make sure that all alt loc groups have
            // all atoms (see StructureTools#cleanUpAltLocs)
            // Thus we have to remove duplicates here by using the atom id
            // See issue https://github.com/biojava/biojava/issues/778 and
            // TestAltLocs.testMmcifWritingAllAltlocs/testMmcifWritingPartialAltlocs
            Map<Integer, WrappedAtom> uniqueAtoms = new LinkedHashMap<>();
            collectAtoms(group, model, chainName, chainId, uniqueAtoms);

            if (group.hasAltLoc()) {
                for (Group alt : group.getAltLocs()) {
                    collectAtoms(alt, model, chainName, chainId, uniqueAtoms);
                }
            }

            wrappedAtoms.addAll(uniqueAtoms.values());
        }
    }

    /**
     * Wraps every non-null atom of a group and registers it under its PDB serial. An atom whose serial is
     * already registered replaces the previous entry but keeps that entry's position in the map.
     *
     * @param group the group providing the atoms
     * @param model the model number to record
     * @param chainName the chain name to record
     * @param chainId the chain id to record
     * @param uniqueAtoms the insertion-ordered map (keyed by PDB serial) that receives the wrapped atoms
     */
    private static void collectAtoms(Group group, int model, String chainName, String chainId,
                                     Map<Integer, WrappedAtom> uniqueAtoms) {
        for (int atomIndex = 0; atomIndex < group.size(); atomIndex++) {
            Atom atom = group.getAtom(atomIndex);
            if (atom == null) {
                continue;
            }

            uniqueAtoms.put(atom.getPDBserial(), new WrappedAtom(model, chainName, chainId, atom, atom.getPDBserial()));
        }
    }

    /**
     * Wrapped atoms represent individual atoms enriched with model- and chain-level information. Also, gives control
     * over the atomId field. Useful to convert structures (and subsets thereof) to their mmCIF representation.
     */
    public static class WrappedAtom {
        private final int model;
        private final String chainName;
        private final String chainId;
        private final Atom atom;
        private final int atomId;

        /**
         * Constructs a new wrapped atom.
         * @param model the model number (written to pdbx_PDB_model_num)
         * @param chainName the chain name (written to auth_asym_id)
         * @param chainId the chain id (written to label_asym_id)
         * @param atom the atom instance itself
         * @param atomId the atom identifier (written to the atom site id column)
         */
        public WrappedAtom(int model, String chainName, String chainId, Atom atom, int atomId) {
            this.model = model;
            this.chainName = chainName;
            this.chainId = chainId;
            this.atom = atom;
            this.atomId = atomId;
        }

        /**
         * @return the model number
         */
        public int getModel() {
            return model;
        }

        /**
         * @return the chain name, which is written to auth_asym_id
         */
        public String getChainName() {
            return chainName;
        }

        /**
         * @return the chain id, which is written to label_asym_id
         */
        public String getChainId() {
            return chainId;
        }

        /**
         * @return the wrapped atom instance
         */
        public Atom getAtom() {
            return atom;
        }

        /**
         * @return the atom identifier, which is written to the atom site id column
         */
        public int getAtomId() {
            return atomId;
        }
    }

    /**
     * Collects {@link WrappedAtom} instances into one {@link org.rcsb.cif.schema.mm.AtomSite}.
     * <p>
     * The collector must only be used with sequential streams: its combiner is not implemented and throws an
     * {@link UnsupportedOperationException}.
     * @return an atom site record containing all atoms
     */
    public static Collector<WrappedAtom, ?, Category> toAtomSite() {
        return Collector.of(AtomSiteCollector::new,
                AtomSiteCollector::accept,
                AtomSiteCollector::combine,
                AtomSiteCollector::get);
    }

    /**
     * Mutable accumulator behind {@link #toAtomSite()}: holds one open column builder per atom site column,
     * appends one value (or a not-present/unknown marker) to each of them per accepted atom and finally
     * builds the category.
     */
    static class AtomSiteCollector implements Consumer<WrappedAtom> {
        private final MmCifCategoryBuilder.AtomSiteBuilder atomSiteBuilder;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> groupPDB;
        private final IntColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> id;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> typeSymbol;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelAtomId;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelAltId;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelCompId;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelAsymId;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelEntityId;
        private final IntColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelSeqId;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> pdbxPDBInsCode;
        private final FloatColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> cartnX;
        private final FloatColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> cartnY;
        private final FloatColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> cartnZ;
        private final FloatColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> occupancy;
        private final FloatColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> bIsoOrEquiv;
        private final IntColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> authSeqId;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> authCompId;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> authAsymId;
        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> authAtomId;
        private final IntColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> pdbxPDBModelNum;

        /**
         * Creates a stand-alone atom site builder (no parent block) and opens all columns that are written.
         */
        AtomSiteCollector() {
            this.atomSiteBuilder = new MmCifCategoryBuilder.AtomSiteBuilder(null);
            this.groupPDB = atomSiteBuilder.enterGroupPDB();
            this.id = atomSiteBuilder.enterId();
            this.typeSymbol = atomSiteBuilder.enterTypeSymbol();
            this.labelAtomId = atomSiteBuilder.enterLabelAtomId();
            this.labelAltId = atomSiteBuilder.enterLabelAltId();
            this.labelCompId = atomSiteBuilder.enterLabelCompId();
            this.labelAsymId = atomSiteBuilder.enterLabelAsymId();
            this.labelEntityId = atomSiteBuilder.enterLabelEntityId();
            this.labelSeqId = atomSiteBuilder.enterLabelSeqId();
            this.pdbxPDBInsCode = atomSiteBuilder.enterPdbxPDBInsCode();
            this.cartnX = atomSiteBuilder.enterCartnX();
            this.cartnY = atomSiteBuilder.enterCartnY();
            this.cartnZ = atomSiteBuilder.enterCartnZ();
            this.occupancy = atomSiteBuilder.enterOccupancy();
            this.bIsoOrEquiv = atomSiteBuilder.enterBIsoOrEquiv();
            this.authSeqId = atomSiteBuilder.enterAuthSeqId();
            this.authCompId = atomSiteBuilder.enterAuthCompId();
            this.authAsymId = atomSiteBuilder.enterAuthAsymId();
            this.authAtomId = atomSiteBuilder.enterAuthAtomId();
            this.pdbxPDBModelNum = atomSiteBuilder.enterPdbxPDBModelNum();
        }

        /**
         * Appends one row for the given atom, i.e. one entry to every column.
         *
         * @param wrappedAtom the atom to append, together with its model and chain information
         */
        @Override
        public void accept(WrappedAtom wrappedAtom) {
            Atom atom = wrappedAtom.getAtom();
            Group group = atom.getGroup();
            Chain chain = group.getChain();

            groupPDB.add(group.getType().equals(GroupType.HETATM) ? "HETATM" : "ATOM");
            id.add(wrappedAtom.getAtomId());
            Element element = atom.getElement();
            // Locale.ROOT keeps the upper-casing independent of the JVM's default locale (e.g. a Turkish
            // locale would otherwise turn "Ni" into "Nİ")
            typeSymbol.add(element.equals(Element.R) ? "X" : element.toString().toUpperCase(Locale.ROOT));
            labelAtomId.add(atom.getName());
            Character altLoc = atom.getAltLoc();
            if (altLoc == null || altLoc == ' ') {
                labelAltId.markNextNotPresent();
            } else {
                labelAltId.add(String.valueOf(altLoc));
            }
            labelCompId.add(group.getPDBName());
            labelAsymId.add(wrappedAtom.getChainId());
            // fallbacks used when the chain carries no entity information: entity "0" and the residue
            // number's sequence number as label_seq_id
            String entityId = "0";
            int seqId = group.getResidueNumber().getSeqNum();
            if (chain.getEntityInfo() != null) {
                entityId = Integer.toString(chain.getEntityInfo().getMolId());
                if (chain.getEntityInfo().getType() == EntityType.POLYMER) {
                    // this only makes sense for polymeric chains, non-polymer chains will never have seqres groups and
                    // there's no point in calling getAlignedResIndex
                    seqId = chain.getEntityInfo().getAlignedResIndex(group, chain);
                }
            }
            labelEntityId.add(entityId);
            labelSeqId.add(seqId);
            String insCode = "";
            if (group.getResidueNumber().getInsCode() != null) {
                insCode = Character.toString(group.getResidueNumber().getInsCode());
            }
            if (insCode.isEmpty()) {
                pdbxPDBInsCode.markNextUnknown();
            } else {
                pdbxPDBInsCode.add(insCode);
            }
            cartnX.add(atom.getX());
            cartnY.add(atom.getY());
            cartnZ.add(atom.getZ());
            occupancy.add(atom.getOccupancy());
            bIsoOrEquiv.add(atom.getTempFactor());
            authSeqId.add(group.getResidueNumber().getSeqNum());
            authCompId.add(group.getPDBName());
            authAsymId.add(wrappedAtom.getChainName());
            authAtomId.add(atom.getName());
            pdbxPDBModelNum.add(wrappedAtom.getModel());
        }

        /**
         * Not implemented: merging two partial results is unsupported, so this collector cannot be used with
         * parallel streams.
         *
         * @param other the other partial result
         * @return never returns normally
         * @throws UnsupportedOperationException always
         */
        AtomSiteCollector combine(AtomSiteCollector other) {
            throw new UnsupportedOperationException("impl by calling addAll for all collections");
        }

        /**
         * Closes all columns and builds the atom site category.
         *
         * @return the category containing one row per accepted atom
         */
        Category get() {
            groupPDB.leaveColumn();
            id.leaveColumn();
            typeSymbol.leaveColumn();
            labelAtomId.leaveColumn();
            labelAltId.leaveColumn();
            labelCompId.leaveColumn();
            labelAsymId.leaveColumn();
            labelEntityId.leaveColumn();
            labelSeqId.leaveColumn();
            pdbxPDBInsCode.leaveColumn();
            cartnX.leaveColumn();
            cartnY.leaveColumn();
            cartnZ.leaveColumn();
            occupancy.leaveColumn();
            bIsoOrEquiv.leaveColumn();
            authSeqId.leaveColumn();
            authCompId.leaveColumn();
            authAsymId.leaveColumn();
            authAtomId.leaveColumn();
            pdbxPDBModelNum.leaveColumn();
            return atomSiteBuilder.build();
        }
    }
}