001package org.biojava.nbio.structure.io.cif;
002
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.EntityType;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.rcsb.cif.CifBuilder;
import org.rcsb.cif.model.Category;
import org.rcsb.cif.model.CifFile;
import org.rcsb.cif.model.FloatColumnBuilder;
import org.rcsb.cif.model.IntColumnBuilder;
import org.rcsb.cif.model.StrColumnBuilder;
import org.rcsb.cif.schema.StandardSchemata;
import org.rcsb.cif.schema.mm.MmCifBlockBuilder;
import org.rcsb.cif.schema.mm.MmCifCategoryBuilder;
import org.rcsb.cif.schema.mm.MmCifFileBuilder;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Consumer;
import java.util.stream.Collector;
028
029/**
030 * Convert a BioJava object to a CifFile.
031 * @author Sebastian Bittrich
032 * @since 5.3.0
033 */
034public abstract class AbstractCifFileSupplier<S> implements CifFileSupplier<S> {
035    protected CifFile getInternal(Structure structure, List<WrappedAtom> wrappedAtoms) {
036        // for now BioJava only considered 3 categories for create a Cif representation of a structure
037
038        // cell
039        CrystalCell crystalCell = structure.getPDBHeader().getCrystallographicInfo().getCrystalCell();
040        // symmetry
041        SpaceGroup spaceGroup = structure.getPDBHeader().getCrystallographicInfo().getSpaceGroup();
042        // atom_site
043        Category atomSite = wrappedAtoms.stream().collect(toAtomSite());
044
045        MmCifBlockBuilder blockBuilder = CifBuilder.enterFile(StandardSchemata.MMCIF)
046                .enterBlock(structure.getPDBCode());
047
048        blockBuilder.enterStructKeywords().enterText()
049        .add(String.join(", ", structure.getPDBHeader().getKeywords()))
050        .leaveColumn().leaveCategory();
051
052        if (atomSite.isDefined() && atomSite.getRowCount() > 0) {
053            // set atom site
054            blockBuilder.addCategory(atomSite);
055        }
056
057        if (crystalCell != null) {
058            // set cell category
059            blockBuilder.enterCell()
060                    .enterLengthA()
061                    .add(crystalCell.getA())
062                    .leaveColumn()
063
064                    .enterLengthB()
065                    .add(crystalCell.getB())
066                    .leaveColumn()
067
068                    .enterLengthC()
069                    .add(crystalCell.getC())
070                    .leaveColumn()
071
072                    .enterAngleAlpha()
073                    .add(crystalCell.getAlpha())
074                    .leaveColumn()
075
076                    .enterAngleBeta()
077                    .add(crystalCell.getBeta())
078                    .leaveColumn()
079
080                    .enterAngleGamma()
081                    .add(crystalCell.getGamma())
082                    .leaveColumn()
083                    .leaveCategory();
084        }
085
086        if (spaceGroup != null) {
087            // set symmetry category
088            blockBuilder.enterSymmetry()
089                    .enterSpaceGroupNameH_M()
090                    .add(spaceGroup.getShortSymbol())
091                    .leaveColumn()
092                    .leaveCategory();
093        }
094
095        return blockBuilder.leaveBlock().leaveFile();
096    }
097
098    protected void handleChain(Chain chain, int model, List<WrappedAtom> wrappedAtoms) {
099        final String chainName = chain.getName();
100        final String chainId = chain.getId();
101        for (Group group : chain.getAtomGroups()) {
102            // The alt locs can have duplicates, since at parsing time we make sure that all alt loc groups have
103            // all atoms (see StructureTools#cleanUpAltLocs)
104            // Thus we have to remove duplicates here by using the atom id
105            // See issue https://github.com/biojava/biojava/issues/778 and
106            // TestAltLocs.testMmcifWritingAllAltlocs/testMmcifWritingPartialAltlocs
107            Map<Integer, WrappedAtom> uniqueAtoms = new LinkedHashMap<>();
108            for (int atomIndex = 0; atomIndex < group.size(); atomIndex++) {
109                Atom atom = group.getAtom(atomIndex);
110                if (atom == null) {
111                    continue;
112                }
113
114                uniqueAtoms.put(atom.getPDBserial(), new WrappedAtom(model, chainName, chainId, atom, atom.getPDBserial()));
115            }
116
117            if (group.hasAltLoc()) {
118                for (Group alt : group.getAltLocs()) {
119                    for (int atomIndex = 0; atomIndex < alt.size(); atomIndex++) {
120                        Atom atom = alt.getAtom(atomIndex);
121                        if (atom == null) {
122                            continue;
123                        }
124
125                        uniqueAtoms.put(atom.getPDBserial(), new WrappedAtom(model, chainName, chainId, atom, atom.getPDBserial()));
126                    }
127                }
128            }
129
130            wrappedAtoms.addAll(uniqueAtoms.values());
131        }
132    }
133
134    /**
135     * Wrapped atoms represent individual atoms enriched with model- and chain-level information. Also, gives control
136     * over the atomId field. Useful to convert structures (and subsets thereof) to their mmCIF representation.
137     */
138    public static class WrappedAtom {
139        private final int model;
140        private final String chainName;
141        private final String chainId;
142        private final Atom atom;
143        private final int atomId;
144
145        /**
146         * Construct a new atoms.
147         * @param model the model number
148         * @param chainName the label_asym_id
149         * @param chainId the auth_asym_id
150         * @param atom the atom instance itself
151         * @param atomId the label_atom_id
152         */
153        public WrappedAtom(int model, String chainName, String chainId, Atom atom, int atomId) {
154            this.model = model;
155            this.chainName = chainName;
156            this.chainId = chainId;
157            this.atom = atom;
158            this.atomId = atomId;
159        }
160
161        public int getModel() {
162            return model;
163        }
164
165        public String getChainName() {
166            return chainName;
167        }
168
169        public String getChainId() {
170            return chainId;
171        }
172
173        public Atom getAtom() {
174            return atom;
175        }
176
177        public int getAtomId() {
178            return atomId;
179        }
180    }
181
182    /**
183     * Collects {@link WrappedAtom} instances into one {@link org.rcsb.cif.schema.mm.AtomSite}.
184     * @return an atom site record containing all atoms
185     */
186    public static Collector<WrappedAtom, ?, Category> toAtomSite() {
187        return Collector.of(AtomSiteCollector::new,
188                AtomSiteCollector::accept,
189                AtomSiteCollector::combine,
190                AtomSiteCollector::get);
191    }
192
193    static class AtomSiteCollector implements Consumer<WrappedAtom> {
194        private final MmCifCategoryBuilder.AtomSiteBuilder atomSiteBuilder;
195        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> groupPDB;
196        private final IntColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> id;
197        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> typeSymbol;
198        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelAtomId;
199        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelAltId;
200        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelCompId;
201        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelAsymId;
202        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelEntityId;
203        private final IntColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> labelSeqId;
204        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> pdbxPDBInsCode;
205        private final FloatColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> cartnX;
206        private final FloatColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> cartnY;
207        private final FloatColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> cartnZ;
208        private final FloatColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> occupancy;
209        private final FloatColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> bIsoOrEquiv;
210        private final IntColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> authSeqId;
211        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> authCompId;
212        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> authAsymId;
213        private final StrColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> authAtomId;
214        private final IntColumnBuilder<MmCifCategoryBuilder.AtomSiteBuilder, MmCifBlockBuilder, MmCifFileBuilder> pdbxPDBModelNum;
215
216        AtomSiteCollector() {
217            this.atomSiteBuilder = new MmCifCategoryBuilder.AtomSiteBuilder(null);
218            this.groupPDB = atomSiteBuilder.enterGroupPDB();
219            this.id = atomSiteBuilder.enterId();
220            this.typeSymbol = atomSiteBuilder.enterTypeSymbol();
221            this.labelAtomId = atomSiteBuilder.enterLabelAtomId();
222            this.labelAltId = atomSiteBuilder.enterLabelAltId();
223            this.labelCompId = atomSiteBuilder.enterLabelCompId();
224            this.labelAsymId = atomSiteBuilder.enterLabelAsymId();
225            this.labelEntityId = atomSiteBuilder.enterLabelEntityId();
226            this.labelSeqId = atomSiteBuilder.enterLabelSeqId();
227            this.pdbxPDBInsCode = atomSiteBuilder.enterPdbxPDBInsCode();
228            this.cartnX = atomSiteBuilder.enterCartnX();
229            this.cartnY = atomSiteBuilder.enterCartnY();
230            this.cartnZ = atomSiteBuilder.enterCartnZ();
231            this.occupancy = atomSiteBuilder.enterOccupancy();
232            this.bIsoOrEquiv = atomSiteBuilder.enterBIsoOrEquiv();
233            this.authSeqId = atomSiteBuilder.enterAuthSeqId();
234            this.authCompId = atomSiteBuilder.enterAuthCompId();
235            this.authAsymId = atomSiteBuilder.enterAuthAsymId();
236            this.authAtomId = atomSiteBuilder.enterAuthAtomId();
237            this.pdbxPDBModelNum = atomSiteBuilder.enterPdbxPDBModelNum();
238        }
239
240        @Override
241        public void accept(WrappedAtom wrappedAtom) {
242            Atom atom = wrappedAtom.getAtom();
243            Group group = atom.getGroup();
244            Chain chain = group.getChain();
245
246            groupPDB.add(group.getType().equals(GroupType.HETATM) ? "HETATM" : "ATOM");
247            id.add(wrappedAtom.getAtomId());
248            Element element = atom.getElement();
249            typeSymbol.add(element.equals(Element.R) ? "X" : element.toString().toUpperCase());
250            labelAtomId.add(atom.getName());
251            Character altLoc = atom.getAltLoc();
252            if (altLoc == null || altLoc == ' ') {
253                labelAltId.markNextNotPresent();
254            } else {
255                labelAltId.add(String.valueOf(altLoc));
256            }
257            labelCompId.add(group.getPDBName());
258            labelAsymId.add(wrappedAtom.getChainId());
259            String entityId = "0";
260            int seqId = group.getResidueNumber().getSeqNum();
261            if (chain.getEntityInfo() != null) {
262                entityId = Integer.toString(chain.getEntityInfo().getMolId());
263                if (chain.getEntityInfo().getType() == EntityType.POLYMER) {
264                    // this only makes sense for polymeric chains, non-polymer chains will never have seqres groups and
265                    // there's no point in calling getAlignedResIndex
266                    seqId = chain.getEntityInfo().getAlignedResIndex(group, chain);
267                }
268            }
269            labelEntityId.add(entityId);
270            labelSeqId.add(seqId);
271            String insCode = "";
272            if (group.getResidueNumber().getInsCode() != null) {
273                insCode = Character.toString(group.getResidueNumber().getInsCode());
274            }
275            if (insCode.isEmpty()) {
276                pdbxPDBInsCode.markNextUnknown();
277            } else {
278                pdbxPDBInsCode.add(insCode);
279            }
280            cartnX.add(atom.getX());
281            cartnY.add(atom.getY());
282            cartnZ.add(atom.getZ());
283            occupancy.add(atom.getOccupancy());
284            bIsoOrEquiv.add(atom.getTempFactor());
285            authSeqId.add(group.getResidueNumber().getSeqNum());
286            authCompId.add(group.getPDBName());
287            authAsymId.add(wrappedAtom.getChainName());
288            authAtomId.add(atom.getName());
289            pdbxPDBModelNum.add(wrappedAtom.getModel());
290        }
291
292        AtomSiteCollector combine(AtomSiteCollector other) {
293            throw new UnsupportedOperationException("impl by calling addAll for all collections");
294        }
295
296        Category get() {
297            groupPDB.leaveColumn();
298            id.leaveColumn();
299            typeSymbol.leaveColumn();
300            labelAtomId.leaveColumn();
301            labelAltId.leaveColumn();
302            labelCompId.leaveColumn();
303            labelAsymId.leaveColumn();
304            labelEntityId.leaveColumn();
305            labelSeqId.leaveColumn();
306            pdbxPDBInsCode.leaveColumn();
307            cartnX.leaveColumn();
308            cartnY.leaveColumn();
309            cartnZ.leaveColumn();
310            occupancy.leaveColumn();
311            bIsoOrEquiv.leaveColumn();
312            authSeqId.leaveColumn();
313            authCompId.leaveColumn();
314            authAsymId.leaveColumn();
315            authAtomId.leaveColumn();
316            pdbxPDBModelNum.leaveColumn();
317            return atomSiteBuilder.build();
318        }
319    }
320}