001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmtf;
022
023import java.io.Serializable;
024import java.text.ParseException;
025import java.text.SimpleDateFormat;
026import java.util.ArrayList;
027import java.util.Collections;
028import java.util.Date;
029import java.util.HashMap;
030import java.util.LinkedHashMap;
031import java.util.List;
032import java.util.Map;
033
034import javax.vecmath.Matrix4d;
035
036import org.biojava.nbio.structure.AminoAcid;
037import org.biojava.nbio.structure.AminoAcidImpl;
038import org.biojava.nbio.structure.Atom;
039import org.biojava.nbio.structure.AtomImpl;
040import org.biojava.nbio.structure.BondImpl;
041import org.biojava.nbio.structure.Chain;
042import org.biojava.nbio.structure.ChainImpl;
043import org.biojava.nbio.structure.Element;
044import org.biojava.nbio.structure.EntityInfo;
045import org.biojava.nbio.structure.EntityType;
046import org.biojava.nbio.structure.Group;
047import org.biojava.nbio.structure.HetatomImpl;
048import org.biojava.nbio.structure.NucleotideImpl;
049import org.biojava.nbio.structure.PDBCrystallographicInfo;
050import org.biojava.nbio.structure.PDBHeader;
051import org.biojava.nbio.structure.PdbId;
052import org.biojava.nbio.structure.Structure;
053import org.biojava.nbio.structure.StructureImpl;
054import org.biojava.nbio.structure.StructureTools;
055import org.biojava.nbio.structure.chem.ChemComp;
056import org.biojava.nbio.structure.chem.PolymerType;
057import org.biojava.nbio.structure.chem.ResidueType;
058import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
059import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
060import org.biojava.nbio.structure.xtal.CrystalCell;
061import org.biojava.nbio.structure.xtal.SpaceGroup;
062import org.rcsb.mmtf.api.StructureAdapterInterface;
063import org.rcsb.mmtf.dataholders.MmtfStructure;
064import org.slf4j.Logger;
065import org.slf4j.LoggerFactory;
066
067
068/**
069 * A biojava specific structure inflator for MMTF.
070 * Should be ported to biojava code.
071 *
072 * @author Anthony Bradley
073 * @since 5.0
074 *
075 */
076public class MmtfStructureReader implements StructureAdapterInterface, Serializable {
077
078        /** The Constant serialVersionUID. */
079        private static final long serialVersionUID = 6772030485225130853L;
080
081        /** The logger */
082        private static final Logger logger = LoggerFactory.getLogger(MmtfStructureReader.class);
083
084        /** The structure. */
085        private Structure structure;
086
087        /** The model number. */
088        private int modelNumber;
089
090        /** The chain. */
091        private Chain chain;
092
093        /** The group. */
094        private Group group;
095
096        /** The atoms in a group. */
097        private List<Atom> atomsInGroup;
098
099        /** All the atoms. */
100        private Atom[] allAtoms;
101        private int atomCounter;
102
103        /** The list of EntityInformation */
104        private List<EntityInfo> entityInfoList;
105
106        /** All the chains */
107        private List<Chain> chainList;
108
109        /** All the chains as a list of maps */
110        private List<Map<String,Chain>> chainMap;
111
112        private List<double[]> transformList;
113
114        private int bioassIndex;
115
116        private Map<String,String> chainSequenceMap;
117
118        /**
119         * Instantiates a new bio java structure decoder.
120         */
121        public MmtfStructureReader() {
122                structure = new StructureImpl();
123                modelNumber = 0;
124                entityInfoList = new ArrayList<>();
125                chainList = new ArrayList<>();
126                chainMap = new ArrayList<>();
127                transformList = new ArrayList<>();
128                chainSequenceMap = new HashMap<>();
129        }
130
131        /**
132         * Gets the structure.
133         *
134         * @return the structure
135         */
136        public Structure getStructure() {
137                return structure;
138        }
139
140        @Override
141        public void finalizeStructure() {
142                // Number the remaining ones
143                int counter =0;
144                // Add the entity info
145                for (EntityInfo entityInfo : entityInfoList) {
146                        counter++;
147                        entityInfo.setMolId(counter);
148                }
149                structure.setEntityInfos(entityInfoList);
150                // Add the actual chains
151                for(int i=0; i<chainMap.size(); i++) {
152                        // Now add the chain information
153                        Map<String, Chain> modelChainMap = chainMap.get(i);
154                        for(Chain modelChain : modelChainMap.values()){
155                                structure.addChain(modelChain, i);
156                                String sequence = chainSequenceMap.get(modelChain.getId());
157                                if (sequence == null) {
158                                        logger.warn("Sequence is null for chain with asym_id {}. Most likely the chain is non-polymeric. Will not add seqres groups for it.", modelChain.getId());
159                                        continue;
160                                }
161                                MmtfUtils.addSeqRes(modelChain, sequence);
162                        }
163                }
164                StructureTools.cleanUpAltLocs(structure);
165        }
166
167        @Override
168        public void initStructure(int totalNumBonds, int totalNumAtoms, int totalNumGroups,
169                        int totalNumChains, int totalNumModels, String modelId) {
170                if (modelId != null) {
171                        structure.setPdbId(new PdbId(modelId));
172                }
173                allAtoms = new Atom[totalNumAtoms];
174        }
175
176
177        /* (non-Javadoc)
178         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#setModelInfo(int, int)
179         */
180        @Override
181        public void setModelInfo(int inputModelNumber,
182                        int chainCount) {
183                modelNumber = inputModelNumber;
184                structure.addModel(new ArrayList<Chain>(chainCount));
185                chainMap.add(new LinkedHashMap<>());
186        }
187
188        /* (non-Javadoc)
189         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface
190         * #setChainInfo(java.lang.String, int)
191         */
192        @Override
193        public void setChainInfo(String chainId, String chainName, int groupCount) {
194                // First check to see if the chain exists
195                Map<String, Chain> modelChainMap = chainMap.get(modelNumber);
196                if(modelChainMap.containsKey(chainId)){
197                        chain = modelChainMap.get(chainId);
198                }
199                // If we need to set a new chain do this
200                else{
201                        chain = new ChainImpl();
202                        chain.setId(chainId.trim());
203                        chain.setName(chainName);
204                        chain.setAtomGroups(new ArrayList<>(groupCount));
205                        modelChainMap.put(chainId, chain);
206                        chainList.add(chain);
207                }
208        }
209
210
211        /* (non-Javadoc)
212         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface
213         * #setGroupInfo(java.lang.String, int, char, int, int)
214         */
215        @Override
216        public void setGroupInfo(String groupName, int groupNumber,
217                        char insertionCode, String chemCompType, int atomCount, int bondCount,
218                        char singleLetterCode, int sequenceIndexId, int secStructType) {
219                // Get the polymer type
220                ResidueType residueType = ResidueType.getResidueTypeFromString(chemCompType);
221                if (residueType == null)
222                        throw new IllegalStateException("Couldn't resolve residue type for "+ chemCompType);
223
224                int polymerType = getGroupTypIndicator(residueType.polymerType);
225                switch (polymerType) {
226                case 1:
227                        AminoAcid aa = new AminoAcidImpl();
228                        // Now set the one letter code
229                        aa.setAminoType(singleLetterCode);
230                        group = aa;
231                        break;
232                case 2:
233                        group = new NucleotideImpl();
234                        break;
235                default:
236                        group = new HetatomImpl();
237                        break;
238                }
239                atomsInGroup = new ArrayList<>();
240                ChemComp chemComp = new ChemComp();
241                chemComp.setOneLetterCode(String.valueOf(singleLetterCode));
242                chemComp.setType(chemCompType.toUpperCase());
243                chemComp.setResidueType(residueType);
244                chemComp.setPolymerType(residueType.polymerType);
245                group.setChemComp(chemComp);
246                group.setPDBName(groupName);
247                if (insertionCode == MmtfStructure.UNAVAILABLE_CHAR_VALUE) {
248                        group.setResidueNumber(chain.getName().trim(), groupNumber, null);
249                } else {
250                        group.setResidueNumber(chain.getName().trim(),
251                                        groupNumber, insertionCode);
252                }
253                group.setAtoms(new ArrayList<>(atomCount));
254                if (polymerType==1 || polymerType==2) {
255                        MmtfUtils.insertSeqResGroup(chain, group, sequenceIndexId);
256                }
257                if (atomCount > 0) {
258                        chain.addGroup(group);
259                }
260                MmtfUtils.setSecStructType(group, secStructType);
261        }
262
263        /**
264         *
265         * @return
266         */
267        private Group getGroupWithSameResNumButDiffPDBName() {
268                // If this chain already has this group number
269                for (Group g : chain.getAtomGroups() ) {
270                        if (g.getResidueNumber().equals(group.getResidueNumber())) {
271                                if( ! g.getPDBName().equals(group.getPDBName() )){
272                                        return g;
273                                }
274                        }
275                }
276                return null;
277        }
278
279        /* (non-Javadoc)
280         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#
281         * setAtomInfo(java.lang.String, int, char, float, float,
282         * float, float, float, java.lang.String, int)
283         */
284        @Override
285        public void setAtomInfo(String atomName,
286                        int serialNumber, char alternativeLocationId, float x,
287                        float y, float z, float occupancy,
288                        float temperatureFactor,
289                        String element, int charge) {
290                Atom atom = new AtomImpl();
291                Group altGroup = null;
292                atom.setPDBserial(serialNumber);
293                atom.setName(atomName.trim());
294                atom.setElement(Element.valueOfIgnoreCase(element));
295                if(alternativeLocationId==MmtfStructure.UNAVAILABLE_CHAR_VALUE){
296                        alternativeLocationId = ' ';
297                }
298                if (alternativeLocationId != ' ') {
299                        // Get the altGroup
300                        altGroup = getCorrectAltLocGroup(alternativeLocationId);
301                        atom.setAltLoc(alternativeLocationId);
302                } else {
303                        atom.setAltLoc(Character.valueOf(' '));
304                }
305                atom.setX(x);
306                atom.setY(y);
307                atom.setZ(z);
308                atom.setOccupancy(occupancy);
309                atom.setTempFactor(temperatureFactor);
310                atom.setCharge((short) charge);
311                if (altGroup == null) {
312                        group.addAtom(atom);
313                } else {
314                        altGroup.setChain(chain);
315                        altGroup.addAtom(atom);
316                }
317
318                // IF the main group doesn't have this atom
319                if (!group.hasAtom(atom.getName())) {
320
321                        // If it's not a microheterogenity case
322                        if (group.getPDBName().equals(atom.getGroup().getPDBName())) {
323                                // And it's not a deuterated case.  'nanoheterogenity'
324                                if(!StructureTools.hasNonDeuteratedEquiv(atom,group)){
325                                        group.addAtom(atom);
326                                }
327                        }
328                }
329                atomsInGroup.add(atom);
330                allAtoms[atomCounter] = atom;
331                atomCounter++;
332        }
333
334        /* (non-Javadoc)
335         * @see org.rcsb.mmtf.decoder.StructureDecoderInter
336         * face#setGroupBonds(int, int, int)
337         */
338        @Override
339        public void setGroupBond(int indOne, int indTwo, int bondOrder) {
340
341                // Get the atoms
342                Atom atomOne = atomsInGroup.get(indOne);
343                Atom atomTwo = atomsInGroup.get(indTwo);
344
345                // set the new bond
346                new BondImpl(atomOne, atomTwo, bondOrder);
347
348        }
349
350        /* (non-Javadoc)
351         * @see org.rcsb.mmtf.decoder.StructureDecoder
352         * Interface#setInterGroupBonds(int, int, int)
353         */
354        @Override
355        public void setInterGroupBond(int indOne, int indTwo, int bondOrder) {
356
357                // Get the atoms
358                Atom atomOne = allAtoms[indOne];
359                Atom atomTwo = allAtoms[indTwo];
360
361                // set the new bond (this
362                new BondImpl(atomOne, atomTwo, bondOrder);
363        }
364
365
366        /**
367         * Generates Alternate location groups.
368         *
369         * @param altLoc the alt loc
370         * @return the correct alt loc group
371         */
372        private Group getCorrectAltLocGroup(Character altLoc) {
373                // see if we know this altLoc already;
374                List<Atom> atoms = group.getAtoms();
375                if (atoms.size() > 0) {
376                        Atom a1 = atoms.get(0);
377                        // we are just adding atoms to the current group
378                        // probably there is a second group following later...
379                        if (a1.getAltLoc().equals(altLoc)) {
380                                return group;   }
381                }
382
383                // Get the altLocGroup
384                Group altLocgroup = group.getAltLocGroup(altLoc);
385                if (altLocgroup != null) {
386                        return altLocgroup;
387                }
388                // If the group already exists (microheterogenity).
389                Group oldGroup = getGroupWithSameResNumButDiffPDBName();
390                if (oldGroup!= null){
391                        Group altLocG = group;
392                        group = oldGroup;
393                        group.addAltLoc(altLocG);
394                        chain.getAtomGroups().remove(altLocG);
395                        return altLocG;
396                }
397                // no matching altLoc group found.
398                // build it up.
399                if (group.getAtoms().size() == 0) {
400                        return group;
401                }
402                Group altLocG = (Group) group.clone();
403                // drop atoms from cloned group...
404                // https://redmine.open-bio.org/issues/3307
405                altLocG.setAtoms(new ArrayList<Atom>());
406                altLocG.getAltLocs().clear();
407                group.addAltLoc(altLocG);
408                return altLocG;
409
410        }
411
412
413        /* (non-Javadoc)
414         * @see org.rcsb.mmtf.decoder.StructureDecoderInterface#
415         * setXtalInfo(java.lang.String, java.util.List)
416         */
417        @Override
418        public void setXtalInfo(String spaceGroupString, float[] unitCell, double[][] ncsOperMatrixList) {
419                // Now set the xtalographic information
420                PDBCrystallographicInfo pci = new PDBCrystallographicInfo();
421                SpaceGroup spaceGroup = SpaceGroup.parseSpaceGroup(spaceGroupString);
422                pci.setSpaceGroup(spaceGroup);
423                if (unitCell.length > 0) {
424                        CrystalCell cell = new CrystalCell(unitCell[0], unitCell[1],
425                                        unitCell[2], unitCell[3], unitCell[4], unitCell[5]);
426                        pci.setCrystalCell(cell);
427                        structure.setCrystallographicInfo(pci);
428                }
429
430                pci.setNcsOperators(MmtfUtils.getNcsAsMatrix4d(ncsOperMatrixList));
431        }
432
433
434        /**
435         * Get the type of group (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0)
436         * @param polymerType
437         * @return The type of group. (0,1 or 2) depending on whether it is an amino aicd (1), nucleic acid (2) or ligand (0)
438         */
439        private int getGroupTypIndicator(PolymerType polymerType) {
440                if(PolymerType.PROTEIN_ONLY.contains(polymerType)){
441                        return 1;
442                }
443                if(PolymerType.POLYNUCLEOTIDE_ONLY.contains(polymerType)){
444                        return 2;
445                }
446                return 0;
447        }
448
449
450        @Override
451        public void setBioAssemblyTrans(int bioAssemblyId, int[] inputChainIndices, double[] inputTransform, String name) {
452                // Biojava uses this as a one indexed id.
453                bioAssemblyId++;
454                if(bioassIndex!=bioAssemblyId){
455                        transformList = new ArrayList<>();
456                        bioassIndex = bioAssemblyId;
457                }
458                PDBHeader pdbHeader = structure.getPDBHeader();
459                // Get the bioassembly data
460                Map<Integer, BioAssemblyInfo> bioAssemblies = pdbHeader.getBioAssemblies();
461                // Get the bioassembly itself (if it exists
462                BioAssemblyInfo bioAssInfo;
463                if (bioAssemblies.containsKey(bioAssemblyId)){
464                        bioAssInfo = bioAssemblies.get(bioAssemblyId);
465                }
466                else{
467                        bioAssInfo = new  BioAssemblyInfo();
468                        bioAssInfo.setTransforms(new ArrayList<BiologicalAssemblyTransformation>());
469                        bioAssemblies.put(bioAssemblyId, bioAssInfo);
470                        bioAssInfo.setId(bioAssemblyId);
471                }
472
473                for(int currChainIndex : inputChainIndices){
474                        BiologicalAssemblyTransformation bioAssTrans = new BiologicalAssemblyTransformation();
475                        Integer transId = transformList.indexOf(inputTransform)+1;
476                        if(transId==0){
477                                transformList.add(inputTransform);
478                                transId = transformList.indexOf(inputTransform)+1;
479                        }
480                        bioAssTrans.setId(transId.toString());
481                        // If it actually has an index - if it doesn't it is because the chain has no density.
482                        if (currChainIndex!=-1){
483                                bioAssTrans.setChainId(chainList.get(currChainIndex).getId());
484                        }
485                        else {
486                                continue;
487                        }
488                        // Now set matrix
489                        Matrix4d mat4d = new Matrix4d(inputTransform);
490                        bioAssTrans.setTransformationMatrix(mat4d);
491                        // Now add this
492                        bioAssInfo.getTransforms().add(bioAssTrans);
493                        // sort transformations into a unique order
494                        Collections.sort(bioAssInfo.getTransforms());
495                }
496        }
497
498        @Override
499        public void setEntityInfo(int[] chainIndices, String sequence, String description, String type) {
500                // First get the chains
501                EntityInfo entityInfo = new EntityInfo();
502                entityInfo.setDescription(description);
503                entityInfo.setType(EntityType.entityTypeFromString(type));
504                List<Chain> chains = new ArrayList<>();
505                // Now loop through the chain ids and make a list of them
506                for( int index : chainIndices) {
507                        chains.add(chainList.get(index));
508                        chainList.get(index).setEntityInfo(entityInfo);
509                        chainSequenceMap.put(chainList.get(index).getId(), sequence);
510                }
511                entityInfo.setChains(chains);
512                entityInfoList.add(entityInfo);
513        }
514
515        @Override
516        public void setHeaderInfo(float rFree, float rWork, float resolution, String title, String depositionDate,
517                        String releaseDate, String[] experimentalMethods) {
518                SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
519                // Get the pdb header
520                PDBHeader pdbHeader = structure.getPDBHeader();
521                pdbHeader.setTitle(title);
522                pdbHeader.setResolution(resolution);
523                pdbHeader.setRfree(rFree);
524                pdbHeader.setRwork(rWork);
525                // Now loop through the techniques and add them in
526                if (experimentalMethods!=null) {
527                        for (String techniqueStr : experimentalMethods) {
528                                pdbHeader.setExperimentalTechnique(techniqueStr);
529                        }
530                }
531                // Set the dates
532                if(depositionDate!=null){
533                        try {
534                                Date depDate = formatter.parse(depositionDate);
535                                pdbHeader.setDepDate(depDate);
536                        } catch (ParseException e) {
537                                logger.warn("Could not parse date string '{}', depositon date will be unavailable", depositionDate);
538                        }
539                }
540                else{
541                        pdbHeader.setDepDate(new Date(0));
542                }
543                if(releaseDate!=null){
544                        try {
545                                Date relDate = formatter.parse(releaseDate);
546                                pdbHeader.setRelDate(relDate);
547                        } catch (ParseException e) {
548                                logger.warn("Could not parse date string '{}', release date will be unavailable", releaseDate);
549                        }
550                }
551                else{
552                        pdbHeader.setRelDate(new Date(0));
553                }
554        }
555}