001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmtf;
022
023import java.text.DateFormat;
024import java.text.SimpleDateFormat;
025import java.util.ArrayList;
026import java.util.Date;
027import java.util.HashSet;
028import java.util.LinkedHashMap;
029import java.util.List;
030import java.util.Map;
031import java.util.Map.Entry;
032import java.util.Set;
033
034import javax.vecmath.Matrix4d;
035
036import org.biojava.nbio.structure.AminoAcid;
037import org.biojava.nbio.structure.AminoAcidImpl;
038import org.biojava.nbio.structure.Atom;
039import org.biojava.nbio.structure.Bond;
040import org.biojava.nbio.structure.Chain;
041import org.biojava.nbio.structure.ExperimentalTechnique;
042import org.biojava.nbio.structure.Group;
043import org.biojava.nbio.structure.GroupType;
044import org.biojava.nbio.structure.NucleotideImpl;
045import org.biojava.nbio.structure.PDBCrystallographicInfo;
046import org.biojava.nbio.structure.Structure;
047import org.biojava.nbio.structure.StructureException;
048import org.biojava.nbio.structure.chem.ChemComp;
049import org.biojava.nbio.structure.chem.ChemCompGroupFactory;
050import org.biojava.nbio.structure.chem.ChemCompTools;
051import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
052import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
053import org.biojava.nbio.structure.secstruc.SecStrucCalc;
054import org.biojava.nbio.structure.secstruc.SecStrucState;
055import org.biojava.nbio.structure.secstruc.SecStrucType;
056import org.biojava.nbio.structure.xtal.CrystalCell;
057import org.biojava.nbio.structure.xtal.SpaceGroup;
058import org.rcsb.mmtf.dataholders.DsspType;
059import org.rcsb.mmtf.utils.CodecUtils;
060import org.slf4j.Logger;
061import org.slf4j.LoggerFactory;
062
063/**
064 * A utils class of functions needed for Biojava to read and write to mmtf.
065 * @author Anthony Bradley
066 *
067 */
068public class MmtfUtils {
069
070        private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class);
071
072        /**
073         * This sets all microheterogeneous groups
074         * (previously alternate location groups) as separate groups.
075         * This is required because mmtf groups cannot have multiple HET codes.
076         * @param bioJavaStruct
077         */
078        public static void fixMicroheterogenity(Structure bioJavaStruct) {
079                // Loop through the models
080                for (int i=0; i<bioJavaStruct.nrModels(); i++){
081                        // Then the chains
082                        List<Chain> chains = bioJavaStruct.getModel(i);
083                        for (Chain c : chains) {
084                                // Build a new list of groups
085                                List<Group> outGroups = new ArrayList<>();
086                                for (Group g : c.getAtomGroups()) {
087                                        List<Group> removeList = new ArrayList<>();
088                                        for (Group altLoc : g.getAltLocs()) {
089                                                // Check if they are not equal -> microheterogenity
090                                                if(! altLoc.getPDBName().equals(g.getPDBName())) {
091                                                        // Now add this group to the main list
092                                                        removeList.add(altLoc);
093                                                }
094                                        }
095                                        // Add this group
096                                        outGroups.add(g);
097                                        // Remove any microhet alt locs
098                                        g.getAltLocs().removeAll(removeList);
099                                        // Add these microhet alt locs
100                                        outGroups.addAll(removeList);
101                                }
102                                c.setAtomGroups(outGroups);
103                        }
104                }
105        }
106
107
108        /**
109         * Generate the secondary structure for a Biojava structure object.
110         * @param bioJavaStruct the Biojava structure for which it is to be calculate.
111         */
112        public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) {
113                SecStrucCalc ssp = new SecStrucCalc();
114
115                try{
116                        ssp.calculate(bioJavaStruct, true);
117                }
118                catch(StructureException e) {
119                        LOGGER.warn("Could not calculate secondary structure (error {}). Secondary structure annotation will be missing.", e.getMessage());
120                }
121        }
122
123        /**
124         * Get the string representation of a space group.
125         * @param spaceGroup the input SpaceGroup object
126         * @return the space group as a string.
127         */
128        public static String getSpaceGroupAsString(SpaceGroup spaceGroup) {
129                if(spaceGroup==null){
130                        return "NA";
131                }
132                else{
133                        return spaceGroup.getShortSymbol();
134                }
135        }
136
137        /**
138         * Get the length six array of the unit cell information.
139         * @param xtalInfo the input PDBCrystallographicInfo object
140         * @return the length six float array
141         */
142        public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) {
143                CrystalCell xtalCell = xtalInfo.getCrystalCell();
144                if(xtalCell==null){
145                        return null;
146                }else{
147                        float[] inputUnitCell = new float[6];
148                        inputUnitCell[0] = (float) xtalCell.getA();
149                        inputUnitCell[1] = (float) xtalCell.getB();
150                        inputUnitCell[2] = (float) xtalCell.getC();
151                        inputUnitCell[3] = (float) xtalCell.getAlpha();
152                        inputUnitCell[4] = (float) xtalCell.getBeta();
153                        inputUnitCell[5] = (float) xtalCell.getGamma();
154                        return inputUnitCell;
155                }
156        }
157
158        /**
159         * Converts the set of experimental techniques to an array of strings.
160         * @param experimentalTechniques the input set of experimental techniques
161         * @return the array of strings describing the methods used.
162         */
163        public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) {
164                if(experimentalTechniques==null){
165                        return new String[0];
166                }
167                String[] outArray = new String[experimentalTechniques.size()];
168                int index = 0;
169                for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) {
170                        outArray[index] = experimentalTechnique.getName();
171                        index++;
172                }
173                return outArray;
174        }
175
176        /**
177         * Covert a Date object to ISO time format.
178         * @param inputDate The input date object
179         * @return the time in ISO time format
180         */
181        public static String dateToIsoString(Date inputDate) {
182                DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd");
183                return dateStringFormat.format(inputDate);
184        }
185
186        /**
187         * Convert a bioassembly information into a map of transform, chainindices it relates to.
188         * @param bioassemblyInfo  the bioassembly info object for this structure
189         * @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to.
190         * @return the bioassembly information (as primitive types).
191         */
192        public static Map<double[], int[]> getTransformMap(BioAssemblyInfo bioassemblyInfo, Map<String, Integer> chainIdToIndexMap) {
193            Map<Matrix4d, List<Integer>> matMap = new LinkedHashMap<>();
194                List<BiologicalAssemblyTransformation> transforms = bioassemblyInfo.getTransforms();
195                for (BiologicalAssemblyTransformation transformation : transforms) {
196                        Matrix4d transMatrix = transformation.getTransformationMatrix();
197                        String transChainId = transformation.getChainId();
198                        if (!chainIdToIndexMap.containsKey(transChainId)){
199                                continue;
200                        }
201                        int chainIndex = chainIdToIndexMap.get(transformation.getChainId());
202                        if(matMap.containsKey(transMatrix)){
203                                matMap.get(transMatrix).add(chainIndex);
204                        }
205                        else{
206                                List<Integer> chainIdList = new ArrayList<>();
207                                chainIdList.add(chainIndex);
208                                matMap.put(transMatrix, chainIdList);
209                        }
210                }
211
212            Map<double[], int[]> outMap = new LinkedHashMap<>();
213                for (Entry<Matrix4d, List<Integer>> entry : matMap.entrySet()) {
214                        outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue()));
215                }
216                return outMap;
217        }
218
219        /**
220         * Convert a four-d matrix to a double array. Row-packed.
221         * @param transformationMatrix the input matrix4d object
222         * @return the double array (16 long).
223         */
224        public static double[] convertToDoubleArray(Matrix4d transformationMatrix) {
225                // Initialise the output array
226                double[] outArray = new double[16];
227                // Iterate over the matrix
228                for(int i=0; i<4; i++){
229                        for(int j=0; j<4; j++){
230                                // Now set this element
231                                outArray[i*4+j] = transformationMatrix.getElement(i,j);
232                        }
233                }
234                return outArray;
235        }
236
237        /**
238         * Count the total number of groups in the structure
239         * @param structure the input structure
240         * @return the total number of groups
241         */
242        public static int getNumGroups(Structure structure) {
243                int count = 0;
244                for(int i=0; i<structure.nrModels(); i++) {
245                        for(Chain chain : structure.getChains(i)){
246                                count+= chain.getAtomGroups().size();
247                        }
248                }
249                return count;
250        }
251
252
253        /**
254         * Get a list of atoms for a group. Only add each atom once.
255         * @param inputGroup the Biojava Group to consider
256         * @return the atoms for the input Biojava Group
257         */
258        public static List<Atom> getAtomsForGroup(Group inputGroup) {
259                Set<Atom> uniqueAtoms = new HashSet<>();
260                List<Atom> theseAtoms = new ArrayList<>();
261                for(Atom a: inputGroup.getAtoms()){
262                        theseAtoms.add(a);
263                        uniqueAtoms.add(a);
264                }
265                List<Group> altLocs = inputGroup.getAltLocs();
266                for(Group thisG: altLocs){
267                        for(Atom a: thisG.getAtoms()){
268                                if(uniqueAtoms.contains(a)){
269                                        continue;
270                                }
271                                theseAtoms.add(a);
272                        }
273                }
274                return theseAtoms;
275        }
276
277        /**
278         * Find the number of bonds in a group
279         * @param atomsInGroup the list of atoms in the group
280         * @return the number of atoms in the group
281         */
282        public static int getNumBondsInGroup(List<Atom> atomsInGroup) {
283                int bondCounter = 0;
284                for(Atom atom : atomsInGroup) {
285                        if(atom.getBonds()==null){
286                                continue;
287                        }
288                        for(Bond bond : atom.getBonds()) {
289                                // Now set the bonding information.
290                                Atom other = bond.getOther(atom);
291                                // If both atoms are in the group
292                                if (atomsInGroup.indexOf(other)!=-1){
293                                        Integer firstBondIndex = atomsInGroup.indexOf(atom);
294                                        Integer secondBondIndex = atomsInGroup.indexOf(other);
295                                        // Don't add the same bond twice
296                                        if (firstBondIndex<secondBondIndex){
297                                                bondCounter++;
298                                        }
299                                }
300                        }
301                }
302                return bondCounter;
303        }
304
305        /**
306         * Get the secondary structure as defined by DSSP.
307         * @param group the input group to be calculated
308         * @return the integer index of the group type.
309         */
310        public static int getSecStructType(Group group) {
311                SecStrucState props = (SecStrucState) group.getProperty("secstruc");
312                if(props==null){
313                        return DsspType.NULL_ENTRY.getDsspIndex();
314                }
315                return DsspType.dsspTypeFromString(props.getType().name).getDsspIndex();
316        }
317
318        /**
319         * Get the secondary structure as defined by DSSP.
320         * @param group the input group to be calculated
321         * @param dsspIndex integer index of the group type.
322         */
323        public static void setSecStructType(Group group, int dsspIndex) {
324                SecStrucType secStrucType = getSecStructTypeFromDsspIndex(dsspIndex);
325                SecStrucState secStrucState = new SecStrucState(group, "MMTF_ASSIGNED", secStrucType);
326                if(secStrucType!=null){
327                        group.setProperty("secstruc", secStrucState);
328                }
329        }
330
331
332        /**
333         * Set the DSSP type based on a numerical index.
334         * @param dsspIndex the integer index of the type to set
335         * @return the instance of the SecStrucType object holding this secondary
336         * structure type.
337         */
338        public static SecStrucType getSecStructTypeFromDsspIndex(int dsspIndex) {
339                String dsspType = DsspType.dsspTypeFromInt(dsspIndex).getDsspType();
340                for(SecStrucType secStrucType : SecStrucType.values())
341                {
342                        if(dsspType.equals(secStrucType.name))
343                        {
344                                return secStrucType;
345                        }
346                }
347                // Return a null entry.
348                return null;
349        }
350
351        /**
352         * Get summary information for the structure.
353         * @param structure the structure for which to get the information.
354         */
355        public static MmtfSummaryDataBean getStructureInfo(Structure structure) {
356                MmtfSummaryDataBean mmtfSummaryDataBean = new MmtfSummaryDataBean();
357                // Get all the atoms
358                List<Atom> theseAtoms = new ArrayList<>();
359                List<Chain> allChains = new ArrayList<>();
360                Map<String, Integer> chainIdToIndexMap = new LinkedHashMap<>();
361                int chainCounter = 0;
362                int bondCount = 0;
363                mmtfSummaryDataBean.setAllAtoms(theseAtoms);
364                mmtfSummaryDataBean.setAllChains(allChains);
365                mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap);
366                for (int i=0; i<structure.nrModels(); i++){
367                        List<Chain> chains = structure.getModel(i);
368                        allChains.addAll(chains);
369                        for (Chain chain : chains) {
370                                String idOne = chain.getId();
371                                if (!chainIdToIndexMap.containsKey(idOne)) {
372                                        chainIdToIndexMap.put(idOne, chainCounter);
373                                }
374                                chainCounter++;
375                                for (Group g : chain.getAtomGroups()) {
376                                        for(Atom atom: getAtomsForGroup(g)){
377                                                theseAtoms.add(atom);
378                                                // If both atoms are in the group
379                                                if (atom.getBonds()!=null){
380                                                        bondCount+=atom.getBonds().size();
381                                                }
382                                        }
383                                }
384                        }
385                }
386                // Assumes all bonds are referenced twice
387                mmtfSummaryDataBean.setNumBonds(bondCount/2);
388                return mmtfSummaryDataBean;
389
390        }
391
392        /**
393         * Get a list of N 4*4 matrices from a single list of doubles of length 16*N.
394         * @param ncsOperMatrixList the input list of doubles
395         * @return the list of 4*4 matrics
396         */
397        public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) {
398                if(ncsOperMatrixList==null){
399                        return null;
400                }
401                int numMats = ncsOperMatrixList.length;
402                if(numMats==0){
403                        return null;
404                }
405                if(numMats==1 && ncsOperMatrixList[0].length==0){
406                        return null;
407                }
408                Matrix4d[] outList = new Matrix4d[numMats];
409                for(int i=0; i<numMats; i++){
410                        outList[i] = new Matrix4d(ncsOperMatrixList[i]);
411                }
412                return outList;
413        }
414
415        /**
416         * Get a list of length N*16 of a list of Matrix4d*N.
417         * @param ncsOperators the {@link Matrix4d} list
418         * @return the list of length N*16 of the list of matrices
419         */
420        public static double[][] getNcsAsArray(Matrix4d[] ncsOperators) {
421                if(ncsOperators==null){
422                        return new double[0][0];
423                }
424                double[][] outList = new double[ncsOperators.length][16];
425                for(int i=0; i<ncsOperators.length;i++){
426                        outList[i] = convertToDoubleArray(ncsOperators[i]);
427                }
428                return outList;
429        }
430
431        /**
432         * Insert the group in the given position in the sequence.
433         * @param chain the chain to add the seq res group to
434         * @param group the group to add
435         * @param sequenceIndexId the index to add it in
436         */
437        public static void insertSeqResGroup(Chain chain, Group group, int sequenceIndexId) {
438                List<Group> seqResGroups = chain.getSeqResGroups();
439                addGroupAtId(seqResGroups, group, sequenceIndexId);
440        }
441
442        /**
443         * Add the missing groups to the SeqResGroups.
444         * @param modelChain the chain to add the information for
445         * @param sequence the sequence of the construct
446         */
447        public static void addSeqRes(Chain modelChain, String sequence) {
448
449                List<Group> seqResGroups = modelChain.getSeqResGroups();
450                GroupType chainType = getChainType(modelChain.getAtomGroups());
451
452                for(int i=0; i<sequence.length(); i++){
453
454                        char singleLetterCode = sequence.charAt(i);
455                        Group group = null;
456                        if (seqResGroups.size() > i) {
457                                group=seqResGroups.get(i);
458                        }
459                        if(group!=null){
460                                continue;
461                        }
462
463                        group = getSeqResGroup(singleLetterCode, chainType);
464                        addGroupAtId(seqResGroups, group, i);
465                }
466        }
467
468        private static GroupType getChainType(List<Group> groups) {
469                for(Group group : groups) {
470                        if(group!=null && group.getType()!=GroupType.HETATM){
471                                return group.getType();
472                        }
473                }
474                return GroupType.HETATM;
475        }
476
477        private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) {
478                while(seqResGroups.size()<=sequenceIndexId){
479                        seqResGroups.add(null);
480                }
481                if(sequenceIndexId>=0){
482                        seqResGroups.set(sequenceIndexId, group);
483                }
484        }
485
486        private static Group getSeqResGroup(char singleLetterCode, GroupType type) {
487
488                if(type==GroupType.AMINOACID){
489                        String threeLetter = ChemCompTools.getAminoThreeLetter(singleLetterCode);
490                        if (threeLetter == null) return null;
491                        ChemComp chemComp = ChemCompGroupFactory.getChemComp(threeLetter);
492
493                        AminoAcidImpl a = new AminoAcidImpl();
494                        a.setRecordType(AminoAcid.SEQRESRECORD);
495                        a.setAminoType(singleLetterCode);
496                        a.setPDBName(threeLetter);
497                        a.setChemComp(chemComp);
498                        return a;
499
500                } else if (type==GroupType.NUCLEOTIDE) {
501                        String twoLetter = ChemCompTools.getDNATwoLetter(singleLetterCode);
502                        if (twoLetter == null) return null;
503                        ChemComp chemComp = ChemCompGroupFactory.getChemComp(twoLetter);
504
505                        NucleotideImpl n = new NucleotideImpl();
506                        n.setPDBName(twoLetter);
507                        n.setChemComp(chemComp);
508                        return n;
509                }
510                else{
511                        return null;
512                }
513        }
514}