001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmtf;
022
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.vecmath.Matrix4d;

import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Bond;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.ExperimentalTechnique;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.NucleotideImpl;
import org.biojava.nbio.structure.PDBCrystallographicInfo;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.secstruc.DSSPParser;
import org.biojava.nbio.structure.secstruc.SecStrucCalc;
import org.biojava.nbio.structure.secstruc.SecStrucState;
import org.biojava.nbio.structure.secstruc.SecStrucType;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.rcsb.mmtf.dataholders.DsspType;
import org.rcsb.mmtf.utils.CodecUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
066
067/**
068 * A utils class of functions needed for Biojava to read and write to mmtf.
069 * @author Anthony Bradley
070 *
071 */
072public class MmtfUtils {
073
074        private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class);
075
076        /**
077         * Set up the configuration parameters for BioJava.
078         */
079        public static AtomCache setUpBioJava() {
080                // Set up the atom cache etc
081                AtomCache cache = new AtomCache();
082                cache.setUseMmCif(true);
083                FileParsingParameters params = cache.getFileParsingParams();
084                params.setCreateAtomBonds(true);
085                params.setAlignSeqRes(true);
086                params.setParseBioAssembly(true);
087                DownloadChemCompProvider cc = new DownloadChemCompProvider();
088                ChemCompGroupFactory.setChemCompProvider(cc);
089                cc.checkDoFirstInstall();
090                cache.setFileParsingParams(params);
091                StructureIO.setAtomCache(cache);
092                return cache;
093        }
094
095        /**
096         * Set up the configuration parameters for BioJava.
097         * @param extraUrl the string describing the URL (or file path) from which
098         * to get missing CCD entries.
099         */
100        public static AtomCache setUpBioJava(String extraUrl) {
101                // Set up the atom cache etc
102                AtomCache cache = new AtomCache();
103                cache.setUseMmCif(true);
104                FileParsingParameters params = cache.getFileParsingParams();
105                params.setCreateAtomBonds(true);
106                params.setAlignSeqRes(true);
107                params.setParseBioAssembly(true);
108                DownloadChemCompProvider.serverBaseUrl = extraUrl;
109                DownloadChemCompProvider.useDefaultUrlLayout = false;
110                DownloadChemCompProvider cc = new DownloadChemCompProvider();
111                ChemCompGroupFactory.setChemCompProvider(cc);
112                cc.checkDoFirstInstall();
113                cache.setFileParsingParams(params);
114                StructureIO.setAtomCache(cache);
115                return cache;
116        }
117
118
119        /**
120         * This sets all microheterogeneous groups
121         * (previously alternate location groups) as separate groups.
122         * This is required because mmtf groups cannot have multiple HET codes.
123         * @param bioJavaStruct
124         */
125        public static void fixMicroheterogenity(Structure bioJavaStruct) {
126                // Loop through the models
127                for (int i=0; i<bioJavaStruct.nrModels(); i++){
128                        // Then the chains
129                        List<Chain> chains = bioJavaStruct.getModel(i);
130                        for (Chain c : chains) {
131                                // Build a new list of groups
132                                List<Group> outGroups = new ArrayList<>();
133                                for (Group g : c.getAtomGroups()) {
134                                        List<Group> removeList = new ArrayList<>();
135                                        for (Group altLoc : g.getAltLocs()) {
136                                                // Check if they are not equal -> microheterogenity
137                                                if(! altLoc.getPDBName().equals(g.getPDBName())) {
138                                                        // Now add this group to the main list
139                                                        removeList.add(altLoc);
140                                                }
141                                        }
142                                        // Add this group
143                                        outGroups.add(g);
144                                        // Remove any microhet alt locs
145                                        g.getAltLocs().removeAll(removeList);
146                                        // Add these microhet alt locs
147                                        outGroups.addAll(removeList);
148                                }
149                                c.setAtomGroups(outGroups);
150                        }
151                }
152        }
153
154
155        /**
156         * Generate the secondary structure for a Biojava structure object.
157         * @param bioJavaStruct the Biojava structure for which it is to be calculate.
158         */
159        public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) {
160                SecStrucCalc ssp = new SecStrucCalc();
161
162                try{
163                        ssp.calculate(bioJavaStruct, true);
164                }
165                catch(StructureException e) {
166                        LOGGER.warn("Could not calculate secondary structure (error {}). Will try to get a DSSP file from the RCSB web server instead.", e.getMessage());
167
168                        try {
169                                DSSPParser.fetch(bioJavaStruct.getPDBCode(), bioJavaStruct, true); //download from PDB the DSSP result
170                        } catch(Exception bige){
171                                LOGGER.warn("Could not get a DSSP file from RCSB web server. There will not be secondary structure assignment for this structure ({}). Error: {}", bioJavaStruct.getPDBCode(), bige.getMessage());
172                        }
173                }
174        }
175
176        /**
177         * Get the string representation of a space group.
178         * @param spaceGroup the input SpaceGroup object
179         * @return the space group as a string.
180         */
181        public static String getSpaceGroupAsString(SpaceGroup spaceGroup) {
182                if(spaceGroup==null){
183                        return "NA";
184                }
185                else{
186                        return spaceGroup.getShortSymbol();
187                }
188        }
189
190        /**
191         * Get the length six array of the unit cell information.
192         * @param xtalInfo the input PDBCrystallographicInfo object
193         * @return the length six float array
194         */
195        public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) {
196                CrystalCell xtalCell = xtalInfo.getCrystalCell();
197                if(xtalCell==null){
198                        return null;
199                }else{
200                        float[] inputUnitCell = new float[6];
201                        inputUnitCell[0] = (float) xtalCell.getA();
202                        inputUnitCell[1] = (float) xtalCell.getB();
203                        inputUnitCell[2] = (float) xtalCell.getC();
204                        inputUnitCell[3] = (float) xtalCell.getAlpha();
205                        inputUnitCell[4] = (float) xtalCell.getBeta();
206                        inputUnitCell[5] = (float) xtalCell.getGamma();
207                        return inputUnitCell;
208                }
209        }
210
211        /**
212         * Converts the set of experimental techniques to an array of strings.
213         * @param experimentalTechniques the input set of experimental techniques
214         * @return the array of strings describing the methods used.
215         */
216        public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) {
217                if(experimentalTechniques==null){
218                        return new String[0];
219                }
220                String[] outArray = new String[experimentalTechniques.size()];
221                int index = 0;
222                for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) {
223                        outArray[index] = experimentalTechnique.getName();
224                        index++;
225                }
226                return outArray;
227        }
228
229        /**
230         * Covert a Date object to ISO time format.
231         * @param inputDate The input date object
232         * @return the time in ISO time format
233         */
234        public static String dateToIsoString(Date inputDate) {
235                DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd");
236                return dateStringFormat.format(inputDate);
237        }
238
239        /**
240         * Convert a bioassembly information into a map of transform, chainindices it relates to.
241         * @param bioassemblyInfo  the bioassembly info object for this structure
242         * @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to.
243         * @return the bioassembly information (as primitive types).
244         */
245        public static Map<double[], int[]> getTransformMap(BioAssemblyInfo bioassemblyInfo, Map<String, Integer> chainIdToIndexMap) {
246            Map<Matrix4d, List<Integer>> matMap = new LinkedHashMap<>();
247                List<BiologicalAssemblyTransformation> transforms = bioassemblyInfo.getTransforms();
248                for (BiologicalAssemblyTransformation transformation : transforms) {
249                        Matrix4d transMatrix = transformation.getTransformationMatrix();
250                        String transChainId = transformation.getChainId();
251                        if (!chainIdToIndexMap.containsKey(transChainId)){
252                                continue;
253                        }
254                        int chainIndex = chainIdToIndexMap.get(transformation.getChainId());
255                        if(matMap.containsKey(transMatrix)){
256                                matMap.get(transMatrix).add(chainIndex);
257                        }
258                        else{
259                                List<Integer> chainIdList = new ArrayList<>();
260                                chainIdList.add(chainIndex);
261                                matMap.put(transMatrix, chainIdList);
262                        }
263                }
264
265            Map<double[], int[]> outMap = new LinkedHashMap<>();
266                for (Entry<Matrix4d, List<Integer>> entry : matMap.entrySet()) {
267                        outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue()));
268                }
269                return outMap;
270        }
271
272        /**
273         * Convert a four-d matrix to a double array. Row-packed.
274         * @param transformationMatrix the input matrix4d object
275         * @return the double array (16 long).
276         */
277        public static double[] convertToDoubleArray(Matrix4d transformationMatrix) {
278                // Initialise the output array
279                double[] outArray = new double[16];
280                // Iterate over the matrix
281                for(int i=0; i<4; i++){
282                        for(int j=0; j<4; j++){
283                                // Now set this element
284                                outArray[i*4+j] = transformationMatrix.getElement(i,j);
285                        }
286                }
287                return outArray;
288        }
289
290        /**
291         * Count the total number of groups in the structure
292         * @param structure the input structure
293         * @return the total number of groups
294         */
295        public static int getNumGroups(Structure structure) {
296                int count = 0;
297                for(int i=0; i<structure.nrModels(); i++) {
298                        for(Chain chain : structure.getChains(i)){
299                                count+= chain.getAtomGroups().size();
300                        }
301                }
302                return count;
303        }
304
305
306        /**
307         * Get a list of atoms for a group. Only add each atom once.
308         * @param inputGroup the Biojava Group to consider
309         * @return the atoms for the input Biojava Group
310         */
311        public static List<Atom> getAtomsForGroup(Group inputGroup) {
312                Set<Atom> uniqueAtoms = new HashSet<Atom>();
313                List<Atom> theseAtoms = new ArrayList<Atom>();
314                for(Atom a: inputGroup.getAtoms()){
315                        theseAtoms.add(a);
316                        uniqueAtoms.add(a);
317                }
318                List<Group> altLocs = inputGroup.getAltLocs();
319                for(Group thisG: altLocs){
320                        for(Atom a: thisG.getAtoms()){
321                                if(uniqueAtoms.contains(a)){
322                                        continue;
323                                }
324                                theseAtoms.add(a);
325                        }
326                }
327                return theseAtoms;
328        }
329
330        /**
331         * Find the number of bonds in a group
332         * @param atomsInGroup the list of atoms in the group
333         * @return the number of atoms in the group
334         */
335        public static int getNumBondsInGroup(List<Atom> atomsInGroup) {
336                int bondCounter = 0;
337                for(Atom atom : atomsInGroup) {
338                        if(atom.getBonds()==null){
339                                continue;
340                        }
341                        for(Bond bond : atom.getBonds()) {
342                                // Now set the bonding information.
343                                Atom other = bond.getOther(atom);
344                                // If both atoms are in the group
345                                if (atomsInGroup.indexOf(other)!=-1){
346                                        Integer firstBondIndex = atomsInGroup.indexOf(atom);
347                                        Integer secondBondIndex = atomsInGroup.indexOf(other);
348                                        // Don't add the same bond twice
349                                        if (firstBondIndex<secondBondIndex){
350                                                bondCounter++;
351                                        }
352                                }
353                        }
354                }
355                return bondCounter;
356        }
357
358        /**
359         * Get the secondary structure as defined by DSSP.
360         * @param group the input group to be calculated
361         * @return the integer index of the group type.
362         */
363        public static int getSecStructType(Group group) {
364                SecStrucState props = (SecStrucState) group.getProperty("secstruc");
365                if(props==null){
366                        return DsspType.NULL_ENTRY.getDsspIndex();
367                }
368                return DsspType.dsspTypeFromString(props.getType().name).getDsspIndex();
369        }
370
371        /**
372         * Get the secondary structure as defined by DSSP.
373         * @param group the input group to be calculated
374         * @param the integer index of the group type.
375         */
376        public static void setSecStructType(Group group, int dsspIndex) {
377                SecStrucType secStrucType = getSecStructTypeFromDsspIndex(dsspIndex);
378                SecStrucState secStrucState = new SecStrucState(group, "MMTF_ASSIGNED", secStrucType);
379                if(secStrucType!=null){
380                        group.setProperty("secstruc", secStrucState);
381                }
382                else{
383                }
384        }
385
386
387        /**
388         * Set the DSSP type based on a numerical index.
389         * @param dsspIndex the integer index of the type to set
390         * @return the instance of the SecStrucType object holding this secondary
391         * structure type.
392         */
393        public static SecStrucType getSecStructTypeFromDsspIndex(int dsspIndex) {
394                String dsspType = DsspType.dsspTypeFromInt(dsspIndex).getDsspType();
395                for(SecStrucType secStrucType : SecStrucType.values())
396                {
397                        if(dsspType==secStrucType.name)
398                        {
399                                return secStrucType;
400                        }
401                }
402                // Return a null entry.
403                return null;
404        }
405
406        /**
407         * Get summary information for the structure.
408         * @param structure the structure for which to get the information.
409         */
410        public static MmtfSummaryDataBean getStructureInfo(Structure structure) {
411                MmtfSummaryDataBean mmtfSummaryDataBean = new MmtfSummaryDataBean();
412                // Get all the atoms
413                List<Atom> theseAtoms = new ArrayList<>();
414                List<Chain> allChains = new ArrayList<>();
415                Map<String, Integer> chainIdToIndexMap = new LinkedHashMap<>();
416                int chainCounter = 0;
417                int bondCount = 0;
418                mmtfSummaryDataBean.setAllAtoms(theseAtoms);
419                mmtfSummaryDataBean.setAllChains(allChains);
420                mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap);
421                for (int i=0; i<structure.nrModels(); i++){
422                        List<Chain> chains = structure.getModel(i);
423                        allChains.addAll(chains);
424                        for (Chain chain : chains) {
425                                String idOne = chain.getId();
426                                if (!chainIdToIndexMap.containsKey(idOne)) {
427                                        chainIdToIndexMap.put(idOne, chainCounter);
428                                }
429                                chainCounter++;
430                                for (Group g : chain.getAtomGroups()) {
431                                        for(Atom atom: getAtomsForGroup(g)){
432                                                theseAtoms.add(atom);
433                                                // If both atoms are in the group
434                                                if (atom.getBonds()!=null){
435                                                        bondCount+=atom.getBonds().size();
436                                                }
437                                        }
438                                }
439                        }
440                }
441                // Assumes all bonds are referenced twice
442                mmtfSummaryDataBean.setNumBonds(bondCount/2);
443                return mmtfSummaryDataBean;
444
445        }
446
447        /**
448         * Get a list of N 4*4 matrices from a single list of doubles of length 16*N.
449         * @param ncsOperMatrixList the input list of doubles
450         * @return the list of 4*4 matrics
451         */
452        public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) {
453                if(ncsOperMatrixList==null){
454                        return null;
455                }
456                int numMats = ncsOperMatrixList.length;
457                if(numMats==0){
458                        return null;
459                }
460                if(numMats==1 && ncsOperMatrixList[0].length==0){
461                        return null;
462                }
463                Matrix4d[] outList = new Matrix4d[numMats];
464                for(int i=0; i<numMats; i++){
465                        outList[i] = new Matrix4d(ncsOperMatrixList[i]);
466                }
467                return outList;
468        }
469
470        /**
471         * Get a list of length N*16 of a list of Matrix4d*N.
472         * @param ncsOperators the {@link Matrix4d} list
473         * @return the list of length N*16 of the list of matrices
474         */
475        public static double[][] getNcsAsArray(Matrix4d[] ncsOperators) {
476                if(ncsOperators==null){
477                        return new double[0][0];
478                }
479                double[][] outList = new double[ncsOperators.length][16];
480                for(int i=0; i<ncsOperators.length;i++){
481                        outList[i] = convertToDoubleArray(ncsOperators[i]);
482                }
483                return outList;
484        }
485
486        /**
487         * Insert the group in the given position in the sequence.
488         * @param chain the chain to add the seq res group to
489         * @param group the group to add
490         * @param sequenceIndexId the index to add it in
491         */
492        public static void insertSeqResGroup(Chain chain, Group group, int sequenceIndexId) {
493                List<Group> seqResGroups = chain.getSeqResGroups();
494                addGroupAtId(seqResGroups, group, sequenceIndexId);
495        }
496
497        /**
498         * Add the missing groups to the SeqResGroups.
499         * @param modelChain the chain to add the information for
500         * @param sequence the sequence of the construct
501         */
502        public static void addSeqRes(Chain modelChain, String sequence) {
503                List<Group> seqResGroups = modelChain.getSeqResGroups();
504                GroupType chainType = getChainType(modelChain.getAtomGroups());
505                for(int i=0; i<sequence.length(); i++){
506                        char singleLetterCode = sequence.charAt(i);
507                        Group group = null;
508                        if(seqResGroups.size()<=i){
509                        }
510                        else{
511                                group=seqResGroups.get(i);
512                        }
513                        if(group!=null){
514                                continue;
515                        }
516                        group = getSeqResGroup(modelChain, singleLetterCode, chainType);
517                        addGroupAtId(seqResGroups, group, i);
518                        seqResGroups.set(i, group);
519                }
520        }
521
522        private static GroupType getChainType(List<Group> groups) {
523                for(Group group : groups) {
524                        if(group==null){
525                                continue;
526                        }
527                        else if(group.getType()!=GroupType.HETATM){
528                                return group.getType();
529                        }
530                }
531                return GroupType.HETATM;
532        }
533
534        private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) {
535                while(seqResGroups.size()<=sequenceIndexId){
536                        seqResGroups.add(null);
537                }
538                if(sequenceIndexId>=0){
539                        seqResGroups.set(sequenceIndexId, group);
540                }
541        }
542
543        private static Group getSeqResGroup(Chain modelChain, char singleLetterCode, GroupType type) {
544                if(type==GroupType.AMINOACID){
545                        AminoAcidImpl a = new AminoAcidImpl();
546                        a.setRecordType(AminoAcid.SEQRESRECORD);
547                        a.setAminoType(singleLetterCode);
548                        ChemComp chemComp = new ChemComp();
549                        chemComp.setOne_letter_code(""+singleLetterCode);
550                        a.setChemComp(chemComp);
551                        return a;
552
553                } else if (type==GroupType.NUCLEOTIDE) {
554                        NucleotideImpl n = new NucleotideImpl();
555                        ChemComp chemComp = new ChemComp();
556                        chemComp.setOne_letter_code(""+singleLetterCode);
557                        n.setChemComp(chemComp);
558                        return n;
559                }
560                else{
561                        return null;
562                }
563        }
564}