001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.xtal;
022
023
024import org.biojava.nbio.structure.*;
025import org.biojava.nbio.structure.contact.AtomContactSet;
026import org.biojava.nbio.structure.contact.StructureInterface;
027import org.biojava.nbio.structure.contact.StructureInterfaceList;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031import javax.vecmath.Matrix4d;
032import javax.vecmath.Point3i;
033import javax.vecmath.Vector3d;
034import java.util.ArrayList;
035import java.util.Iterator;
036
037
038
039/**
040 * A class containing methods to find interfaces in a given crystallographic Structure by
041 * reconstructing the crystal lattice through application of symmetry operators
042 *
043 * @author duarte_j
044 *
045 */
046public class CrystalBuilder {
047
048        // Default number of cell neighbors to try in interface search (in 3 directions of space).
049        // In the search, only bounding box overlaps are tried, thus there's not so much overhead in adding
050        // more cells. We actually tested it and using numCells from 1 to 10 didn't change runtimes at all.
051        // Examples with interfaces in distant neighbor cells:
052        //   2nd neighbors: 3hz3, 1wqj, 2de3, 1jcd
053        //   3rd neighbors: 3bd3, 1men, 2gkp, 1wui
054        //   5th neighbors: 2ahf, 2h2z
055        //   6th neighbors: 1was (in fact interfaces appear only at 5th neighbors for it)
056        // Maybe this could be avoided by previously translating the given molecule to the first cell,
057        // BUT! some bona fide cases exist, e.g. 2d3e: it is properly placed at the origin but the molecule
058        // is enormously long in comparison with the dimensions of the unit cell, some interfaces come at the 7th neighbor.
059        // After a scan of the whole PDB (Oct 2013) using numCells=50, the highest one was 4jgc with
060        // interfaces up to the 11th neighbor. Other high ones (9th neighbors) are 4jbm and 4k3t.
061        // We set the default value to 12 based on that (having not seen any difference in runtime)
062        public static final int DEF_NUM_CELLS = 12;
063
064        /**
065         * Default maximum distance between two chains to be considered an interface.
066         * @see #getUniqueInterfaces(double)
067         */
068        public static final double DEFAULT_INTERFACE_DISTANCE_CUTOFF = 5.5;
069
070        public static final Matrix4d IDENTITY = new Matrix4d(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1);
071
072
073        /**
074         * Whether to consider HETATOMs in contact calculations
075         */
076        private static final boolean INCLUDE_HETATOMS = true;
077
078        private Structure structure;
079        private PDBCrystallographicInfo crystallographicInfo;
080        private int numChainsAu;
081        private int numOperatorsSg;
082
083        private static final Logger logger = LoggerFactory.getLogger(CrystalBuilder.class);
084
085        private int numCells;
086
087        private ArrayList<CrystalTransform> visited;
088
089        private boolean isCrystallographic;
090
091
092
093        public CrystalBuilder(Structure structure) {
094                this.structure = structure;
095
096                this.crystallographicInfo = structure.getCrystallographicInfo();
097
098                this.numChainsAu = structure.getChains().size();
099                this.numOperatorsSg = 1;
100
101
102                if (structure.isCrystallographic()) {
103
104                        this.isCrystallographic = true;
105
106                        // we need to check space group not null for the cases where the entry is crystallographic but
107                        // the space group is not a standard one recognized by biojava, e.g. 1mnk (SG: 'I 21')
108                        if (this.crystallographicInfo.isNonStandardSg()) {
109                                logger.warn("Space group is non-standard, will only calculate asymmetric unit interfaces.");
110                                this.isCrystallographic = false;
111
112                        } else if (this.crystallographicInfo.getSpaceGroup()==null) {                   
113                                // just in case we still check for space group null (a user pdb file could potentially be crystallographic and have no space group)                     
114                                logger.warn("Space group is null, will only calculate asymmetric unit interfaces.");
115                                this.isCrystallographic = false;
116                        } else {
117                                this.numOperatorsSg = this.crystallographicInfo.getSpaceGroup().getMultiplicity();
118                        }
119                        
120                        // we need to check crystal cell not null for the rare cases where the entry is crystallographic but
121                        // the crystal cell is not given, e.g. 2i68, 2xkm, 4bpq
122                        if (this.crystallographicInfo.getCrystalCell()==null) {
123                                logger.warn("Could not find a crystal cell definition, will only calculate asymmetric unit interfaces.");
124                                this.isCrystallographic = false;
125                        }
126                        
127                        // check for cases like 4hhb that are in a non-standard coordinate frame convention, see https://github.com/eppic-team/owl/issues/4
128                        if (this.crystallographicInfo.isNonStandardCoordFrameConvention()) {
129                                logger.warn("Non-standard coordinate frame convention, will only calculate asymmetric unit interfaces.");
130                                this.isCrystallographic = false;
131                                this.numOperatorsSg = 1; // we force here to 1 or otherwise it gets filled from sg above
132                        }
133
134                } else {
135                        this.isCrystallographic = false;
136                }
137
138                this.numCells = DEF_NUM_CELLS;
139
140        }
141
142        /**
143         * Set the number of neighboring crystal cells that will be used in the search for contacts
144         * @param numCells
145         */
146        public void setNumCells(int numCells) {
147                this.numCells = numCells;
148        }
149
150        private void initialiseVisited() {
151                visited = new ArrayList<CrystalTransform>();
152        }
153
154
155
156        /**
157         * Returns the list of unique interfaces that the given Structure has upon
158         * generation of all crystal symmetry mates. An interface is defined as any pair of chains
159         * that contact, i.e. for which there is at least a pair of atoms (one from each chain) within
160         * the default cutoff distance.
161         * @return
162         * @see #DEFAULT_INTERFACE_DISTANCE_CUTOFF
163         */
164        public StructureInterfaceList getUniqueInterfaces() {
165                return getUniqueInterfaces(DEFAULT_INTERFACE_DISTANCE_CUTOFF);
166        }
167
168        /**
169         * Returns the list of unique interfaces that the given Structure has upon
170         * generation of all crystal symmetry mates. An interface is defined as any pair of chains
171         * that contact, i.e. for which there is at least a pair of atoms (one from each chain) within
172         * the given cutoff distance.
173         * @param cutoff the distance cutoff for 2 chains to be considered in contact
174         * @return
175         */
176        public StructureInterfaceList getUniqueInterfaces(double cutoff) {
177
178
179                StructureInterfaceList set = new StructureInterfaceList();
180
181                // certain structures in the PDB are not macromolecules (contain no polymeric chains at all), e.g. 1ao2
182                // with the current mmCIF parsing, those will be empty since purely non-polymeric chains are removed
183                // see commit e9562781f23da0ebf3547146a307d7edd5741090
184                if (structure.getChains().size()==0) {
185                        logger.warn("No chains present in the structure! No interfaces will be calculated");
186                        return set;
187                }
188
189
190
191                // initialising the visited ArrayList for keeping track of symmetry redundancy
192                initialiseVisited();
193
194
195
196                // the isCrystallographic() condition covers 3 cases:
197                // a) entries with expMethod X-RAY/other diffraction and defined crystalCell (most usual case)
198                // b) entries with expMethod null but defined crystalCell (e.g. PDB file with CRYST1 record but no expMethod annotation)
199                // c) entries with expMethod not X-RAY (e.g. NMR) and defined crystalCell (NMR entries do have a dummy CRYST1 record "1 1 1 90 90 90 P1")
200                // d) isCrystallographic will be false if the structure is crystallographic but the space group was not recognized
201
202
203                calcInterfacesCrystal(set, cutoff);
204
205
206                return set;
207        }
208
209        /**
210         * Calculate interfaces between original asymmetric unit and neighboring
211         * whole unit cells, including the original full unit cell i.e. i=0,j=0,k=0
212         * @param set
213         * @param cutoff
214         */
215        private void calcInterfacesCrystal(StructureInterfaceList set, double cutoff) {
216
217
218                // initialising debugging vars
219                long start = -1;
220                long end = -1;
221                int trialCount = 0;
222                int skippedRedundant = 0;
223                int skippedAUsNoOverlap = 0;
224                int skippedChainsNoOverlap = 0;
225                int skippedSelfEquivalent = 0;
226
227
228                Matrix4d[] ops = null;
229                if (isCrystallographic) {
230                        ops = structure.getCrystallographicInfo().getTransformationsOrthonormal();
231                } else {
232                        ops = new Matrix4d[1];
233                        ops[0] = new Matrix4d(IDENTITY);
234                }
235
236                // The bounding boxes of all AUs of the unit cell
237                UnitCellBoundingBox bbGrid = new UnitCellBoundingBox(numOperatorsSg, numChainsAu);;
238                // we calculate all the bounds of each of the asym units, those will then be reused and translated
239                bbGrid.setBbs(structure, ops, INCLUDE_HETATOMS);
240
241
242                // if not crystallographic there's no search to do in other cells, only chains within "AU" will be checked
243                if (!isCrystallographic) numCells = 0;
244
245                boolean verbose = logger.isDebugEnabled();
246
247                if (verbose) {
248                        trialCount = 0;
249                        start= System.currentTimeMillis();
250                        int neighbors = (2*numCells+1)*(2*numCells+1)*(2*numCells+1)-1;
251                        int auTrials = (numChainsAu*(numChainsAu-1))/2;
252                        int trials = numChainsAu*numOperatorsSg*numChainsAu*neighbors;
253                        logger.debug("Chain clash trials within original AU: "+auTrials);
254                        logger.debug(
255                                        "Chain clash trials between the original AU and the neighbouring "+neighbors+
256                                        " whole unit cells ("+numCells+" neighbours)" +
257                                        "(2x"+numChainsAu+"chains x "+numOperatorsSg+"AUs x "+neighbors+"cells) : "+trials);
258                        logger.debug("Total trials: "+(auTrials+trials));
259                }
260
261
262                for (int a=-numCells;a<=numCells;a++) {
263                        for (int b=-numCells;b<=numCells;b++) {
264                                for (int c=-numCells;c<=numCells;c++) {
265
266                                        Point3i trans = new Point3i(a,b,c);
267                                        Vector3d transOrth = new Vector3d(a,b,c);
268                                        if (a!=0 || b!=0 || c!=0)
269                                                // we avoid doing the transformation for 0,0,0 (in case it's not crystallographic)
270                                                this.crystallographicInfo.getCrystalCell().transfToOrthonormal(transOrth);
271                                        UnitCellBoundingBox bbGridTrans = bbGrid.getTranslatedBbs(transOrth);
272
273                                        for (int n=0;n<numOperatorsSg;n++) {
274
275                                                // short-cut strategies
276                                                // 1) we skip first of all if the bounding boxes of the AUs don't overlap
277                                                if (!bbGrid.getAuBoundingBox(0).overlaps(bbGridTrans.getAuBoundingBox(n), cutoff)) {
278                                                        skippedAUsNoOverlap++;
279                                                        continue;
280                                                }
281
282                                                // 2) we check if we didn't already see its equivalent symmetry operator partner
283                                                CrystalTransform tt = new CrystalTransform(this.crystallographicInfo.getSpaceGroup(), n);
284                                                tt.translate(trans);
285                                                if (isRedundant(tt)) {
286                                                        skippedRedundant++;
287                                                        continue;
288                                                }
289                                                addVisited(tt);
290
291
292                                                boolean selfEquivalent = false;
293
294                                                // 3) an operator can be "self redundant" if it is the inverse of itself (involutory, e.g. all pure 2-folds with no translation)
295                                                if (tt.isEquivalent(tt)) {
296                                                        logger.debug("Transform "+tt+" is equivalent to itself, will skip half of i-chains to j-chains comparisons");
297                                                        // in this case we can't skip the operator, but we can skip half of the matrix comparisons e.g. j>i
298                                                        // we set a flag and do that within the loop below
299                                                        selfEquivalent = true;
300                                                }
301
302                                                StringBuilder builder = null;
303                                                if (verbose) builder = new StringBuilder(tt+" ");
304
305                                                // Now that we know that boxes overlap and operator is not redundant, we have to go to the details
306                                                int contactsFound = 0;
307
308                                                for (int j=0;j<numChainsAu;j++) {
309                                                        for (int i=0;i<numChainsAu;i++) { // we only have to compare the original asymmetric unit to every full cell around
310
311                                                                if(selfEquivalent && (j>i)) {
312                                                                        // in case of self equivalency of the operator we can safely skip half of the matrix
313                                                                        skippedSelfEquivalent++;
314                                                                        continue;
315                                                                }
316                                                                // special case of original AU, we don't compare a chain to itself
317                                                                if (n==0 && a==0 && b==0 && c==0 && i==j) continue;
318
319                                                                // before calculating the AtomContactSet we check for overlap, then we save putting atoms into the grid
320                                                                if (!bbGrid.getChainBoundingBox(0,i).overlaps(bbGridTrans.getChainBoundingBox(n,j),cutoff)) {
321                                                                        skippedChainsNoOverlap++;
322                                                                        if (verbose) {
323                                                                                builder.append(".");
324                                                                        }
325                                                                        continue;
326                                                                }
327                                                                trialCount++;
328
329                                                                // finally we've gone through all short-cuts and the 2 chains seem to be close enough:
330                                                                // we do the calculation of contacts
331                                                                Chain chainj = null;
332                                                                Chain chaini = structure.getChain(i);
333
334                                                                if (n==0 && a==0 && b==0 && c==0) {
335                                                                        chainj = structure.getChain(j);
336                                                                } else {
337                                                                        chainj = (Chain)structure.getChain(j).clone();
338                                                                        Matrix4d m = new Matrix4d(ops[n]);
339                                                                        translate(m, transOrth);
340                                                                        Calc.transform(chainj,m);
341                                                                }
342
343                                                                StructureInterface interf = calcContacts(chaini, chainj, cutoff, tt, builder);
344
345                                                                if (interf!=null) {
346                                                                        contactsFound++;
347                                                                        set.add(interf);
348                                                                }
349                                                        }
350                                                }
351
352                                                if( verbose ) {
353                                                        if (a==0 && b==0 && c==0 && n==0)
354                                                                builder.append(" "+contactsFound+"("+(numChainsAu*(numChainsAu-1))/2+")");
355                                                        else if (selfEquivalent)
356                                                                builder.append(" "+contactsFound+"("+(numChainsAu*(numChainsAu+1))/2+")");
357                                                        else
358                                                                builder.append(" "+contactsFound+"("+numChainsAu*numChainsAu+")");
359
360                                                        logger.debug(builder.toString());
361                                                }
362                                        }
363                                }
364                        }
365                }
366
367                end = System.currentTimeMillis();
368                logger.debug("\n"+trialCount+" chain-chain clash trials done. Time "+(end-start)/1000+"s");
369                logger.debug("  skipped (not overlapping AUs)       : "+skippedAUsNoOverlap);
370                logger.debug("  skipped (not overlapping chains)    : "+skippedChainsNoOverlap);
371                logger.debug("  skipped (sym redundant op pairs)    : "+skippedRedundant);
372                logger.debug("  skipped (sym redundant self op)     : "+skippedSelfEquivalent);
373
374                logger.debug("Found "+set.size()+" interfaces.");
375        }
376
377        private StructureInterface calcContacts(Chain chaini, Chain chainj, double cutoff, CrystalTransform tt, StringBuilder builder) {
378                // note that we don't consider hydrogens when calculating contacts
379                AtomContactSet graph = StructureTools.getAtomsInContact(chaini, chainj, cutoff, INCLUDE_HETATOMS);
380
381                if (graph.size()>0) {
382                        if (builder != null) builder.append("x");
383
384                        CrystalTransform transf = new CrystalTransform(this.crystallographicInfo.getSpaceGroup());
385                        StructureInterface interf = new StructureInterface(
386                                        StructureTools.getAllAtomArray(chaini), StructureTools.getAllAtomArray(chainj),
387                                        chaini.getChainID(), chainj.getChainID(),
388                                        graph,
389                                        transf, tt);
390
391                        return interf;
392
393                } else {
394                        if (builder != null) builder.append("o");
395                        return null;
396                }
397        }
398
399        private void addVisited(CrystalTransform tt) {
400                visited.add(tt);
401        }
402
403        /**
404         * Checks whether given transformId/translation is symmetry redundant
405         * Two transformations are symmetry redundant if their matrices (4d) multiplication gives the identity, i.e.
406         * if one is the inverse of the other.
407         * @param tt
408         * @return
409         */
410        private boolean isRedundant(CrystalTransform tt) {
411
412                Iterator<CrystalTransform> it = visited.iterator();
413                while (it.hasNext()) {
414                        CrystalTransform v = it.next();
415
416                        if (tt.isEquivalent(v)) {
417
418                                logger.debug("Skipping redundant transformation: "+tt+", equivalent to "+v);
419
420                                // there's only 1 possible equivalent partner for each visited matrix
421                                // (since the equivalent is its inverse matrix and the inverse matrix is unique)
422                                // thus once the partner has been seen, we don't need to check it ever again
423                                it.remove();
424
425                                return true;
426                        }
427                }
428
429                return false;
430        }
431
432        public void translate(Matrix4d m, Vector3d translation) {
433                m.m03 = m.m03+translation.x;
434                m.m13 = m.m13+translation.y;
435                m.m23 = m.m23+translation.z;
436        }
437
438//      /**
439//       * If NCS operators are given in MTRIX records, a bigger AU has to be constructed based on those.
440//       * Later they have to be removed with {@link #removeExtraChains()}
441//       */
442//      private void constructFullStructure() {
443//
444//              if (this.crystallographicInfo.getNcsOperators()==null ||
445//                      this.crystallographicInfo.getNcsOperators().length==0) {
446//                      // normal case: nothing to do
447//                      return;
448//              }
449//
450//              // first we store the original chains in a new list to be able to restore the structure to its original state afterwards
451//              origChains = new ArrayList<Chain>();
452//              for (Chain chain:structure.getChains()) {
453//                      origChains.add(chain);
454//              }
455//
456//              // if we are here, it means that the NCS operators exist and we have to complete the given AU by applying them
457//              Matrix4d[] ncsOps = this.crystallographicInfo.getNcsOperators();
458//
459//              if (verbose)
460//                      System.out.println(ncsOps.length+" NCS operators found, generating new AU...");
461//
462//
463//              for (int i=0;i<ncsOps.length;i++) {
464//                      Structure transformedStruct = (Structure)structure.clone();
465//                      Calc.transform(transformedStruct, ncsOps[i]);
466//
467//                      for (Chain chain: transformedStruct.getChains()) {
468//                              // we assign a new AU id (chain ID) consisting in original chain ID + an operator index from 1 to n
469//                              chain.setChainID(chain.getChainID()+(i+1));
470//                              structure.addChain(chain);
471//                      }
472//              }
473//
474//              // now we have more chains in AU, we have to update the value
475//              this.numChainsAu = structure.getChains().size();
476//      }
477//
478//      /**
479//       * Removes the extra chains that were added to the original structure in {@link #constructFullStructure()}
480//       */
481//      private void removeExtraChains() {
482//              structure.setChains(origChains);
483//      }
484}