001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.nbio.structure.quaternary;
023
024import org.biojava.nbio.structure.Calc;
025import org.biojava.nbio.structure.Chain;
026import org.biojava.nbio.structure.EntityInfo;
027import org.biojava.nbio.structure.Structure;
028import org.rcsb.cif.schema.mm.PdbxStructAssembly;
029import org.rcsb.cif.schema.mm.PdbxStructAssemblyGen;
030import org.rcsb.cif.schema.mm.PdbxStructOperList;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import javax.vecmath.Matrix4d;
035import java.util.*;
036
037/**
038 * Reconstructs the quaternary structure of a protein from an asymmetric unit
039 *
040 * @author Peter Rose
041 * @author Andreas Prlic
042 * @author Jose Duarte
043 *
044 */
045public class BiologicalAssemblyBuilder {
046
047        private static final Logger logger = LoggerFactory.getLogger(BiologicalAssemblyBuilder.class);
048
049        /**
050         * The character separating the original chain identifier from the operator id.
051         */
052        public static final String SYM_CHAIN_ID_SEPARATOR = "_";
053
054        /**
055         * The character separating operator ids that are composed.
056         */
057        public static final String COMPOSED_OPERATOR_SEPARATOR = "x";
058
059        private OperatorResolver operatorResolver;
060
061
062        /**
063         * All matrix operators present in _pdbx_struct_oper_list.
064         * Identifiers (_pdbx_struct_oper_list.id) to matrix operators.
065         */
066        private Map<String, Matrix4d> allTransformations;
067
068        private List<String> modelIndex = new ArrayList<>();
069
070        public BiologicalAssemblyBuilder(){
071                init();
072        }
073
074        /**
075         * Builds a Structure object containing the quaternary structure built from given asymUnit and transformations,
076         * by adding symmetry partners as new models.
077         * The output Structure will be different depending on the multiModel parameter:
078         * <ul>
079         * <li>
080         * the symmetry-expanded chains are added as new models, one per transformId. All original models but
081         * the first one are discarded.
082         * </li>
083         * <li>
084         * as original with symmetry-expanded chains added with renamed chain ids and names (in the form
085         * originalAsymId_transformId and originalAuthId_transformId)
086         * </li>
087         * </ul>
088         * @param asymUnit
089         * @param transformations
090         * @param useAsymIds if true use {@link Chain#getId()} to match the ids in the BiologicalAssemblyTransformation (needed if data read from mmCIF),
091         * if false use {@link Chain#getName()} for the chain matching (needed if data read from PDB).
092         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
093         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
094         * @return
095         */
096        public Structure rebuildQuaternaryStructure(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations, boolean useAsymIds, boolean multiModel) {
097
098                // ensure that new chains are build in the same order as they appear in the asymmetric unit
099                orderTransformationsByChainId(asymUnit, transformations);
100
101                Structure s = asymUnit.clone();
102
103                Map<Integer, EntityInfo> entityInfoMap = new HashMap<>();
104                // this resets all models (not only the first one): this is important for NMR (multi-model)
105                // like that we can be sure we start with an empty structures and we add models or chains to it
106                s.resetModels();
107                s.setEntityInfos(new ArrayList<>());
108
109                for (BiologicalAssemblyTransformation transformation : transformations){
110
111                        List<Chain> chainsToTransform = new ArrayList<>();
112
113                        // note: for NMR structures (or any multi-model) we use the first model only and throw away the rest
114                        if (useAsymIds) {
115                                Chain c = asymUnit.getChain(transformation.getChainId());
116                                chainsToTransform.add(c);
117                        } else {
118                                Chain polyC = asymUnit.getPolyChainByPDB(transformation.getChainId());
119                                List<Chain> nonPolyCs = asymUnit.getNonPolyChainsByPDB(transformation.getChainId());
120                                Chain waterC = asymUnit.getWaterChainByPDB(transformation.getChainId());
121                                if (polyC!=null)
122                                        chainsToTransform.add(polyC);
123                                if (!nonPolyCs.isEmpty())
124                                        chainsToTransform.addAll(nonPolyCs);
125                                if (waterC!=null)
126                                        chainsToTransform.add(waterC);
127                        }
128
129                        for (Chain c: chainsToTransform) {
130
131                                Chain chain = (Chain)c.clone();
132
133                                Calc.transform(chain, transformation.getTransformationMatrix());
134
135                                String transformId = transformation.getId();
136
137                                // note that the Structure.addChain/Structure.addModel methods set the parent reference to the new Structure
138
139                                if (multiModel)
140                                        addChainMultiModel(s, chain, transformId);
141                                else
142                                        addChainFlattened(s, chain, transformId);
143
144                                EntityInfo entityInfo;
145                                if (!entityInfoMap.containsKey(chain.getEntityInfo().getMolId())) {
146                                        entityInfo = new EntityInfo(chain.getEntityInfo());
147                                        entityInfoMap.put(chain.getEntityInfo().getMolId(), entityInfo);
148                                        s.addEntityInfo(entityInfo);
149                                } else {
150                                        entityInfo = entityInfoMap.get(chain.getEntityInfo().getMolId());
151                                }
152                                chain.setEntityInfo(entityInfo);
153                                entityInfo.addChain(chain);
154
155                        }
156                }
157
158                s.setBiologicalAssembly(true);
159                return s;
160        }
161
162        /**
163         * Orders model transformations by chain ids in the same order as in the asymmetric unit
164         * @param asymUnit
165         * @param transformations
166         */
167        private void orderTransformationsByChainId(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations) {
168                final List<String> chainIds = getChainIds(asymUnit);
169                Collections.sort(transformations, new Comparator<BiologicalAssemblyTransformation>() {
170                        @Override
171                        public int compare(BiologicalAssemblyTransformation t1, BiologicalAssemblyTransformation t2) {
172                                // set sort order only if the two ids are identical
173                                if (t1.getId().equals(t2.getId())) {
174                                         return chainIds.indexOf(t1.getChainId()) - chainIds.indexOf(t2.getChainId());
175                                } else {
176                                        return t1.getId().compareTo(t2.getId());
177                                }
178                    }
179                });
180        }
181
182        /**
183         * Returns a list of chain ids in the order they are specified in the ATOM
184         * records in the asymmetric unit
185         * @param asymUnit
186         * @return
187         */
188        private List<String> getChainIds(Structure asymUnit) {
189                List<String> chainIds = new ArrayList<>();
190                for ( Chain c : asymUnit.getChains()){
191                        String intChainID = c.getId();
192                        chainIds.add(intChainID);
193                }
194                return chainIds;
195        }
196
197        /**
198         * Adds a chain to the given structure to form a biological assembly,
199         * adding the symmetry expanded chains as new models per transformId.
200         * @param s
201         * @param newChain
202         * @param transformId
203         */
204        private void addChainMultiModel(Structure s, Chain newChain, String transformId) {
205
206                // multi-model bioassembly
207
208                if ( modelIndex.size() == 0)
209                        modelIndex.add("PLACEHOLDER FOR ASYM UNIT");
210
211                int modelCount = modelIndex.indexOf(transformId);
212                if ( modelCount == -1)  {
213                        modelIndex.add(transformId);
214                        modelCount = modelIndex.indexOf(transformId);
215                }
216
217                if (modelCount == 0) {
218                        s.addChain(newChain);
219                } else if (modelCount > s.nrModels()) {
220                        List<Chain> newModel = new ArrayList<>();
221                        newModel.add(newChain);
222                        s.addModel(newModel);
223                } else {
224                        s.addChain(newChain, modelCount-1);
225                }
226
227        }
228
229        /**
230         * Adds a chain to the given structure to form a biological assembly,
231         * adding the symmetry-expanded chains as new chains with renamed
232         * chain ids and names (in the form originalAsymId_transformId and originalAuthId_transformId).
233         * @param s
234         * @param newChain
235         * @param transformId
236         */
237        private void addChainFlattened(Structure s, Chain newChain, String transformId) {
238                newChain.setId(newChain.getId()+SYM_CHAIN_ID_SEPARATOR+transformId);
239                newChain.setName(newChain.getName()+SYM_CHAIN_ID_SEPARATOR+transformId);
240                s.addChain(newChain);
241        }
242
243        /**
244         * Returns a list of transformation matrices for the generation of a macromolecular
245         * assembly for the specified assembly Id.
246         *
247         * @param pdbxStructAssembly
248         * @param assemblyIndex
249         * @param pdbxStructAssemblyGen
250         * @param pdbxStructOperList
251         * @return list of transformation matrices to generate macromolecular assembly
252         */
253        public List<BiologicalAssemblyTransformation> getBioUnitTransformationList(PdbxStructAssembly pdbxStructAssembly,
254                                                                                                                                                           int assemblyIndex,
255                                                                                                                                                           PdbxStructAssemblyGen pdbxStructAssemblyGen,
256                                                                                                                                                           PdbxStructOperList pdbxStructOperList) {
257                init();
258
259                // first we populate the list of all operators from pdbx_struct_oper_list so that we can then
260                // get them from getBioUnitTransformationsListUnaryOperators() and getBioUnitTransformationsListBinaryOperators()
261                for (int i = 0; i < pdbxStructOperList.getRowCount(); i++) {
262                        try {
263                                Matrix4d m = new Matrix4d();
264                                m.m00 = pdbxStructOperList.getMatrix11().get(i);
265                                m.m01 = pdbxStructOperList.getMatrix12().get(i);
266                                m.m02 = pdbxStructOperList.getMatrix13().get(i);
267
268                                m.m10 = pdbxStructOperList.getMatrix21().get(i);
269                                m.m11 = pdbxStructOperList.getMatrix22().get(i);
270                                m.m12 = pdbxStructOperList.getMatrix23().get(i);
271
272                                m.m20 = pdbxStructOperList.getMatrix31().get(i);
273                                m.m21 = pdbxStructOperList.getMatrix32().get(i);
274                                m.m22 = pdbxStructOperList.getMatrix33().get(i);
275
276                                m.m03 = pdbxStructOperList.getVector1().get(i);
277                                m.m13 = pdbxStructOperList.getVector2().get(i);
278                                m.m23 = pdbxStructOperList.getVector3().get(i);
279
280                                m.m30 = 0;
281                                m.m31 = 0;
282                                m.m32 = 0;
283                                m.m33 = 1;
284
285                                allTransformations.put(pdbxStructOperList.getId().get(i), m);
286                        } catch (NumberFormatException e) {
287                                logger.warn("Could not parse a matrix value from pdbx_struct_oper_list for id {}. The operator id will be ignored. Error: {}", pdbxStructOperList.getId().get(i), e.getMessage());
288                        }
289                }
290
291                String assemblyId = pdbxStructAssembly.getId().get(assemblyIndex);
292                ArrayList<BiologicalAssemblyTransformation> transformations = getBioUnitTransformationsListUnaryOperators(assemblyId, pdbxStructAssemblyGen);
293                transformations.addAll(getBioUnitTransformationsListBinaryOperators(assemblyId, pdbxStructAssemblyGen));
294                transformations.trimToSize();
295                return transformations;
296        }
297
298        private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListBinaryOperators(String assemblyId, PdbxStructAssemblyGen pdbxStructAssemblyGen) {
299                ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<>();
300                List<OrderedPair<String>> operators = operatorResolver.getBinaryOperators();
301
302                for (int i = 0; i < pdbxStructAssemblyGen.getRowCount(); i++) {
303                        if (!pdbxStructAssemblyGen.getAssemblyId().get(i).equals(assemblyId)) {
304                                continue;
305                        }
306
307                        String[] asymIds= pdbxStructAssemblyGen.getAsymIdList().get(i).split(",");
308                        operatorResolver.parseOperatorExpressionString(pdbxStructAssemblyGen.getOperExpression().get(i));
309
310                        // apply binary operators to the specified chains
311                        // Example 1M4X: generates all products of transformation matrices (1-60)(61-88)
312                        for (String chainId : asymIds) {
313                                for (OrderedPair<String> operator : operators) {
314                                        Matrix4d original1 = allTransformations.get(operator.getElement1());
315                                        Matrix4d original2 = allTransformations.get(operator.getElement2());
316                                        if (original1 == null || original2 == null) {
317                                                logger.warn("Could not find matrix operator for operator id {} or {}. Assembly id {} will not contain the composed operator.", operator.getElement1(), operator.getElement2(), assemblyId);
318                                                continue;
319                                        }
320                                        Matrix4d composed = new Matrix4d(original1);
321                                        composed.mul(original2);
322                                        BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation();
323                                        transform.setChainId(chainId);
324                                        transform.setId(operator.getElement1() + COMPOSED_OPERATOR_SEPARATOR + operator.getElement2());
325                                        transform.setTransformationMatrix(composed);
326                                        transformations.add(transform);
327                                }
328                        }
329                }
330
331                return transformations;
332        }
333
334        private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListUnaryOperators(String assemblyId, PdbxStructAssemblyGen pdbxStructAssemblyGen) {
335                ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<>();
336
337                for (int i = 0; i < pdbxStructAssemblyGen.getRowCount(); i++) {
338                        if (!pdbxStructAssemblyGen.getAssemblyId().get(i).equals(assemblyId)) {
339                                continue;
340                        }
341
342                        operatorResolver.parseOperatorExpressionString(pdbxStructAssemblyGen.getOperExpression().get(i));
343                        List<String> operators = operatorResolver.getUnaryOperators();
344                        String[] asymIds = pdbxStructAssemblyGen.getAsymIdList().get(i).split(",");
345
346                        // apply unary operators to the specified chains
347                        for (String chainId : asymIds) {
348                                for (String operator : operators) {
349                                        Matrix4d original = allTransformations.get(operator);
350                                        if (original == null) {
351                                                logger.warn("Could not find matrix operator for operator id {}. Assembly id {} will not contain the operator.", operator, assemblyId);
352                                                continue;
353                                        }
354                                        BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation();
355                                        transform.setChainId(chainId);
356                                        transform.setId(operator);
357                                        transform.setTransformationMatrix(original);
358                                        transformations.add(transform);
359                                }
360                        }
361                }
362
363                return transformations;
364        }
365
366        private void init() {
367                operatorResolver = new OperatorResolver();
368                allTransformations = new HashMap<>();
369        }
370}