001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.nbio.structure.quaternary;
023
024import org.biojava.nbio.structure.Calc;
025import org.biojava.nbio.structure.Chain;
026import org.biojava.nbio.structure.EntityInfo;
027import org.biojava.nbio.structure.Structure;
028import org.rcsb.cif.schema.mm.PdbxStructAssembly;
029import org.rcsb.cif.schema.mm.PdbxStructAssemblyGen;
030import org.rcsb.cif.schema.mm.PdbxStructOperList;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import javax.vecmath.Matrix4d;
035import java.util.*;
036
037/**
038 * Reconstructs the quaternary structure of a protein from an asymmetric unit
039 *
040 * @author Peter Rose
041 * @author Andreas Prlic
042 * @author Jose Duarte
043 *
044 */
045public class BiologicalAssemblyBuilder {
046
047        private static final Logger logger = LoggerFactory.getLogger(BiologicalAssemblyBuilder.class);
048
049        /**
050         * The character separating the original chain identifier from the operator id.
051         */
052        public static final String SYM_CHAIN_ID_SEPARATOR = "_";
053
054        /**
055         * The character separating operator ids that are composed.
056         */
057        public static final String COMPOSED_OPERATOR_SEPARATOR = "x";
058
059        private OperatorResolver operatorResolver;
060
061
062        /**
063         * All matrix operators present in _pdbx_struct_oper_list.
064         * Identifiers (_pdbx_struct_oper_list.id) to matrix operators.
065         */
066        private Map<String, Matrix4d> allTransformations;
067
068        private List<String> modelIndex = new ArrayList<>();
069
070        public BiologicalAssemblyBuilder(){
071                init();
072        }
073
074        /**
075         * Builds a Structure object containing the quaternary structure built from given asymUnit and transformations,
076         * by adding symmetry partners as new models.
077         * The output Structure will be different depending on the multiModel parameter:
078         * <li>
079         * the symmetry-expanded chains are added as new models, one per transformId. All original models but
080         * the first one are discarded.
081         * </li>
082         * <li>
083         * as original with symmetry-expanded chains added with renamed chain ids and names (in the form
084         * originalAsymId_transformId and originalAuthId_transformId)
085         * </li>
086         * @param asymUnit
087         * @param transformations
088         * @param useAsymIds if true use {@link Chain#getId()} to match the ids in the BiologicalAssemblyTransformation (needed if data read from mmCIF),
089         * if false use {@link Chain#getName()} for the chain matching (needed if data read from PDB).
090         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
091         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
092         * @return
093         */
094        public Structure rebuildQuaternaryStructure(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations, boolean useAsymIds, boolean multiModel) {
095
096                // ensure that new chains are build in the same order as they appear in the asymmetric unit
097                orderTransformationsByChainId(asymUnit, transformations);
098
099                Structure s = asymUnit.clone();
100
101                Map<Integer, EntityInfo> entityInfoMap = new HashMap<>();
102                // this resets all models (not only the first one): this is important for NMR (multi-model)
103                // like that we can be sure we start with an empty structures and we add models or chains to it
104                s.resetModels();
105                s.setEntityInfos(new ArrayList<>());
106
107                for (BiologicalAssemblyTransformation transformation : transformations){
108
109                        List<Chain> chainsToTransform = new ArrayList<>();
110
111                        // note: for NMR structures (or any multi-model) we use the first model only and throw away the rest
112                        if (useAsymIds) {
113                                Chain c = asymUnit.getChain(transformation.getChainId());
114                                chainsToTransform.add(c);
115                        } else {
116                                Chain polyC = asymUnit.getPolyChainByPDB(transformation.getChainId());
117                                List<Chain> nonPolyCs = asymUnit.getNonPolyChainsByPDB(transformation.getChainId());
118                                Chain waterC = asymUnit.getWaterChainByPDB(transformation.getChainId());
119                                if (polyC!=null)
120                                        chainsToTransform.add(polyC);
121                                if (!nonPolyCs.isEmpty())
122                                        chainsToTransform.addAll(nonPolyCs);
123                                if (waterC!=null)
124                                        chainsToTransform.add(waterC);
125                        }
126
127                        for (Chain c: chainsToTransform) {
128
129                                Chain chain = (Chain)c.clone();
130
131                                Calc.transform(chain, transformation.getTransformationMatrix());
132
133                                String transformId = transformation.getId();
134
135                                // note that the Structure.addChain/Structure.addModel methods set the parent reference to the new Structure
136
137                                if (multiModel)
138                                        addChainMultiModel(s, chain, transformId);
139                                else
140                                        addChainFlattened(s, chain, transformId);
141
142                                EntityInfo entityInfo;
143                                if (!entityInfoMap.containsKey(chain.getEntityInfo().getMolId())) {
144                                        entityInfo = new EntityInfo(chain.getEntityInfo());
145                                        entityInfoMap.put(chain.getEntityInfo().getMolId(), entityInfo);
146                                        s.addEntityInfo(entityInfo);
147                                } else {
148                                        entityInfo = entityInfoMap.get(chain.getEntityInfo().getMolId());
149                                }
150                                chain.setEntityInfo(entityInfo);
151                                entityInfo.addChain(chain);
152
153                        }
154                }
155
156                s.setBiologicalAssembly(true);
157                return s;
158        }
159
160        /**
161         * Orders model transformations by chain ids in the same order as in the asymmetric unit
162         * @param asymUnit
163         * @param transformations
164         */
165        private void orderTransformationsByChainId(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations) {
166                final List<String> chainIds = getChainIds(asymUnit);
167                Collections.sort(transformations, new Comparator<BiologicalAssemblyTransformation>() {
168                        @Override
169                        public int compare(BiologicalAssemblyTransformation t1, BiologicalAssemblyTransformation t2) {
170                                // set sort order only if the two ids are identical
171                                if (t1.getId().equals(t2.getId())) {
172                                         return chainIds.indexOf(t1.getChainId()) - chainIds.indexOf(t2.getChainId());
173                                } else {
174                                        return t1.getId().compareTo(t2.getId());
175                                }
176                    }
177                });
178        }
179
180        /**
181         * Returns a list of chain ids in the order they are specified in the ATOM
182         * records in the asymmetric unit
183         * @param asymUnit
184         * @return
185         */
186        private List<String> getChainIds(Structure asymUnit) {
187                List<String> chainIds = new ArrayList<String>();
188                for ( Chain c : asymUnit.getChains()){
189                        String intChainID = c.getId();
190                        chainIds.add(intChainID);
191                }
192                return chainIds;
193        }
194
195        /**
196         * Adds a chain to the given structure to form a biological assembly,
197         * adding the symmetry expanded chains as new models per transformId.
198         * @param s
199         * @param newChain
200         * @param transformId
201         */
202        private void addChainMultiModel(Structure s, Chain newChain, String transformId) {
203
204                // multi-model bioassembly
205
206                if ( modelIndex.size() == 0)
207                        modelIndex.add("PLACEHOLDER FOR ASYM UNIT");
208
209                int modelCount = modelIndex.indexOf(transformId);
210                if ( modelCount == -1)  {
211                        modelIndex.add(transformId);
212                        modelCount = modelIndex.indexOf(transformId);
213                }
214
215                if (modelCount == 0) {
216                        s.addChain(newChain);
217                } else if (modelCount > s.nrModels()) {
218                        List<Chain> newModel = new ArrayList<>();
219                        newModel.add(newChain);
220                        s.addModel(newModel);
221                } else {
222                        s.addChain(newChain, modelCount-1);
223                }
224
225        }
226
227        /**
228         * Adds a chain to the given structure to form a biological assembly,
229         * adding the symmetry-expanded chains as new chains with renamed
230         * chain ids and names (in the form originalAsymId_transformId and originalAuthId_transformId).
231         * @param s
232         * @param newChain
233         * @param transformId
234         */
235        private void addChainFlattened(Structure s, Chain newChain, String transformId) {
236                newChain.setId(newChain.getId()+SYM_CHAIN_ID_SEPARATOR+transformId);
237                newChain.setName(newChain.getName()+SYM_CHAIN_ID_SEPARATOR+transformId);
238                s.addChain(newChain);
239        }
240
241        /**
242         * Returns a list of transformation matrices for the generation of a macromolecular
243         * assembly for the specified assembly Id.
244         *
245         * @param pdbxStructAssembly
246         * @param assemblyIndex
247         * @param pdbxStructAssemblyGen
248         * @param pdbxStructOperList
249         * @return list of transformation matrices to generate macromolecular assembly
250         */
251        public List<BiologicalAssemblyTransformation> getBioUnitTransformationList(PdbxStructAssembly pdbxStructAssembly,
252                                                                                                                                                           int assemblyIndex,
253                                                                                                                                                           PdbxStructAssemblyGen pdbxStructAssemblyGen,
254                                                                                                                                                           PdbxStructOperList pdbxStructOperList) {
255                init();
256
257                // first we populate the list of all operators from pdbx_struct_oper_list so that we can then
258                // get them from getBioUnitTransformationsListUnaryOperators() and getBioUnitTransformationsListBinaryOperators()
259                for (int i = 0; i < pdbxStructOperList.getRowCount(); i++) {
260                        try {
261                                Matrix4d m = new Matrix4d();
262                                m.m00 = pdbxStructOperList.getMatrix11().get(i);
263                                m.m01 = pdbxStructOperList.getMatrix12().get(i);
264                                m.m02 = pdbxStructOperList.getMatrix13().get(i);
265
266                                m.m10 = pdbxStructOperList.getMatrix21().get(i);
267                                m.m11 = pdbxStructOperList.getMatrix22().get(i);
268                                m.m12 = pdbxStructOperList.getMatrix23().get(i);
269
270                                m.m20 = pdbxStructOperList.getMatrix31().get(i);
271                                m.m21 = pdbxStructOperList.getMatrix32().get(i);
272                                m.m22 = pdbxStructOperList.getMatrix33().get(i);
273
274                                m.m03 = pdbxStructOperList.getVector1().get(i);
275                                m.m13 = pdbxStructOperList.getVector2().get(i);
276                                m.m23 = pdbxStructOperList.getVector3().get(i);
277
278                                m.m30 = 0;
279                                m.m31 = 0;
280                                m.m32 = 0;
281                                m.m33 = 1;
282
283                                allTransformations.put(pdbxStructOperList.getId().get(i), m);
284                        } catch (NumberFormatException e) {
285                                logger.warn("Could not parse a matrix value from pdbx_struct_oper_list for id {}. The operator id will be ignored. Error: {}", pdbxStructOperList.getId().get(i), e.getMessage());
286                        }
287                }
288
289                String assemblyId = pdbxStructAssembly.getId().get(assemblyIndex);
290                ArrayList<BiologicalAssemblyTransformation> transformations = getBioUnitTransformationsListUnaryOperators(assemblyId, pdbxStructAssemblyGen);
291                transformations.addAll(getBioUnitTransformationsListBinaryOperators(assemblyId, pdbxStructAssemblyGen));
292                transformations.trimToSize();
293                return transformations;
294        }
295
296        private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListBinaryOperators(String assemblyId, PdbxStructAssemblyGen pdbxStructAssemblyGen) {
297                ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<>();
298                List<OrderedPair<String>> operators = operatorResolver.getBinaryOperators();
299
300                for (int i = 0; i < pdbxStructAssemblyGen.getRowCount(); i++) {
301                        if (!pdbxStructAssemblyGen.getAssemblyId().get(i).equals(assemblyId)) {
302                                continue;
303                        }
304
305                        String[] asymIds= pdbxStructAssemblyGen.getAsymIdList().get(i).split(",");
306                        operatorResolver.parseOperatorExpressionString(pdbxStructAssemblyGen.getOperExpression().get(i));
307
308                        // apply binary operators to the specified chains
309                        // Example 1M4X: generates all products of transformation matrices (1-60)(61-88)
310                        for (String chainId : asymIds) {
311                                for (OrderedPair<String> operator : operators) {
312                                        Matrix4d original1 = allTransformations.get(operator.getElement1());
313                                        Matrix4d original2 = allTransformations.get(operator.getElement2());
314                                        if (original1 == null || original2 == null) {
315                                                logger.warn("Could not find matrix operator for operator id {} or {}. Assembly id {} will not contain the composed operator.", operator.getElement1(), operator.getElement2(), assemblyId);
316                                                continue;
317                                        }
318                                        Matrix4d composed = new Matrix4d(original1);
319                                        composed.mul(original2);
320                                        BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation();
321                                        transform.setChainId(chainId);
322                                        transform.setId(operator.getElement1() + COMPOSED_OPERATOR_SEPARATOR + operator.getElement2());
323                                        transform.setTransformationMatrix(composed);
324                                        transformations.add(transform);
325                                }
326                        }
327                }
328
329                return transformations;
330        }
331
332        private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListUnaryOperators(String assemblyId, PdbxStructAssemblyGen pdbxStructAssemblyGen) {
333                ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<>();
334
335                for (int i = 0; i < pdbxStructAssemblyGen.getRowCount(); i++) {
336                        if (!pdbxStructAssemblyGen.getAssemblyId().get(i).equals(assemblyId)) {
337                                continue;
338                        }
339
340                        operatorResolver.parseOperatorExpressionString(pdbxStructAssemblyGen.getOperExpression().get(i));
341                        List<String> operators = operatorResolver.getUnaryOperators();
342                        String[] asymIds = pdbxStructAssemblyGen.getAsymIdList().get(i).split(",");
343
344                        // apply unary operators to the specified chains
345                        for (String chainId : asymIds) {
346                                for (String operator : operators) {
347                                        Matrix4d original = allTransformations.get(operator);
348                                        if (original == null) {
349                                                logger.warn("Could not find matrix operator for operator id {}. Assembly id {} will not contain the operator.", operator, assemblyId);
350                                                continue;
351                                        }
352                                        BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation();
353                                        transform.setChainId(chainId);
354                                        transform.setId(operator);
355                                        transform.setTransformationMatrix(original);
356                                        transformations.add(transform);
357                                }
358                        }
359                }
360
361                return transformations;
362        }
363
364        private void init() {
365                operatorResolver = new OperatorResolver();
366                allTransformations = new HashMap<>();
367        }
368}