001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021
022package org.biojava.nbio.structure.quaternary;
023
024import org.biojava.nbio.structure.Calc;
025import org.biojava.nbio.structure.Chain;
026import org.biojava.nbio.structure.EntityInfo;
027import org.biojava.nbio.structure.Structure;
028import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly;
029import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen;
030import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import javax.vecmath.Matrix4d;
035import java.util.*;
036
037/**
038 * Reconstructs the quaternary structure of a protein from an asymmetric unit
039 *
040 * @author Peter Rose
041 * @author Andreas Prlic
042 * @author Jose Duarte
043 *
044 */
045public class BiologicalAssemblyBuilder {
046
047        private static final Logger logger = LoggerFactory.getLogger(BiologicalAssemblyBuilder.class);
048
049        /**
050         * The character separating the original chain identifier from the operator id.
051         */
052        public static final String SYM_CHAIN_ID_SEPARATOR = "_";
053
054        /**
055         * The character separating operator ids that are composed.
056         */
057        public static final String COMPOSED_OPERATOR_SEPARATOR = "x";
058
059        private OperatorResolver operatorResolver;
060
061
062        /**
063         * All matrix operators present in _pdbx_struct_oper_list.
064         * Identifiers (_pdbx_struct_oper_list.id) to matrix operators.
065         */
066        private Map<String, Matrix4d> allTransformations;
067
068        private List<String> modelIndex = new ArrayList<>();
069
070        public BiologicalAssemblyBuilder(){
071                init();
072        }
073
074        /**
075         * Builds a Structure object containing the quaternary structure built from given asymUnit and transformations,
076         * by adding symmetry partners as new models.
077         * The output Structure will be different depending on the multiModel parameter:
078         * <li>
079         * the symmetry-expanded chains are added as new models, one per transformId. All original models but
080         * the first one are discarded.
081         * </li>
082         * <li>
083         * as original with symmetry-expanded chains added with renamed chain ids and names (in the form
084         * originalAsymId_transformId and originalAuthId_transformId)
085         * </li>
086         * @param asymUnit
087         * @param transformations
088         * @param useAsymIds if true use {@link Chain#getId()} to match the ids in the BiologicalAssemblyTransformation (needed if data read from mmCIF),
089         * if false use {@link Chain#getName()} for the chain matching (needed if data read from PDB).
090         * @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
091         * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
092         * @return
093         */
094        public Structure rebuildQuaternaryStructure(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations, boolean useAsymIds, boolean multiModel) {
095
096                // ensure that new chains are build in the same order as they appear in the asymmetric unit
097                orderTransformationsByChainId(asymUnit, transformations);
098
099                Structure s = asymUnit.clone();
100
101                Map<Integer, EntityInfo> entityInfoMap = new HashMap<>();
102                // this resets all models (not only the first one): this is important for NMR (multi-model)
103                // like that we can be sure we start with an empty structures and we add models or chains to it
104                s.resetModels();
105                s.setEntityInfos(new ArrayList<>());
106
107                for (BiologicalAssemblyTransformation transformation : transformations){
108
109                        List<Chain> chainsToTransform = new ArrayList<>();
110
111                        // note: for NMR structures (or any multi-model) we use the first model only and throw away the rest
112                        if (useAsymIds) {
113                                Chain c = asymUnit.getChain(transformation.getChainId());
114                                chainsToTransform.add(c);
115                        } else {
116                                Chain polyC = asymUnit.getPolyChainByPDB(transformation.getChainId());
117                                List<Chain> nonPolyCs = asymUnit.getNonPolyChainsByPDB(transformation.getChainId());
118                                Chain waterC = asymUnit.getWaterChainByPDB(transformation.getChainId());
119                                if (polyC!=null)
120                                        chainsToTransform.add(polyC);
121                                if (!nonPolyCs.isEmpty())
122                                        chainsToTransform.addAll(nonPolyCs);
123                                if (waterC!=null)
124                                        chainsToTransform.add(waterC);
125                        }
126
127                        for (Chain c: chainsToTransform) {
128
129                                Chain chain = (Chain)c.clone();
130
131                                Calc.transform(chain, transformation.getTransformationMatrix());
132
133                                String transformId = transformation.getId();
134
135                                // note that the Structure.addChain/Structure.addModel methods set the parent reference to the new Structure
136
137                                if (multiModel)
138                                        addChainMultiModel(s, chain, transformId);
139                                else
140                                        addChainFlattened(s, chain, transformId);
141
142                                EntityInfo entityInfo;
143                                if (!entityInfoMap.containsKey(chain.getEntityInfo().getMolId())) {
144                                        entityInfo = new EntityInfo(chain.getEntityInfo());
145                                        entityInfoMap.put(chain.getEntityInfo().getMolId(), entityInfo);
146                                        s.addEntityInfo(entityInfo);
147                                } else {
148                                        entityInfo = entityInfoMap.get(chain.getEntityInfo().getMolId());
149                                }
150                                chain.setEntityInfo(entityInfo);
151                                entityInfo.addChain(chain);
152
153                        }
154                }
155
156                s.setBiologicalAssembly(true);
157                return s;
158        }
159
160        /**
161         * Orders model transformations by chain ids in the same order as in the asymmetric unit
162         * @param asymUnit
163         * @param transformations
164         */
165        private void orderTransformationsByChainId(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations) {
166                final List<String> chainIds = getChainIds(asymUnit);
167                Collections.sort(transformations, new Comparator<BiologicalAssemblyTransformation>() {
168                        @Override
169                        public int compare(BiologicalAssemblyTransformation t1, BiologicalAssemblyTransformation t2) {
170                                // set sort order only if the two ids are identical
171                                if (t1.getId().equals(t2.getId())) {
172                                         return chainIds.indexOf(t1.getChainId()) - chainIds.indexOf(t2.getChainId());
173                                } else {
174                                        return t1.getId().compareTo(t2.getId());
175                                }
176                    }
177                });
178        }
179
180        /**
181         * Returns a list of chain ids in the order they are specified in the ATOM
182         * records in the asymmetric unit
183         * @param asymUnit
184         * @return
185         */
186        private List<String> getChainIds(Structure asymUnit) {
187                List<String> chainIds = new ArrayList<String>();
188                for ( Chain c : asymUnit.getChains()){
189                        String intChainID = c.getId();
190                        chainIds.add(intChainID);
191                }
192                return chainIds;
193        }
194
195        /**
196         * Adds a chain to the given structure to form a biological assembly,
197         * adding the symmetry expanded chains as new models per transformId.
198         * @param s
199         * @param newChain
200         * @param transformId
201         */
202        private void addChainMultiModel(Structure s, Chain newChain, String transformId) {
203
204                // multi-model bioassembly
205
206                if ( modelIndex.size() == 0)
207                        modelIndex.add("PLACEHOLDER FOR ASYM UNIT");
208
209                int modelCount = modelIndex.indexOf(transformId);
210                if ( modelCount == -1)  {
211                        modelIndex.add(transformId);
212                        modelCount = modelIndex.indexOf(transformId);
213                }
214
215                if (modelCount == 0) {
216                        s.addChain(newChain);
217                } else if (modelCount > s.nrModels()) {
218                        List<Chain> newModel = new ArrayList<>();
219                        newModel.add(newChain);
220                        s.addModel(newModel);
221                } else {
222                        s.addChain(newChain, modelCount-1);
223                }
224
225        }
226
227        /**
228         * Adds a chain to the given structure to form a biological assembly,
229         * adding the symmetry-expanded chains as new chains with renamed
230         * chain ids and names (in the form originalAsymId_transformId and originalAuthId_transformId).
231         * @param s
232         * @param newChain
233         * @param transformId
234         */
235        private void addChainFlattened(Structure s, Chain newChain, String transformId) {
236                newChain.setId(newChain.getId()+SYM_CHAIN_ID_SEPARATOR+transformId);
237                newChain.setName(newChain.getName()+SYM_CHAIN_ID_SEPARATOR+transformId);
238                s.addChain(newChain);
239        }
240
241        /**
242         * Returns a list of transformation matrices for the generation of a macromolecular
243         * assembly for the specified assembly Id.
244         *
245         * @param psa
246         * @param psags
247         * @param operators
248         * @return list of transformation matrices to generate macromolecular assembly
249         */
250        public ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationList(PdbxStructAssembly psa, List<PdbxStructAssemblyGen> psags, List<PdbxStructOperList> operators) {
251                init();
252
253                // first we populate the list of all operators from pdbx_struct_oper_list so that we can then
254                // get them from getBioUnitTransformationsListUnaryOperators() and getBioUnitTransformationsListBinaryOperators()
255                for (PdbxStructOperList oper: operators){
256                        try {
257                                Matrix4d m = new Matrix4d();
258                                m.m00 = Double.parseDouble(oper.getMatrix11());
259                                m.m01 = Double.parseDouble(oper.getMatrix12());
260                                m.m02 = Double.parseDouble(oper.getMatrix13());
261
262                                m.m10 = Double.parseDouble(oper.getMatrix21());
263                                m.m11 = Double.parseDouble(oper.getMatrix22());
264                                m.m12 = Double.parseDouble(oper.getMatrix23());
265
266                                m.m20 = Double.parseDouble(oper.getMatrix31());
267                                m.m21 = Double.parseDouble(oper.getMatrix32());
268                                m.m22 = Double.parseDouble(oper.getMatrix33());
269
270                                m.m03 = Double.parseDouble(oper.getVector1());
271                                m.m13 = Double.parseDouble(oper.getVector2());
272                                m.m23 = Double.parseDouble(oper.getVector3());
273
274                                m.m30 = 0;
275                                m.m31 = 0;
276                                m.m32 = 0;
277                                m.m33 = 1;
278
279                                allTransformations.put(oper.getId(), m);
280
281                        } catch (NumberFormatException e) {
282                                logger.warn("Could not parse a matrix value from pdbx_struct_oper_list for id {}. The operator id will be ignored. Error: {}", oper.getId(), e.getMessage());
283                        }
284                }
285
286                ArrayList<BiologicalAssemblyTransformation> transformations = getBioUnitTransformationsListUnaryOperators(psa.getId(), psags);
287                transformations.addAll(getBioUnitTransformationsListBinaryOperators(psa.getId(), psags));
288                transformations.trimToSize();
289                return transformations;
290        }
291
292
293        private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListBinaryOperators(String assemblyId, List<PdbxStructAssemblyGen> psags) {
294
295                ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<>();
296
297                List<OrderedPair<String>> operators = operatorResolver.getBinaryOperators();
298
299
300                for ( PdbxStructAssemblyGen psag : psags){
301                        if ( psag.getAssembly_id().equals(assemblyId)) {
302
303                                List<String>asymIds= Arrays.asList(psag.getAsym_id_list().split(","));
304
305                                operatorResolver.parseOperatorExpressionString(psag.getOper_expression());
306
307                                // apply binary operators to the specified chains
308                                // Example 1M4X: generates all products of transformation matrices (1-60)(61-88)
309                                for (String chainId : asymIds) {
310
311                                        for (OrderedPair<String> operator : operators) {
312                                                Matrix4d original1 = allTransformations.get(operator.getElement1());
313                                                Matrix4d original2 = allTransformations.get(operator.getElement2());
314                                                if (original1 == null || original2 == null) {
315                                                        logger.warn("Could not find matrix operator for operator id {} or {}. Assembly id {} will not contain the composed operator.", operator.getElement1(), operator.getElement2(), assemblyId);
316                                                        continue;
317                                                }
318                                                Matrix4d composed = new Matrix4d(original1);
319                                                composed.mul(original2);
320                                                BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation();
321                                                transform.setChainId(chainId);
322                                                transform.setId(operator.getElement1() + COMPOSED_OPERATOR_SEPARATOR + operator.getElement2());
323                                                transform.setTransformationMatrix(composed);
324                                                transformations.add(transform);
325                                        }
326                                }
327                        }
328
329                }
330
331                return transformations;
332        }
333
334        private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListUnaryOperators(String assemblyId, List<PdbxStructAssemblyGen> psags) {
335                ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<BiologicalAssemblyTransformation>();
336
337
338                for ( PdbxStructAssemblyGen psag : psags){
339                        if ( psag.getAssembly_id().equals(assemblyId)) {
340
341                                operatorResolver.parseOperatorExpressionString(psag.getOper_expression());
342                                List<String> operators = operatorResolver.getUnaryOperators();
343
344                                List<String>asymIds= Arrays.asList(psag.getAsym_id_list().split(","));
345
346                                // apply unary operators to the specified chains
347                                for (String chainId : asymIds) {
348                                        for (String operator : operators) {
349                                                Matrix4d original = allTransformations.get(operator);
350                                                if (original == null) {
351                                                        logger.warn("Could not find matrix operator for operator id {}. Assembly id {} will not contain the operator.", operator, assemblyId);
352                                                        continue;
353                                                }
354                                                BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation();
355                                                transform.setChainId(chainId);
356                                                transform.setId(operator);
357                                                transform.setTransformationMatrix(original);
358                                                transformations.add(transform);
359                                        }
360                                }
361                        }
362                }
363
364                return transformations;
365        }
366
367        private void init(){
368                operatorResolver= new OperatorResolver();
369                allTransformations = new HashMap<>();
370        }
371}