001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.nbio.structure.quaternary; 023 024import org.biojava.nbio.structure.Calc; 025import org.biojava.nbio.structure.Chain; 026import org.biojava.nbio.structure.EntityInfo; 027import org.biojava.nbio.structure.Structure; 028import org.rcsb.cif.schema.mm.PdbxStructAssembly; 029import org.rcsb.cif.schema.mm.PdbxStructAssemblyGen; 030import org.rcsb.cif.schema.mm.PdbxStructOperList; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034import javax.vecmath.Matrix4d; 035import java.util.*; 036 037/** 038 * Reconstructs the quaternary structure of a protein from an asymmetric unit 039 * 040 * @author Peter Rose 041 * @author Andreas Prlic 042 * @author Jose Duarte 043 * 044 */ 045public class BiologicalAssemblyBuilder { 046 047 private static final Logger logger = LoggerFactory.getLogger(BiologicalAssemblyBuilder.class); 048 049 /** 050 * The character separating the original chain identifier from the operator id. 051 */ 052 public static final String SYM_CHAIN_ID_SEPARATOR = "_"; 053 054 /** 055 * The character separating operator ids that are composed. 056 */ 057 public static final String COMPOSED_OPERATOR_SEPARATOR = "x"; 058 059 private OperatorResolver operatorResolver; 060 061 062 /** 063 * All matrix operators present in _pdbx_struct_oper_list. 064 * Identifiers (_pdbx_struct_oper_list.id) to matrix operators. 065 */ 066 private Map<String, Matrix4d> allTransformations; 067 068 private List<String> modelIndex = new ArrayList<>(); 069 070 public BiologicalAssemblyBuilder(){ 071 init(); 072 } 073 074 /** 075 * Builds a Structure object containing the quaternary structure built from given asymUnit and transformations, 076 * by adding symmetry partners as new models. 077 * The output Structure will be different depending on the multiModel parameter: 078 * <li> 079 * the symmetry-expanded chains are added as new models, one per transformId. All original models but 080 * the first one are discarded. 081 * </li> 082 * <li> 083 * as original with symmetry-expanded chains added with renamed chain ids and names (in the form 084 * originalAsymId_transformId and originalAuthId_transformId) 085 * </li> 086 * @param asymUnit 087 * @param transformations 088 * @param useAsymIds if true use {@link Chain#getId()} to match the ids in the BiologicalAssemblyTransformation (needed if data read from mmCIF), 089 * if false use {@link Chain#getName()} for the chain matching (needed if data read from PDB). 090 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 091 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 092 * @return 093 */ 094 public Structure rebuildQuaternaryStructure(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations, boolean useAsymIds, boolean multiModel) { 095 096 // ensure that new chains are build in the same order as they appear in the asymmetric unit 097 orderTransformationsByChainId(asymUnit, transformations); 098 099 Structure s = asymUnit.clone(); 100 101 Map<Integer, EntityInfo> entityInfoMap = new HashMap<>(); 102 // this resets all models (not only the first one): this is important for NMR (multi-model) 103 // like that we can be sure we start with an empty structures and we add models or chains to it 104 s.resetModels(); 105 s.setEntityInfos(new ArrayList<>()); 106 107 for (BiologicalAssemblyTransformation transformation : transformations){ 108 109 List<Chain> chainsToTransform = new ArrayList<>(); 110 111 // note: for NMR structures (or any multi-model) we use the first model only and throw away the rest 112 if (useAsymIds) { 113 Chain c = asymUnit.getChain(transformation.getChainId()); 114 chainsToTransform.add(c); 115 } else { 116 Chain polyC = asymUnit.getPolyChainByPDB(transformation.getChainId()); 117 List<Chain> nonPolyCs = asymUnit.getNonPolyChainsByPDB(transformation.getChainId()); 118 Chain waterC = asymUnit.getWaterChainByPDB(transformation.getChainId()); 119 if (polyC!=null) 120 chainsToTransform.add(polyC); 121 if (!nonPolyCs.isEmpty()) 122 chainsToTransform.addAll(nonPolyCs); 123 if (waterC!=null) 124 chainsToTransform.add(waterC); 125 } 126 127 for (Chain c: chainsToTransform) { 128 129 Chain chain = (Chain)c.clone(); 130 131 Calc.transform(chain, transformation.getTransformationMatrix()); 132 133 String transformId = transformation.getId(); 134 135 // note that the Structure.addChain/Structure.addModel methods set the parent reference to the new Structure 136 137 if (multiModel) 138 addChainMultiModel(s, chain, transformId); 139 else 140 addChainFlattened(s, chain, transformId); 141 142 EntityInfo entityInfo; 143 if (!entityInfoMap.containsKey(chain.getEntityInfo().getMolId())) { 144 entityInfo = new EntityInfo(chain.getEntityInfo()); 145 entityInfoMap.put(chain.getEntityInfo().getMolId(), entityInfo); 146 s.addEntityInfo(entityInfo); 147 } else { 148 entityInfo = entityInfoMap.get(chain.getEntityInfo().getMolId()); 149 } 150 chain.setEntityInfo(entityInfo); 151 entityInfo.addChain(chain); 152 153 } 154 } 155 156 s.setBiologicalAssembly(true); 157 return s; 158 } 159 160 /** 161 * Orders model transformations by chain ids in the same order as in the asymmetric unit 162 * @param asymUnit 163 * @param transformations 164 */ 165 private void orderTransformationsByChainId(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations) { 166 final List<String> chainIds = getChainIds(asymUnit); 167 Collections.sort(transformations, new Comparator<BiologicalAssemblyTransformation>() { 168 @Override 169 public int compare(BiologicalAssemblyTransformation t1, BiologicalAssemblyTransformation t2) { 170 // set sort order only if the two ids are identical 171 if (t1.getId().equals(t2.getId())) { 172 return chainIds.indexOf(t1.getChainId()) - chainIds.indexOf(t2.getChainId()); 173 } else { 174 return t1.getId().compareTo(t2.getId()); 175 } 176 } 177 }); 178 } 179 180 /** 181 * Returns a list of chain ids in the order they are specified in the ATOM 182 * records in the asymmetric unit 183 * @param asymUnit 184 * @return 185 */ 186 private List<String> getChainIds(Structure asymUnit) { 187 List<String> chainIds = new ArrayList<String>(); 188 for ( Chain c : asymUnit.getChains()){ 189 String intChainID = c.getId(); 190 chainIds.add(intChainID); 191 } 192 return chainIds; 193 } 194 195 /** 196 * Adds a chain to the given structure to form a biological assembly, 197 * adding the symmetry expanded chains as new models per transformId. 198 * @param s 199 * @param newChain 200 * @param transformId 201 */ 202 private void addChainMultiModel(Structure s, Chain newChain, String transformId) { 203 204 // multi-model bioassembly 205 206 if ( modelIndex.size() == 0) 207 modelIndex.add("PLACEHOLDER FOR ASYM UNIT"); 208 209 int modelCount = modelIndex.indexOf(transformId); 210 if ( modelCount == -1) { 211 modelIndex.add(transformId); 212 modelCount = modelIndex.indexOf(transformId); 213 } 214 215 if (modelCount == 0) { 216 s.addChain(newChain); 217 } else if (modelCount > s.nrModels()) { 218 List<Chain> newModel = new ArrayList<>(); 219 newModel.add(newChain); 220 s.addModel(newModel); 221 } else { 222 s.addChain(newChain, modelCount-1); 223 } 224 225 } 226 227 /** 228 * Adds a chain to the given structure to form a biological assembly, 229 * adding the symmetry-expanded chains as new chains with renamed 230 * chain ids and names (in the form originalAsymId_transformId and originalAuthId_transformId). 231 * @param s 232 * @param newChain 233 * @param transformId 234 */ 235 private void addChainFlattened(Structure s, Chain newChain, String transformId) { 236 newChain.setId(newChain.getId()+SYM_CHAIN_ID_SEPARATOR+transformId); 237 newChain.setName(newChain.getName()+SYM_CHAIN_ID_SEPARATOR+transformId); 238 s.addChain(newChain); 239 } 240 241 /** 242 * Returns a list of transformation matrices for the generation of a macromolecular 243 * assembly for the specified assembly Id. 244 * 245 * @param pdbxStructAssembly 246 * @param assemblyIndex 247 * @param pdbxStructAssemblyGen 248 * @param pdbxStructOperList 249 * @return list of transformation matrices to generate macromolecular assembly 250 */ 251 public List<BiologicalAssemblyTransformation> getBioUnitTransformationList(PdbxStructAssembly pdbxStructAssembly, 252 int assemblyIndex, 253 PdbxStructAssemblyGen pdbxStructAssemblyGen, 254 PdbxStructOperList pdbxStructOperList) { 255 init(); 256 257 // first we populate the list of all operators from pdbx_struct_oper_list so that we can then 258 // get them from getBioUnitTransformationsListUnaryOperators() and getBioUnitTransformationsListBinaryOperators() 259 for (int i = 0; i < pdbxStructOperList.getRowCount(); i++) { 260 try { 261 Matrix4d m = new Matrix4d(); 262 m.m00 = pdbxStructOperList.getMatrix11().get(i); 263 m.m01 = pdbxStructOperList.getMatrix12().get(i); 264 m.m02 = pdbxStructOperList.getMatrix13().get(i); 265 266 m.m10 = pdbxStructOperList.getMatrix21().get(i); 267 m.m11 = pdbxStructOperList.getMatrix22().get(i); 268 m.m12 = pdbxStructOperList.getMatrix23().get(i); 269 270 m.m20 = pdbxStructOperList.getMatrix31().get(i); 271 m.m21 = pdbxStructOperList.getMatrix32().get(i); 272 m.m22 = pdbxStructOperList.getMatrix33().get(i); 273 274 m.m03 = pdbxStructOperList.getVector1().get(i); 275 m.m13 = pdbxStructOperList.getVector2().get(i); 276 m.m23 = pdbxStructOperList.getVector3().get(i); 277 278 m.m30 = 0; 279 m.m31 = 0; 280 m.m32 = 0; 281 m.m33 = 1; 282 283 allTransformations.put(pdbxStructOperList.getId().get(i), m); 284 } catch (NumberFormatException e) { 285 logger.warn("Could not parse a matrix value from pdbx_struct_oper_list for id {}. The operator id will be ignored. Error: {}", pdbxStructOperList.getId().get(i), e.getMessage()); 286 } 287 } 288 289 String assemblyId = pdbxStructAssembly.getId().get(assemblyIndex); 290 ArrayList<BiologicalAssemblyTransformation> transformations = getBioUnitTransformationsListUnaryOperators(assemblyId, pdbxStructAssemblyGen); 291 transformations.addAll(getBioUnitTransformationsListBinaryOperators(assemblyId, pdbxStructAssemblyGen)); 292 transformations.trimToSize(); 293 return transformations; 294 } 295 296 private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListBinaryOperators(String assemblyId, PdbxStructAssemblyGen pdbxStructAssemblyGen) { 297 ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<>(); 298 List<OrderedPair<String>> operators = operatorResolver.getBinaryOperators(); 299 300 for (int i = 0; i < pdbxStructAssemblyGen.getRowCount(); i++) { 301 if (!pdbxStructAssemblyGen.getAssemblyId().get(i).equals(assemblyId)) { 302 continue; 303 } 304 305 String[] asymIds= pdbxStructAssemblyGen.getAsymIdList().get(i).split(","); 306 operatorResolver.parseOperatorExpressionString(pdbxStructAssemblyGen.getOperExpression().get(i)); 307 308 // apply binary operators to the specified chains 309 // Example 1M4X: generates all products of transformation matrices (1-60)(61-88) 310 for (String chainId : asymIds) { 311 for (OrderedPair<String> operator : operators) { 312 Matrix4d original1 = allTransformations.get(operator.getElement1()); 313 Matrix4d original2 = allTransformations.get(operator.getElement2()); 314 if (original1 == null || original2 == null) { 315 logger.warn("Could not find matrix operator for operator id {} or {}. Assembly id {} will not contain the composed operator.", operator.getElement1(), operator.getElement2(), assemblyId); 316 continue; 317 } 318 Matrix4d composed = new Matrix4d(original1); 319 composed.mul(original2); 320 BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation(); 321 transform.setChainId(chainId); 322 transform.setId(operator.getElement1() + COMPOSED_OPERATOR_SEPARATOR + operator.getElement2()); 323 transform.setTransformationMatrix(composed); 324 transformations.add(transform); 325 } 326 } 327 } 328 329 return transformations; 330 } 331 332 private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListUnaryOperators(String assemblyId, PdbxStructAssemblyGen pdbxStructAssemblyGen) { 333 ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<>(); 334 335 for (int i = 0; i < pdbxStructAssemblyGen.getRowCount(); i++) { 336 if (!pdbxStructAssemblyGen.getAssemblyId().get(i).equals(assemblyId)) { 337 continue; 338 } 339 340 operatorResolver.parseOperatorExpressionString(pdbxStructAssemblyGen.getOperExpression().get(i)); 341 List<String> operators = operatorResolver.getUnaryOperators(); 342 String[] asymIds = pdbxStructAssemblyGen.getAsymIdList().get(i).split(","); 343 344 // apply unary operators to the specified chains 345 for (String chainId : asymIds) { 346 for (String operator : operators) { 347 Matrix4d original = allTransformations.get(operator); 348 if (original == null) { 349 logger.warn("Could not find matrix operator for operator id {}. Assembly id {} will not contain the operator.", operator, assemblyId); 350 continue; 351 } 352 BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation(); 353 transform.setChainId(chainId); 354 transform.setId(operator); 355 transform.setTransformationMatrix(original); 356 transformations.add(transform); 357 } 358 } 359 } 360 361 return transformations; 362 } 363 364 private void init() { 365 operatorResolver = new OperatorResolver(); 366 allTransformations = new HashMap<>(); 367 } 368}