001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021 022package org.biojava.nbio.structure.quaternary; 023 024import org.biojava.nbio.structure.Calc; 025import org.biojava.nbio.structure.Chain; 026import org.biojava.nbio.structure.EntityInfo; 027import org.biojava.nbio.structure.Structure; 028import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly; 029import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen; 030import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034import javax.vecmath.Matrix4d; 035import java.util.*; 036 037/** 038 * Reconstructs the quaternary structure of a protein from an asymmetric unit 039 * 040 * @author Peter Rose 041 * @author Andreas Prlic 042 * @author Jose Duarte 043 * 044 */ 045public class BiologicalAssemblyBuilder { 046 047 private static final Logger logger = LoggerFactory.getLogger(BiologicalAssemblyBuilder.class); 048 049 /** 050 * The character separating the original chain identifier from the operator id. 051 */ 052 public static final String SYM_CHAIN_ID_SEPARATOR = "_"; 053 054 /** 055 * The character separating operator ids that are composed. 056 */ 057 public static final String COMPOSED_OPERATOR_SEPARATOR = "x"; 058 059 private OperatorResolver operatorResolver; 060 061 062 /** 063 * All matrix operators present in _pdbx_struct_oper_list. 064 * Identifiers (_pdbx_struct_oper_list.id) to matrix operators. 065 */ 066 private Map<String, Matrix4d> allTransformations; 067 068 private List<String> modelIndex = new ArrayList<>(); 069 070 public BiologicalAssemblyBuilder(){ 071 init(); 072 } 073 074 /** 075 * Builds a Structure object containing the quaternary structure built from given asymUnit and transformations, 076 * by adding symmetry partners as new models. 077 * The output Structure will be different depending on the multiModel parameter: 078 * <li> 079 * the symmetry-expanded chains are added as new models, one per transformId. All original models but 080 * the first one are discarded. 081 * </li> 082 * <li> 083 * as original with symmetry-expanded chains added with renamed chain ids and names (in the form 084 * originalAsymId_transformId and originalAuthId_transformId) 085 * </li> 086 * @param asymUnit 087 * @param transformations 088 * @param useAsymIds if true use {@link Chain#getId()} to match the ids in the BiologicalAssemblyTransformation (needed if data read from mmCIF), 089 * if false use {@link Chain#getName()} for the chain matching (needed if data read from PDB). 090 * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, 091 * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). 092 * @return 093 */ 094 public Structure rebuildQuaternaryStructure(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations, boolean useAsymIds, boolean multiModel) { 095 096 // ensure that new chains are build in the same order as they appear in the asymmetric unit 097 orderTransformationsByChainId(asymUnit, transformations); 098 099 Structure s = asymUnit.clone(); 100 101 Map<Integer, EntityInfo> entityInfoMap = new HashMap<>(); 102 // this resets all models (not only the first one): this is important for NMR (multi-model) 103 // like that we can be sure we start with an empty structures and we add models or chains to it 104 s.resetModels(); 105 s.setEntityInfos(new ArrayList<>()); 106 107 for (BiologicalAssemblyTransformation transformation : transformations){ 108 109 List<Chain> chainsToTransform = new ArrayList<>(); 110 111 // note: for NMR structures (or any multi-model) we use the first model only and throw away the rest 112 if (useAsymIds) { 113 Chain c = asymUnit.getChain(transformation.getChainId()); 114 chainsToTransform.add(c); 115 } else { 116 Chain polyC = asymUnit.getPolyChainByPDB(transformation.getChainId()); 117 List<Chain> nonPolyCs = asymUnit.getNonPolyChainsByPDB(transformation.getChainId()); 118 Chain waterC = asymUnit.getWaterChainByPDB(transformation.getChainId()); 119 if (polyC!=null) 120 chainsToTransform.add(polyC); 121 if (!nonPolyCs.isEmpty()) 122 chainsToTransform.addAll(nonPolyCs); 123 if (waterC!=null) 124 chainsToTransform.add(waterC); 125 } 126 127 for (Chain c: chainsToTransform) { 128 129 Chain chain = (Chain)c.clone(); 130 131 Calc.transform(chain, transformation.getTransformationMatrix()); 132 133 String transformId = transformation.getId(); 134 135 // note that the Structure.addChain/Structure.addModel methods set the parent reference to the new Structure 136 137 if (multiModel) 138 addChainMultiModel(s, chain, transformId); 139 else 140 addChainFlattened(s, chain, transformId); 141 142 EntityInfo entityInfo; 143 if (!entityInfoMap.containsKey(chain.getEntityInfo().getMolId())) { 144 entityInfo = new EntityInfo(chain.getEntityInfo()); 145 entityInfoMap.put(chain.getEntityInfo().getMolId(), entityInfo); 146 s.addEntityInfo(entityInfo); 147 } else { 148 entityInfo = entityInfoMap.get(chain.getEntityInfo().getMolId()); 149 } 150 chain.setEntityInfo(entityInfo); 151 entityInfo.addChain(chain); 152 153 } 154 } 155 156 s.setBiologicalAssembly(true); 157 return s; 158 } 159 160 /** 161 * Orders model transformations by chain ids in the same order as in the asymmetric unit 162 * @param asymUnit 163 * @param transformations 164 */ 165 private void orderTransformationsByChainId(Structure asymUnit, List<BiologicalAssemblyTransformation> transformations) { 166 final List<String> chainIds = getChainIds(asymUnit); 167 Collections.sort(transformations, new Comparator<BiologicalAssemblyTransformation>() { 168 @Override 169 public int compare(BiologicalAssemblyTransformation t1, BiologicalAssemblyTransformation t2) { 170 // set sort order only if the two ids are identical 171 if (t1.getId().equals(t2.getId())) { 172 return chainIds.indexOf(t1.getChainId()) - chainIds.indexOf(t2.getChainId()); 173 } else { 174 return t1.getId().compareTo(t2.getId()); 175 } 176 } 177 }); 178 } 179 180 /** 181 * Returns a list of chain ids in the order they are specified in the ATOM 182 * records in the asymmetric unit 183 * @param asymUnit 184 * @return 185 */ 186 private List<String> getChainIds(Structure asymUnit) { 187 List<String> chainIds = new ArrayList<String>(); 188 for ( Chain c : asymUnit.getChains()){ 189 String intChainID = c.getId(); 190 chainIds.add(intChainID); 191 } 192 return chainIds; 193 } 194 195 /** 196 * Adds a chain to the given structure to form a biological assembly, 197 * adding the symmetry expanded chains as new models per transformId. 198 * @param s 199 * @param newChain 200 * @param transformId 201 */ 202 private void addChainMultiModel(Structure s, Chain newChain, String transformId) { 203 204 // multi-model bioassembly 205 206 if ( modelIndex.size() == 0) 207 modelIndex.add("PLACEHOLDER FOR ASYM UNIT"); 208 209 int modelCount = modelIndex.indexOf(transformId); 210 if ( modelCount == -1) { 211 modelIndex.add(transformId); 212 modelCount = modelIndex.indexOf(transformId); 213 } 214 215 if (modelCount == 0) { 216 s.addChain(newChain); 217 } else if (modelCount > s.nrModels()) { 218 List<Chain> newModel = new ArrayList<>(); 219 newModel.add(newChain); 220 s.addModel(newModel); 221 } else { 222 s.addChain(newChain, modelCount-1); 223 } 224 225 } 226 227 /** 228 * Adds a chain to the given structure to form a biological assembly, 229 * adding the symmetry-expanded chains as new chains with renamed 230 * chain ids and names (in the form originalAsymId_transformId and originalAuthId_transformId). 231 * @param s 232 * @param newChain 233 * @param transformId 234 */ 235 private void addChainFlattened(Structure s, Chain newChain, String transformId) { 236 newChain.setId(newChain.getId()+SYM_CHAIN_ID_SEPARATOR+transformId); 237 newChain.setName(newChain.getName()+SYM_CHAIN_ID_SEPARATOR+transformId); 238 s.addChain(newChain); 239 } 240 241 /** 242 * Returns a list of transformation matrices for the generation of a macromolecular 243 * assembly for the specified assembly Id. 244 * 245 * @param psa 246 * @param psags 247 * @param operators 248 * @return list of transformation matrices to generate macromolecular assembly 249 */ 250 public ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationList(PdbxStructAssembly psa, List<PdbxStructAssemblyGen> psags, List<PdbxStructOperList> operators) { 251 init(); 252 253 // first we populate the list of all operators from pdbx_struct_oper_list so that we can then 254 // get them from getBioUnitTransformationsListUnaryOperators() and getBioUnitTransformationsListBinaryOperators() 255 for (PdbxStructOperList oper: operators){ 256 try { 257 Matrix4d m = new Matrix4d(); 258 m.m00 = Double.parseDouble(oper.getMatrix11()); 259 m.m01 = Double.parseDouble(oper.getMatrix12()); 260 m.m02 = Double.parseDouble(oper.getMatrix13()); 261 262 m.m10 = Double.parseDouble(oper.getMatrix21()); 263 m.m11 = Double.parseDouble(oper.getMatrix22()); 264 m.m12 = Double.parseDouble(oper.getMatrix23()); 265 266 m.m20 = Double.parseDouble(oper.getMatrix31()); 267 m.m21 = Double.parseDouble(oper.getMatrix32()); 268 m.m22 = Double.parseDouble(oper.getMatrix33()); 269 270 m.m03 = Double.parseDouble(oper.getVector1()); 271 m.m13 = Double.parseDouble(oper.getVector2()); 272 m.m23 = Double.parseDouble(oper.getVector3()); 273 274 m.m30 = 0; 275 m.m31 = 0; 276 m.m32 = 0; 277 m.m33 = 1; 278 279 allTransformations.put(oper.getId(), m); 280 281 } catch (NumberFormatException e) { 282 logger.warn("Could not parse a matrix value from pdbx_struct_oper_list for id {}. The operator id will be ignored. Error: {}", oper.getId(), e.getMessage()); 283 } 284 } 285 286 ArrayList<BiologicalAssemblyTransformation> transformations = getBioUnitTransformationsListUnaryOperators(psa.getId(), psags); 287 transformations.addAll(getBioUnitTransformationsListBinaryOperators(psa.getId(), psags)); 288 transformations.trimToSize(); 289 return transformations; 290 } 291 292 293 private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListBinaryOperators(String assemblyId, List<PdbxStructAssemblyGen> psags) { 294 295 ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<>(); 296 297 List<OrderedPair<String>> operators = operatorResolver.getBinaryOperators(); 298 299 300 for ( PdbxStructAssemblyGen psag : psags){ 301 if ( psag.getAssembly_id().equals(assemblyId)) { 302 303 List<String>asymIds= Arrays.asList(psag.getAsym_id_list().split(",")); 304 305 operatorResolver.parseOperatorExpressionString(psag.getOper_expression()); 306 307 // apply binary operators to the specified chains 308 // Example 1M4X: generates all products of transformation matrices (1-60)(61-88) 309 for (String chainId : asymIds) { 310 311 for (OrderedPair<String> operator : operators) { 312 Matrix4d original1 = allTransformations.get(operator.getElement1()); 313 Matrix4d original2 = allTransformations.get(operator.getElement2()); 314 if (original1 == null || original2 == null) { 315 logger.warn("Could not find matrix operator for operator id {} or {}. Assembly id {} will not contain the composed operator.", operator.getElement1(), operator.getElement2(), assemblyId); 316 continue; 317 } 318 Matrix4d composed = new Matrix4d(original1); 319 composed.mul(original2); 320 BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation(); 321 transform.setChainId(chainId); 322 transform.setId(operator.getElement1() + COMPOSED_OPERATOR_SEPARATOR + operator.getElement2()); 323 transform.setTransformationMatrix(composed); 324 transformations.add(transform); 325 } 326 } 327 } 328 329 } 330 331 return transformations; 332 } 333 334 private ArrayList<BiologicalAssemblyTransformation> getBioUnitTransformationsListUnaryOperators(String assemblyId, List<PdbxStructAssemblyGen> psags) { 335 ArrayList<BiologicalAssemblyTransformation> transformations = new ArrayList<BiologicalAssemblyTransformation>(); 336 337 338 for ( PdbxStructAssemblyGen psag : psags){ 339 if ( psag.getAssembly_id().equals(assemblyId)) { 340 341 operatorResolver.parseOperatorExpressionString(psag.getOper_expression()); 342 List<String> operators = operatorResolver.getUnaryOperators(); 343 344 List<String>asymIds= Arrays.asList(psag.getAsym_id_list().split(",")); 345 346 // apply unary operators to the specified chains 347 for (String chainId : asymIds) { 348 for (String operator : operators) { 349 Matrix4d original = allTransformations.get(operator); 350 if (original == null) { 351 logger.warn("Could not find matrix operator for operator id {}. Assembly id {} will not contain the operator.", operator, assemblyId); 352 continue; 353 } 354 BiologicalAssemblyTransformation transform = new BiologicalAssemblyTransformation(); 355 transform.setChainId(chainId); 356 transform.setId(operator); 357 transform.setTransformationMatrix(original); 358 transformations.add(transform); 359 } 360 } 361 } 362 } 363 364 return transformations; 365 } 366 367 private void init(){ 368 operatorResolver= new OperatorResolver(); 369 allTransformations = new HashMap<>(); 370 } 371}