001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.io.mmcif; 022 023 024import java.lang.reflect.Field; 025import java.util.*; 026 027import org.biojava.nbio.structure.Atom; 028import org.biojava.nbio.structure.Chain; 029import org.biojava.nbio.structure.Element; 030import org.biojava.nbio.structure.EntityType; 031import org.biojava.nbio.structure.Group; 032import org.biojava.nbio.structure.GroupType; 033import org.biojava.nbio.structure.Structure; 034import org.biojava.nbio.structure.io.FileConvert; 035import org.biojava.nbio.structure.io.mmcif.model.AbstractBean; 036import org.biojava.nbio.structure.io.mmcif.model.AtomSite; 037import org.biojava.nbio.structure.io.mmcif.model.CIFLabel; 038import org.biojava.nbio.structure.io.mmcif.model.Cell; 039import org.biojava.nbio.structure.io.mmcif.model.IgnoreField; 040import org.biojava.nbio.structure.io.mmcif.model.Symmetry; 041import org.biojava.nbio.structure.xtal.CrystalCell; 042import org.biojava.nbio.structure.xtal.SpaceGroup; 043import org.slf4j.Logger; 044import org.slf4j.LoggerFactory; 045 046/** 047 * Some tools for mmCIF file writing. 048 * 049 * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf 050 * 051 * CIF categories are represented as a simple bean, typically extending {@link AbstractBean}. 052 * By default, all fields from the bean are taken as the CIF labels. Fields 053 * may be omitted by annotating them as {@link IgnoreField @IgnoreField}. 054 * The CIF label for a field may be changed (for instance, for fields that 055 * are not valid Java identifiers) by defining a function 056 * <tt>static Map<String,String> getCIFLabelMap()</tt> 057 * mapping from the field's name to the correct label. 058 * 059 * @author Jose Duarte 060 * @author Spencer Bliven 061 */ 062public class MMCIFFileTools { 063 064 private static final Logger logger = LoggerFactory.getLogger(MMCIFFileTools.class); 065 066 private static final String newline = System.getProperty("line.separator"); 067 068 /** 069 * The character to be printed out in cases where a value is not assigned in mmCIF files 070 */ 071 public static final String MMCIF_MISSING_VALUE = "?"; 072 073 /** 074 * The character to be printed out as a default value in mmCIF files, e.g. for the default alt_locs 075 */ 076 public static final String MMCIF_DEFAULT_VALUE = "."; 077 078 079 /** 080 * Produces a mmCIF loop header string for the given categoryName and className. 081 * className must be one of the beans in the {@link org.biojava.nbio.structure.io.mmcif.model} package 082 * @param categoryName 083 * @param className 084 * @return 085 * @throws ClassNotFoundException if the given className can not be found 086 */ 087 public static String toLoopMmCifHeaderString(String categoryName, String className) throws ClassNotFoundException { 088 StringBuilder str = new StringBuilder(); 089 090 str.append(SimpleMMcifParser.LOOP_START+newline); 091 092 Class<?> c = Class.forName(className); 093 094 for (Field f : getFields(c)) { 095 str.append(categoryName+"."+f.getName()+newline); 096 } 097 098 return str.toString(); 099 } 100 101 /** 102 * Converts a mmCIF bean (see {@link org.biojava.nbio.structure.io.mmcif.model} to 103 * a String representing it in mmCIF (single-record) format. 104 * @param categoryName 105 * @param o 106 * @return 107 */ 108 public static String toMMCIF(String categoryName, Object o) { 109 110 StringBuilder sb = new StringBuilder(); 111 112 Class<?> c = o.getClass(); 113 114 115 Field[] fields = getFields(c); 116 String[] names = getFieldNames(fields); 117 118 int maxFieldNameLength = getMaxStringLength(names); 119 120 for (int i=0;i<fields.length;i++) { 121 Field f = fields[i]; 122 String name = names[i]; 123 124 sb.append(categoryName).append(".").append(name); 125 126 int spacing = maxFieldNameLength - name.length() + 3; 127 128 try { 129 Object obj = f.get(o); 130 String val; 131 if (obj==null) { 132 logger.debug("Field {} is null, will write it out as {}",name,MMCIF_MISSING_VALUE); 133 val = MMCIF_MISSING_VALUE; 134 } else { 135 val = (String) obj; 136 } 137 for (int j=0;j<spacing;j++) sb.append(' '); 138 sb.append(addMmCifQuoting(val)); 139 sb.append(newline); 140 141 } catch (IllegalAccessException e) { 142 logger.warn("Field {} is inaccessible", name); 143 continue; 144 } catch (ClassCastException e) { 145 logger.warn("Could not cast value to String for field {}",name); 146 continue; 147 } 148 149 } 150 151 sb.append(SimpleMMcifParser.COMMENT_CHAR+newline); 152 153 return sb.toString(); 154 } 155 156 /** 157 * Gets all fields for a particular class, filtering fields annotated 158 * with {@link IgnoreField @IgnoreField}. 159 * 160 * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)} 161 * on all fields. 162 * @param c 163 * @return 164 */ 165 public static Field[] getFields(Class<?> c) { 166 Field[] allFields = c.getDeclaredFields(); 167 Field[] fields = new Field[allFields.length]; 168 int n = 0; 169 for(Field f : allFields) { 170 f.setAccessible(true); 171 IgnoreField anno = f.getAnnotation(IgnoreField.class); 172 if(anno == null) { 173 fields[n] = f; 174 n++; 175 } 176 } 177 return Arrays.copyOf(fields, n); 178 } 179 180 /** 181 * Gets the mmCIF record name for each field. This is generally just 182 * the name of the field or the value specified by the {@link CIFLabel @CIFLabel} annotation. 183 * 184 * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)} 185 * on all fields. 186 * @param fields 187 * @return 188 */ 189 public static String[] getFieldNames(Field[] fields) { 190 String[] names = new String[fields.length]; 191 for(int i=0;i<fields.length;i++) { 192 Field f = fields[i]; 193 f.setAccessible(true); 194 String rawName = fields[i].getName(); 195 CIFLabel cifLabel = f.getAnnotation(CIFLabel.class); 196 if(cifLabel != null) { 197 names[i] = cifLabel.label(); 198 } else { 199 names[i] = rawName; 200 } 201 } 202 return names; 203 } 204 205 /** 206 * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model} to 207 * a String representing them in mmCIF loop format with one record per line. 208 * @param list 209 * @return 210 */ 211 public static <T> String toMMCIF(List<T> list, Class<T> klass) { 212 if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!"); 213 214 Field[] fields = getFields(klass); 215 int[] sizes = getFieldSizes(list,fields); 216 217 StringBuilder sb = new StringBuilder(); 218 219 for (T o:list) { 220 sb.append(toSingleLoopLineMmCifString(o, fields, sizes)); 221 } 222 223 sb.append(SimpleMMcifParser.COMMENT_CHAR+newline); 224 225 return sb.toString(); 226 } 227 228 /** 229 * Given a mmCIF bean produces a String representing it in mmCIF loop format as a single record line 230 * @param record 231 * @param fields Set of fields for the record. If null, will be calculated from the class of the record 232 * @param sizes the size of each of the fields 233 * @return 234 */ 235 private static String toSingleLoopLineMmCifString(Object record, Field[] fields, int[] sizes) { 236 237 StringBuilder str = new StringBuilder(); 238 239 Class<?> c = record.getClass(); 240 241 if(fields == null) 242 fields = getFields(c); 243 244 if (sizes.length!=fields.length) 245 throw new IllegalArgumentException("The given sizes of fields differ from the number of declared fields"); 246 247 int i = -1; 248 for (Field f : fields) { 249 i++; 250 f.setAccessible(true); 251 252 try { 253 Object obj = f.get(record); 254 String val; 255 if (obj==null) { 256 logger.debug("Field {} is null, will write it out as {}",f.getName(),MMCIF_MISSING_VALUE); 257 val = MMCIF_MISSING_VALUE; 258 } else { 259 val = (String) obj; 260 } 261 262 str.append(String.format("%-"+sizes[i]+"s ", addMmCifQuoting(val))); 263 264 265 } catch (IllegalAccessException e) { 266 logger.warn("Field {} is inaccessible", f.getName()); 267 continue; 268 } catch (ClassCastException e) { 269 logger.warn("Could not cast value to String for field {}",f.getName()); 270 continue; 271 } 272 } 273 274 str.append(newline); 275 276 return str.toString(); 277 278 } 279 280 /** 281 * Adds quoting to a String according to the STAR format (mmCIF) rules 282 * @param val 283 * @return 284 */ 285 private static String addMmCifQuoting(String val) { 286 String newval; 287 288 if (val.contains("'")) { 289 // double quoting for strings containing single quotes (not strictly necessary but it's what the PDB usually does) 290 newval = "\""+val+"\""; 291 } else if (val.contains(" ")) { 292 // single quoting for stings containing spaces 293 newval = "'"+val+"'"; 294 } else { 295 if (val.contains(" ") && val.contains("'")) { 296 // TODO deal with this case 297 logger.warn("Value contains both spaces and single quotes, won't format it: {}. CIF ouptut will likely be invalid.",val); 298 } 299 newval = val; 300 } 301 // TODO deal with all the other cases: e.g. multi-line quoting with ;; 302 303 return newval; 304 } 305 306 /** 307 * Converts a SpaceGroup object to a {@link Symmetry} object. 308 * @param sg 309 * @return 310 */ 311 public static Symmetry convertSpaceGroupToSymmetry(SpaceGroup sg) { 312 Symmetry sym = new Symmetry(); 313 sym.setSpace_group_name_H_M(sg.getShortSymbol()); 314 // TODO do we need to fill any of the other values? 315 return sym; 316 } 317 318 /** 319 * Converts a CrystalCell object to a {@link Cell} object. 320 * @param c 321 * @return 322 */ 323 public static Cell convertCrystalCellToCell(CrystalCell c) { 324 Cell cell = new Cell(); 325 cell.setLength_a(String.format("%.3f",c.getA())); 326 cell.setLength_b(String.format("%.3f",c.getB())); 327 cell.setLength_c(String.format("%.3f",c.getC())); 328 cell.setAngle_alpha(String.format("%.3f",c.getAlpha())); 329 cell.setAngle_beta(String.format("%.3f",c.getBeta())); 330 cell.setAngle_gamma(String.format("%.3f",c.getGamma())); 331 332 return cell; 333 } 334 335 /** 336 * Converts an Atom object to an {@link AtomSite} object. 337 * @param a 338 * @param model the model number for the output AtomSites 339 * @param chainName the chain identifier (author id) for the output AtomSites 340 * @param chainId the internal chain identifier (asym id) for the output AtomSites 341 * @return 342 */ 343 public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainName, String chainId) { 344 return convertAtomToAtomSite(a, model, chainName, chainId, a.getPDBserial()); 345 } 346 347 /** 348 * Converts an Atom object to an {@link AtomSite} object. 349 * @param a the atom 350 * @param model the model number for the output AtomSites 351 * @param chainName the chain identifier (author id) for the output AtomSites 352 * @param chainId the internal chain identifier (asym id) for the output AtomSites 353 * @param atomId the atom id to be written to AtomSite 354 * @return 355 */ 356 public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainName, String chainId, int atomId) { 357 358 /* 359 ATOM 7 C CD . GLU A 1 24 ? -10.109 15.374 38.853 1.00 50.05 ? ? ? ? ? ? 24 GLU A CD 1 360 ATOM 8 O OE1 . GLU A 1 24 ? -9.659 14.764 37.849 1.00 49.80 ? ? ? ? ? ? 24 GLU A OE1 1 361 ATOM 9 O OE2 . GLU A 1 24 ? -11.259 15.171 39.310 1.00 50.51 ? ? ? ? ? ? 24 GLU A OE2 1 362 ATOM 10 N N . LEU A 1 25 ? -5.907 18.743 37.412 1.00 41.55 ? ? ? ? ? ? 25 LEU A N 1 363 ATOM 11 C CA . LEU A 1 25 ? -5.168 19.939 37.026 1.00 37.55 ? ? ? ? ? ? 25 LEU A CA 1 364 */ 365 366 Group g = a.getGroup(); 367 368 String record ; 369 if ( g.getType().equals(GroupType.HETATM) ) { 370 record = "HETATM"; 371 } else { 372 record = "ATOM"; 373 } 374 375 String entityId = "0"; 376 String labelSeqId = Integer.toString(g.getResidueNumber().getSeqNum()); 377 if (g.getChain()!=null && g.getChain().getEntityInfo()!=null) { 378 entityId = Integer.toString(g.getChain().getEntityInfo().getMolId()); 379 if (g.getChain().getEntityInfo().getType() == EntityType.POLYMER) { 380 // this only makes sense for polymeric chains, non-polymer chains will never have seqres groups and there's no point in calling getAlignedResIndex 381 labelSeqId = Integer.toString(g.getChain().getEntityInfo().getAlignedResIndex(g, g.getChain())); 382 } 383 } 384 385 Character altLoc = a.getAltLoc(); 386 String altLocStr; 387 if (altLoc==null || altLoc == ' ') { 388 altLocStr = MMCIF_DEFAULT_VALUE; 389 } else { 390 altLocStr = altLoc.toString(); 391 } 392 393 Element e = a.getElement(); 394 String eString = e.toString().toUpperCase(); 395 if ( e.equals(Element.R)) { 396 eString = "X"; 397 } 398 399 String insCode = MMCIF_MISSING_VALUE; 400 if (g.getResidueNumber().getInsCode()!=null ) { 401 insCode = Character.toString(g.getResidueNumber().getInsCode()); 402 } 403 404 AtomSite atomSite = new AtomSite(); 405 atomSite.setGroup_PDB(record); 406 atomSite.setId(Integer.toString(atomId)); 407 atomSite.setType_symbol(eString); 408 atomSite.setLabel_atom_id(a.getName()); 409 atomSite.setLabel_alt_id(altLocStr); 410 atomSite.setLabel_comp_id(g.getPDBName()); 411 atomSite.setLabel_asym_id(chainId); 412 atomSite.setLabel_entity_id(entityId); 413 atomSite.setLabel_seq_id(labelSeqId); 414 atomSite.setPdbx_PDB_ins_code(insCode); 415 atomSite.setCartn_x(FileConvert.d3.format(a.getX())); 416 atomSite.setCartn_y(FileConvert.d3.format(a.getY())); 417 atomSite.setCartn_z(FileConvert.d3.format(a.getZ())); 418 atomSite.setOccupancy(FileConvert.d2.format(a.getOccupancy())); 419 atomSite.setB_iso_or_equiv(FileConvert.d2.format(a.getTempFactor())); 420 atomSite.setAuth_seq_id(Integer.toString(g.getResidueNumber().getSeqNum())); 421 atomSite.setAuth_comp_id(g.getPDBName()); 422 atomSite.setAuth_asym_id(chainName); 423 atomSite.setAuth_atom_id(a.getName()); 424 atomSite.setPdbx_PDB_model_num(Integer.toString(model)); 425 426 return atomSite; 427 } 428 429 /** 430 * Converts a Group into a List of {@link AtomSite} objects. 431 * Atoms in other altloc groups (different from the main group) are also included, removing possible duplicates 432 * via using the atom identifier to assess uniqueness. 433 * @param g the group 434 * @param model the model number for the output AtomSites 435 * @param chainName the chain identifier (author id) for the output AtomSites 436 * @param chainId the internal chain identifier (asym id) for the output AtomSites 437 * @return 438 */ 439 public static List<AtomSite> convertGroupToAtomSites(Group g, int model, String chainName, String chainId) { 440 441 // The alt locs can have duplicates, since at parsing time we make sure that all alt loc groups have 442 // all atoms (see StructureTools#cleanUpAltLocs) 443 // Thus we have to remove duplicates here by using the atom id 444 // See issue https://github.com/biojava/biojava/issues/778 and TestAltLocs.testMmcifWritingAllAltlocs/testMmcifWritingPartialAltlocs 445 Map<Integer, AtomSite> uniqueAtomSites = new LinkedHashMap<>(); 446 447 int groupsize = g.size(); 448 449 for ( int atompos = 0 ; atompos < groupsize; atompos++) { 450 Atom a = g.getAtom(atompos); 451 if ( a == null) 452 continue ; 453 454 uniqueAtomSites.put(a.getPDBserial(), convertAtomToAtomSite(a, model, chainName, chainId)); 455 } 456 457 if ( g.hasAltLoc()){ 458 for (Group alt : g.getAltLocs() ) { 459 for (AtomSite atomSite : convertGroupToAtomSites(alt, model, chainName, chainId)) { 460 uniqueAtomSites.put(Integer.parseInt(atomSite.getId()), atomSite); 461 } 462 } 463 } 464 return new ArrayList<>(uniqueAtomSites.values()); 465 } 466 467 /** 468 * Converts a Chain into a List of {@link AtomSite} objects 469 * @param c the chain 470 * @param model the model number for the output AtomSites 471 * @param chainName the chain identifier (author id) for the output AtomSites 472 * @param chainId the internal chain identifier (asym id) for the output AtomSites 473 * @return 474 */ 475 public static List<AtomSite> convertChainToAtomSites(Chain c, int model, String chainName, String chainId) { 476 477 List<AtomSite> list = new ArrayList<>(); 478 479 if (c.getEntityInfo()==null) { 480 logger.warn("No entity found for chain {}: entity_id will be set to 0, label_seq_id will be the same as auth_seq_id", c.getName()); 481 } 482 483 for ( int h=0; h<c.getAtomLength();h++){ 484 485 Group g= c.getAtomGroup(h); 486 487 list.addAll(convertGroupToAtomSites(g, model, chainName, chainId)); 488 489 } 490 491 return list; 492 } 493 494 /** 495 * Converts a Structure into a List of {@link AtomSite} objects 496 * @param s 497 * @return 498 */ 499 public static List<AtomSite> convertStructureToAtomSites(Structure s) { 500 List<AtomSite> list = new ArrayList<AtomSite>(); 501 502 for (int m=0;m<s.nrModels();m++) { 503 for (Chain c:s.getChains(m)) { 504 list.addAll(convertChainToAtomSites(c, m+1, c.getName(), c.getId())); 505 } 506 } 507 return list; 508 } 509 510 /** 511 * Finds the max length of each of the String values contained in each of the fields of the given list of beans. 512 * Useful for producing mmCIF loop data that is aligned for all columns. 513 * @param list list of objects. All objects should have the same class. 514 * @param fields Set of fields for the record. If null, will be calculated from the class of the first record 515 * @return 516 * @see #toMMCIF(List, Class) 517 */ 518 private static <T> int[] getFieldSizes(List<T> list, Field[] fields) { 519 520 if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!"); 521 522 if(fields == null) 523 fields = getFields(list.get(0).getClass()); 524 525 int[] sizes = new int [fields.length]; 526 527 528 for (T a:list) { 529 int i = -1; 530 for (Field f : fields) { 531 i++; 532 533 f.setAccessible(true); 534 535 try { 536 Object obj = f.get(a); 537 int length; 538 if (obj==null) { 539 length = MMCIF_MISSING_VALUE.length(); 540 } else { 541 String val = (String) obj; 542 length = addMmCifQuoting(val).length(); 543 } 544 545 if (length>sizes[i]) sizes[i] = length; 546 547 } catch (IllegalAccessException e) { 548 logger.warn("Field {} is inaccessible", f.getName()); 549 continue; 550 } catch (ClassCastException e) { 551 logger.warn("Could not cast value to String for field {}",f.getName()); 552 continue; 553 } 554 } 555 } 556 return sizes; 557 } 558 559 /** 560 * Finds the max length of a list of strings 561 * Useful for producing mmCIF single-record data that is aligned for all values. 562 * @param names 563 * @return 564 * @see #toMMCIF(String, Object) 565 */ 566 private static int getMaxStringLength(String[] names) { 567 int size = 0; 568 for(String s : names) { 569 if(s.length()>size) { 570 size = s.length(); 571 } 572 } 573 return size; 574 } 575}