001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.io.mmcif; 022 023 024import java.lang.reflect.Field; 025import java.util.*; 026 027import org.biojava.nbio.structure.Atom; 028import org.biojava.nbio.structure.Chain; 029import org.biojava.nbio.structure.Element; 030import org.biojava.nbio.structure.Group; 031import org.biojava.nbio.structure.GroupType; 032import org.biojava.nbio.structure.Structure; 033import org.biojava.nbio.structure.io.FileConvert; 034import org.biojava.nbio.structure.io.mmcif.model.AbstractBean; 035import org.biojava.nbio.structure.io.mmcif.model.AtomSite; 036import org.biojava.nbio.structure.io.mmcif.model.CIFLabel; 037import org.biojava.nbio.structure.io.mmcif.model.Cell; 038import org.biojava.nbio.structure.io.mmcif.model.IgnoreField; 039import org.biojava.nbio.structure.io.mmcif.model.Symmetry; 040import org.biojava.nbio.structure.xtal.CrystalCell; 041import org.biojava.nbio.structure.xtal.SpaceGroup; 042import org.slf4j.Logger; 043import org.slf4j.LoggerFactory; 044 045/** 046 * Some tools for mmCIF file writing. 047 * 048 * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf 049 * 050 * CIF categories are represented as a simple bean, typically extending {@link AbstractBean}. 051 * By default, all fields from the bean are taken as the CIF labels. Fields 052 * may be omitted by annotating them as {@link IgnoreField @IgnoreField}. 053 * The CIF label for a field may be changed (for instance, for fields that 054 * are not valid Java identifiers) by defining a function 055 * <tt>static Map<String,String> getCIFLabelMap()</tt> 056 * mapping from the field's name to the correct label. 057 * 058 * @author Jose Duarte 059 * @author Spencer Bliven 060 */ 061public class MMCIFFileTools { 062 063 private static final Logger logger = LoggerFactory.getLogger(MMCIFFileTools.class); 064 065 private static final String newline = System.getProperty("line.separator"); 066 067 /** 068 * The character to be printed out in cases where a value is not assigned in mmCIF files 069 */ 070 public static final String MMCIF_MISSING_VALUE = "?"; 071 072 /** 073 * The character to be printed out as a default value in mmCIF files, e.g. for the default alt_locs 074 */ 075 public static final String MMCIF_DEFAULT_VALUE = "."; 076 077 078 /** 079 * Produces a mmCIF loop header string for the given categoryName and className. 080 * className must be one of the beans in the {@link org.biojava.nbio.structure.io.mmcif.model} package 081 * @param categoryName 082 * @param className 083 * @return 084 * @throws ClassNotFoundException if the given className can not be found 085 */ 086 public static String toLoopMmCifHeaderString(String categoryName, String className) throws ClassNotFoundException { 087 StringBuilder str = new StringBuilder(); 088 089 str.append(SimpleMMcifParser.LOOP_START+newline); 090 091 Class<?> c = Class.forName(className); 092 093 for (Field f : getFields(c)) { 094 str.append(categoryName+"."+f.getName()+newline); 095 } 096 097 return str.toString(); 098 } 099 100 /** 101 * Converts a mmCIF bean (see {@link org.biojava.nbio.structure.io.mmcif.model} to 102 * a String representing it in mmCIF (single-record) format. 103 * @param categoryName 104 * @param o 105 * @return 106 */ 107 public static String toMMCIF(String categoryName, Object o) { 108 109 StringBuilder sb = new StringBuilder(); 110 111 Class<?> c = o.getClass(); 112 113 114 Field[] fields = getFields(c); 115 String[] names = getFieldNames(fields); 116 117 int maxFieldNameLength = getMaxStringLength(names); 118 119 for (int i=0;i<fields.length;i++) { 120 Field f = fields[i]; 121 String name = names[i]; 122 123 sb.append(categoryName).append(".").append(name); 124 125 int spacing = maxFieldNameLength - name.length() + 3; 126 127 try { 128 Object obj = f.get(o); 129 String val; 130 if (obj==null) { 131 logger.debug("Field {} is null, will write it out as {}",name,MMCIF_MISSING_VALUE); 132 val = MMCIF_MISSING_VALUE; 133 } else { 134 val = (String) obj; 135 } 136 for (int j=0;j<spacing;j++) sb.append(' '); 137 sb.append(addMmCifQuoting(val)); 138 sb.append(newline); 139 140 } catch (IllegalAccessException e) { 141 logger.warn("Field {} is inaccessible", name); 142 continue; 143 } catch (ClassCastException e) { 144 logger.warn("Could not cast value to String for field {}",name); 145 continue; 146 } 147 148 } 149 150 sb.append(SimpleMMcifParser.COMMENT_CHAR+newline); 151 152 return sb.toString(); 153 } 154 155 /** 156 * Gets all fields for a particular class, filtering fields annotated 157 * with {@link IgnoreField @IgnoreField}. 158 * 159 * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)} 160 * on all fields. 161 * @param c 162 * @return 163 */ 164 public static Field[] getFields(Class<?> c) { 165 Field[] allFields = c.getDeclaredFields(); 166 Field[] fields = new Field[allFields.length]; 167 int n = 0; 168 for(Field f : allFields) { 169 f.setAccessible(true); 170 IgnoreField anno = f.getAnnotation(IgnoreField.class); 171 if(anno == null) { 172 fields[n] = f; 173 n++; 174 } 175 } 176 return Arrays.copyOf(fields, n); 177 } 178 179 /** 180 * Gets the mmCIF record name for each field. This is generally just 181 * the name of the field or the value specified by the {@link CIFLabel @CIFLabel} annotation. 182 * 183 * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)} 184 * on all fields. 185 * @param fields 186 * @return 187 */ 188 public static String[] getFieldNames(Field[] fields) { 189 String[] names = new String[fields.length]; 190 for(int i=0;i<fields.length;i++) { 191 Field f = fields[i]; 192 f.setAccessible(true); 193 String rawName = fields[i].getName(); 194 CIFLabel cifLabel = f.getAnnotation(CIFLabel.class); 195 if(cifLabel != null) { 196 names[i] = cifLabel.label(); 197 } else { 198 names[i] = rawName; 199 } 200 } 201 return names; 202 } 203 204 /** 205 * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model} to 206 * a String representing them in mmCIF loop format with one record per line. 207 * @param list 208 * @return 209 */ 210 public static <T> String toMMCIF(List<T> list, Class<T> klass) { 211 if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!"); 212 213 Field[] fields = getFields(klass); 214 int[] sizes = getFieldSizes(list,fields); 215 216 StringBuilder sb = new StringBuilder(); 217 218 for (T o:list) { 219 sb.append(toSingleLoopLineMmCifString(o, fields, sizes)); 220 } 221 222 sb.append(SimpleMMcifParser.COMMENT_CHAR+newline); 223 224 return sb.toString(); 225 } 226 227 /** 228 * Given a mmCIF bean produces a String representing it in mmCIF loop format as a single record line 229 * @param record 230 * @param fields Set of fields for the record. If null, will be calculated from the class of the record 231 * @param sizes the size of each of the fields 232 * @return 233 */ 234 private static String toSingleLoopLineMmCifString(Object record, Field[] fields, int[] sizes) { 235 236 StringBuilder str = new StringBuilder(); 237 238 Class<?> c = record.getClass(); 239 240 if(fields == null) 241 fields = getFields(c); 242 243 if (sizes.length!=fields.length) 244 throw new IllegalArgumentException("The given sizes of fields differ from the number of declared fields"); 245 246 int i = -1; 247 for (Field f : fields) { 248 i++; 249 f.setAccessible(true); 250 251 try { 252 Object obj = f.get(record); 253 String val; 254 if (obj==null) { 255 logger.debug("Field {} is null, will write it out as {}",f.getName(),MMCIF_MISSING_VALUE); 256 val = MMCIF_MISSING_VALUE; 257 } else { 258 val = (String) obj; 259 } 260 261 str.append(String.format("%-"+sizes[i]+"s ", addMmCifQuoting(val))); 262 263 264 } catch (IllegalAccessException e) { 265 logger.warn("Field {} is inaccessible", f.getName()); 266 continue; 267 } catch (ClassCastException e) { 268 logger.warn("Could not cast value to String for field {}",f.getName()); 269 continue; 270 } 271 } 272 273 str.append(newline); 274 275 return str.toString(); 276 277 } 278 279 /** 280 * Adds quoting to a String according to the STAR format (mmCIF) rules 281 * @param val 282 * @return 283 */ 284 private static String addMmCifQuoting(String val) { 285 String newval; 286 287 if (val.contains("'")) { 288 // double quoting for strings containing single quotes (not strictly necessary but it's what the PDB usually does) 289 newval = "\""+val+"\""; 290 } else if (val.contains(" ")) { 291 // single quoting for stings containing spaces 292 newval = "'"+val+"'"; 293 } else { 294 if (val.contains(" ") && val.contains("'")) { 295 // TODO deal with this case 296 logger.warn("Value contains both spaces and single quotes, won't format it: {}. CIF ouptut will likely be invalid.",val); 297 } 298 newval = val; 299 } 300 // TODO deal with all the other cases: e.g. multi-line quoting with ;; 301 302 return newval; 303 } 304 305 /** 306 * Converts a SpaceGroup object to a {@link Symmetry} object. 307 * @param sg 308 * @return 309 */ 310 public static Symmetry convertSpaceGroupToSymmetry(SpaceGroup sg) { 311 Symmetry sym = new Symmetry(); 312 sym.setSpace_group_name_H_M(sg.getShortSymbol()); 313 // TODO do we need to fill any of the other values? 314 return sym; 315 } 316 317 /** 318 * Converts a CrystalCell object to a {@link Cell} object. 319 * @param c 320 * @return 321 */ 322 public static Cell convertCrystalCellToCell(CrystalCell c) { 323 Cell cell = new Cell(); 324 cell.setLength_a(String.format("%.3f",c.getA())); 325 cell.setLength_b(String.format("%.3f",c.getB())); 326 cell.setLength_c(String.format("%.3f",c.getC())); 327 cell.setAngle_alpha(String.format("%.3f",c.getAlpha())); 328 cell.setAngle_beta(String.format("%.3f",c.getBeta())); 329 cell.setAngle_gamma(String.format("%.3f",c.getGamma())); 330 331 return cell; 332 } 333 334 /** 335 * Converts an Atom object to an {@link AtomSite} object. 336 * @param a 337 * @param model the model number for the output AtomSites 338 * @param chainName the chain identifier (author id) for the output AtomSites 339 * @param chainId the internal chain identifier (asym id) for the output AtomSites 340 * @return 341 */ 342 public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainName, String chainId) { 343 return convertAtomToAtomSite(a, model, chainName, chainId, a.getPDBserial()); 344 } 345 346 /** 347 * Converts an Atom object to an {@link AtomSite} object. 348 * @param a the atom 349 * @param model the model number for the output AtomSites 350 * @param chainName the chain identifier (author id) for the output AtomSites 351 * @param chainId the internal chain identifier (asym id) for the output AtomSites 352 * @param atomId the atom id to be written to AtomSite 353 * @return 354 */ 355 public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainName, String chainId, int atomId) { 356 357 /* 358 ATOM 7 C CD . GLU A 1 24 ? -10.109 15.374 38.853 1.00 50.05 ? ? ? ? ? ? 24 GLU A CD 1 359 ATOM 8 O OE1 . GLU A 1 24 ? -9.659 14.764 37.849 1.00 49.80 ? ? ? ? ? ? 24 GLU A OE1 1 360 ATOM 9 O OE2 . GLU A 1 24 ? -11.259 15.171 39.310 1.00 50.51 ? ? ? ? ? ? 24 GLU A OE2 1 361 ATOM 10 N N . LEU A 1 25 ? -5.907 18.743 37.412 1.00 41.55 ? ? ? ? ? ? 25 LEU A N 1 362 ATOM 11 C CA . LEU A 1 25 ? -5.168 19.939 37.026 1.00 37.55 ? ? ? ? ? ? 25 LEU A CA 1 363 */ 364 365 Group g = a.getGroup(); 366 367 String record ; 368 if ( g.getType().equals(GroupType.HETATM) ) { 369 record = "HETATM"; 370 } else { 371 record = "ATOM"; 372 } 373 374 String entityId = "0"; 375 String labelSeqId = Integer.toString(g.getResidueNumber().getSeqNum()); 376 if (g.getChain()!=null && g.getChain().getEntityInfo()!=null) { 377 entityId = Integer.toString(g.getChain().getEntityInfo().getMolId()); 378 labelSeqId = Integer.toString(g.getChain().getEntityInfo().getAlignedResIndex(g, g.getChain())); 379 } 380 381 Character altLoc = a.getAltLoc() ; 382 String altLocStr; 383 if (altLoc==null || altLoc == ' ') { 384 altLocStr = MMCIF_DEFAULT_VALUE; 385 } else { 386 altLocStr = altLoc.toString(); 387 } 388 389 Element e = a.getElement(); 390 String eString = e.toString().toUpperCase(); 391 if ( e.equals(Element.R)) { 392 eString = "X"; 393 } 394 395 String insCode = MMCIF_MISSING_VALUE; 396 if (g.getResidueNumber().getInsCode()!=null ) { 397 insCode = Character.toString(g.getResidueNumber().getInsCode()); 398 } 399 400 AtomSite atomSite = new AtomSite(); 401 atomSite.setGroup_PDB(record); 402 atomSite.setId(Integer.toString(atomId)); 403 atomSite.setType_symbol(eString); 404 atomSite.setLabel_atom_id(a.getName()); 405 atomSite.setLabel_alt_id(altLocStr); 406 atomSite.setLabel_comp_id(g.getPDBName()); 407 atomSite.setLabel_asym_id(chainId); 408 atomSite.setLabel_entity_id(entityId); 409 atomSite.setLabel_seq_id(labelSeqId); 410 atomSite.setPdbx_PDB_ins_code(insCode); 411 atomSite.setCartn_x(FileConvert.d3.format(a.getX())); 412 atomSite.setCartn_y(FileConvert.d3.format(a.getY())); 413 atomSite.setCartn_z(FileConvert.d3.format(a.getZ())); 414 atomSite.setOccupancy(FileConvert.d2.format(a.getOccupancy())); 415 atomSite.setB_iso_or_equiv(FileConvert.d2.format(a.getTempFactor())); 416 atomSite.setAuth_seq_id(Integer.toString(g.getResidueNumber().getSeqNum())); 417 atomSite.setAuth_comp_id(g.getPDBName()); 418 atomSite.setAuth_asym_id(chainName); 419 atomSite.setAuth_atom_id(a.getName()); 420 atomSite.setPdbx_PDB_model_num(Integer.toString(model)); 421 422 return atomSite; 423 } 424 425 /** 426 * Converts a Group into a List of {@link AtomSite} objects. 427 * Atoms in other altloc groups (different from the main group) are also included, removing possible duplicates 428 * via using the atom identifier to assess uniqueness. 429 * @param g the group 430 * @param model the model number for the output AtomSites 431 * @param chainName the chain identifier (author id) for the output AtomSites 432 * @param chainId the internal chain identifier (asym id) for the output AtomSites 433 * @return 434 */ 435 public static List<AtomSite> convertGroupToAtomSites(Group g, int model, String chainName, String chainId) { 436 437 // The alt locs can have duplicates, since at parsing time we make sure that all alt loc groups have 438 // all atoms (see StructureTools#cleanUpAltLocs) 439 // Thus we have to remove duplicates here by using the atom id 440 // See issue https://github.com/biojava/biojava/issues/778 and TestAltLocs.testMmcifWritingAllAltlocs/testMmcifWritingPartialAltlocs 441 Map<Integer, AtomSite> uniqueAtomSites = new LinkedHashMap<>(); 442 443 int groupsize = g.size(); 444 445 for ( int atompos = 0 ; atompos < groupsize; atompos++) { 446 Atom a = g.getAtom(atompos); 447 if ( a == null) 448 continue ; 449 450 uniqueAtomSites.put(a.getPDBserial(), convertAtomToAtomSite(a, model, chainName, chainId)); 451 } 452 453 if ( g.hasAltLoc()){ 454 for (Group alt : g.getAltLocs() ) { 455 for (AtomSite atomSite : convertGroupToAtomSites(alt, model, chainName, chainId)) { 456 uniqueAtomSites.put(Integer.parseInt(atomSite.getId()), atomSite); 457 } 458 } 459 } 460 return new ArrayList<>(uniqueAtomSites.values()); 461 } 462 463 /** 464 * Converts a Chain into a List of {@link AtomSite} objects 465 * @param c the chain 466 * @param model the model number for the output AtomSites 467 * @param chainName the chain identifier (author id) for the output AtomSites 468 * @param chainId the internal chain identifier (asym id) for the output AtomSites 469 * @return 470 */ 471 public static List<AtomSite> convertChainToAtomSites(Chain c, int model, String chainName, String chainId) { 472 473 List<AtomSite> list = new ArrayList<>(); 474 475 if (c.getEntityInfo()==null) { 476 logger.warn("No Compound (entity) found for chain {}: entity_id will be set to 0, label_seq_id will be the same as auth_seq_id", c.getName()); 477 } 478 479 for ( int h=0; h<c.getAtomLength();h++){ 480 481 Group g= c.getAtomGroup(h); 482 483 list.addAll(convertGroupToAtomSites(g, model, chainName, chainId)); 484 485 } 486 487 return list; 488 } 489 490 /** 491 * Converts a Structure into a List of {@link AtomSite} objects 492 * @param s 493 * @return 494 */ 495 public static List<AtomSite> convertStructureToAtomSites(Structure s) { 496 List<AtomSite> list = new ArrayList<AtomSite>(); 497 498 for (int m=0;m<s.nrModels();m++) { 499 for (Chain c:s.getChains(m)) { 500 list.addAll(convertChainToAtomSites(c, m+1, c.getName(), c.getId())); 501 } 502 } 503 return list; 504 } 505 506 /** 507 * Finds the max length of each of the String values contained in each of the fields of the given list of beans. 508 * Useful for producing mmCIF loop data that is aligned for all columns. 509 * @param list list of objects. All objects should have the same class. 510 * @param fields Set of fields for the record. If null, will be calculated from the class of the first record 511 * @return 512 * @see #toMMCIF(List, Class) 513 */ 514 private static <T> int[] getFieldSizes(List<T> list, Field[] fields) { 515 516 if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!"); 517 518 if(fields == null) 519 fields = getFields(list.get(0).getClass()); 520 521 int[] sizes = new int [fields.length]; 522 523 524 for (T a:list) { 525 int i = -1; 526 for (Field f : fields) { 527 i++; 528 529 f.setAccessible(true); 530 531 try { 532 Object obj = f.get(a); 533 int length; 534 if (obj==null) { 535 length = MMCIF_MISSING_VALUE.length(); 536 } else { 537 String val = (String) obj; 538 length = addMmCifQuoting(val).length(); 539 } 540 541 if (length>sizes[i]) sizes[i] = length; 542 543 } catch (IllegalAccessException e) { 544 logger.warn("Field {} is inaccessible", f.getName()); 545 continue; 546 } catch (ClassCastException e) { 547 logger.warn("Could not cast value to String for field {}",f.getName()); 548 continue; 549 } 550 } 551 } 552 return sizes; 553 } 554 555 /** 556 * Finds the max length of a list of strings 557 * Useful for producing mmCIF single-record data that is aligned for all values. 558 * @param names 559 * @return 560 * @see #toMMCIF(String, Object) 561 */ 562 private static int getMaxStringLength(String[] names) { 563 int size = 0; 564 for(String s : names) { 565 if(s.length()>size) { 566 size = s.length(); 567 } 568 } 569 return size; 570 } 571}