/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io.mmcif;


import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.io.FileConvert;
import org.biojava.nbio.structure.io.mmcif.model.AbstractBean;
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
import org.biojava.nbio.structure.io.mmcif.model.CIFLabel;
import org.biojava.nbio.structure.io.mmcif.model.Cell;
import org.biojava.nbio.structure.io.mmcif.model.IgnoreField;
import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Some tools for mmCIF file writing.
 *
 * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf
 *
 * CIF categories are represented as a simple bean, typically extending {@link AbstractBean}.
 * By default, all fields declared by the bean are taken as the CIF labels. Fields
 * may be omitted by annotating them as {@link IgnoreField @IgnoreField}.
 * The CIF label for a field may be changed (for instance, for labels that
 * are not valid Java identifiers) by annotating the field with
 * {@link CIFLabel @CIFLabel}, whose {@code label()} is then used instead of
 * the field's name.
 *
 * Values that cannot be written (null, empty, unreadable or non-String field
 * values) are written as {@link #MMCIF_MISSING_VALUE} so that every record
 * keeps one value per label.
 *
 * @author Jose Duarte
 * @author Spencer Bliven
 */
public class MMCIFFileTools {

    private static final Logger logger = LoggerFactory.getLogger(MMCIFFileTools.class);

    private static final String newline = System.getProperty("line.separator");

    /**
     * The character to be printed out in cases where a value is not assigned in mmCIF files
     */
    public static final String MMCIF_MISSING_VALUE = "?";

    /**
     * The character to be printed out as a default value in mmCIF files, e.g. for the default alt_locs
     */
    public static final String MMCIF_DEFAULT_VALUE = ".";


    /**
     * Produces a mmCIF loop header string for the given categoryName and className.
     * className must be one of the beans in the {@link org.biojava.nbio.structure.io.mmcif.model} package.
     * The header consists of the loop start token followed by one
     * {@code categoryName.label} line per field, where the label is resolved
     * by {@link #getFieldNames(Field[])} (so {@link CIFLabel @CIFLabel} is honoured).
     * @param categoryName the category prefix written in front of every label
     * @param className fully qualified name of the bean class
     * @return the loop header, one line per field
     * @throws ClassNotFoundException if the given className can not be found
     */
    public static String toLoopMmCifHeaderString(String categoryName, String className) throws ClassNotFoundException {
        StringBuilder str = new StringBuilder();

        str.append(SimpleMMcifParser.LOOP_START + newline);

        Class<?> c = Class.forName(className);

        // Resolve labels the same way the single-record writer does, so that
        // annotated fields get their proper mmCIF name in the header too.
        for (String name : getFieldNames(getFields(c))) {
            str.append(categoryName).append('.').append(name).append(newline);
        }

        return str.toString();
    }

    /**
     * Converts a mmCIF bean (see {@link org.biojava.nbio.structure.io.mmcif.model}) to
     * a String representing it in mmCIF (single-record) format: one
     * {@code categoryName.label value} line per field, values aligned in a
     * column, terminated by a comment-character line.
     * @param categoryName the category prefix written in front of every label
     * @param o the bean to write
     * @return the single-record mmCIF representation of the bean
     */
    public static String toMMCIF(String categoryName, Object o) {

        StringBuilder sb = new StringBuilder();

        Field[] fields = getFields(o.getClass());
        String[] names = getFieldNames(fields);

        int maxFieldNameLength = getMaxStringLength(names);

        for (int i = 0; i < fields.length; i++) {
            String name = names[i];

            // Resolve the value before emitting anything, so that a line is
            // never left with a label but no value/newline.
            String val = getFieldValue(fields[i], o, name);

            sb.append(categoryName).append('.').append(name);
            appendSpaces(sb, maxFieldNameLength - name.length() + 3);
            sb.append(addMmCifQuoting(val));
            sb.append(newline);
        }

        sb.append(SimpleMMcifParser.COMMENT_CHAR + newline);

        return sb.toString();
    }

    /**
     * Gets all fields declared by a particular class, filtering fields annotated
     * with {@link IgnoreField @IgnoreField}.
     *
     * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
     * on all declared fields (including the ignored ones).
     * @param c the bean class
     * @return the non-ignored declared fields, in the order returned by
     *  {@link Class#getDeclaredFields()}
     */
    public static Field[] getFields(Class<?> c) {
        Field[] declared = c.getDeclaredFields();
        List<Field> kept = new ArrayList<Field>(declared.length);
        for (Field f : declared) {
            f.setAccessible(true);
            if (f.getAnnotation(IgnoreField.class) == null) {
                kept.add(f);
            }
        }
        return kept.toArray(new Field[kept.size()]);
    }

    /**
     * Gets the mmCIF record name for each field. This is generally just
     * the name of the field or the value specified by the {@link CIFLabel @CIFLabel} annotation.
     *
     * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
     * on all fields.
     * @param fields the fields to name
     * @return the labels, in the same order as the given fields
     */
    public static String[] getFieldNames(Field[] fields) {
        String[] names = new String[fields.length];
        for (int i = 0; i < fields.length; i++) {
            Field f = fields[i];
            f.setAccessible(true);
            CIFLabel cifLabel = f.getAnnotation(CIFLabel.class);
            names[i] = (cifLabel != null) ? cifLabel.label() : f.getName();
        }
        return names;
    }

    /**
     * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model}) to
     * a String representing them in mmCIF loop format with one record per line,
     * terminated by a comment-character line. The loop header is not included
     * (see {@link #toLoopMmCifHeaderString(String, String)}).
     * @param list the beans to write, must not be empty
     * @param klass the class whose fields define the columns
     * @return the loop records, columns aligned
     * @throws IllegalArgumentException if the list is empty
     */
    public static <T> String toMMCIF(List<T> list, Class<T> klass) {
        if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");

        Field[] fields = getFields(klass);
        int[] sizes = getFieldSizes(list, fields);

        StringBuilder sb = new StringBuilder();

        for (T o : list) {
            sb.append(toSingleLoopLineMmCifString(o, fields, sizes));
        }

        sb.append(SimpleMMcifParser.COMMENT_CHAR + newline);

        return sb.toString();
    }

    /**
     * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model}) to
     * a String representing them in mmCIF loop format with one record per line.
     * The column set is taken from the class of the first element.
     * @param list the beans to write, must not be empty
     * @return the loop records, columns aligned
     * @deprecated The {@link #toMMCIF(List, Class)} provides compile-time type safety
     * @throws IllegalArgumentException if the list is empty
     * @throws ClassCastException if not all list elements have the same type
     */
    @Deprecated
    @SuppressWarnings("unchecked")
    public static <T> String toMMCIF(List<T> list) {
        // Fail the same way as the typed overload instead of with an
        // IndexOutOfBoundsException from list.get(0)
        if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");

        Class<T> klass = (Class<T>) list.get(0).getClass();
        for (T t : list) {
            if (klass != t.getClass()) {
                throw new ClassCastException("Not all loop elements have the same fields");
            }
        }
        return toMMCIF(list, klass);
    }

    /**
     * Given a mmCIF bean produces a String representing it in mmCIF loop format as a single record line.
     * Every field contributes exactly one (possibly quoted) value, left-justified
     * to the given column width and followed by a single space.
     * @param record the bean to write
     * @param fields Set of fields for the record. If null, will be calculated from the class of the record
     * @param sizes the size of each of the fields
     * @return the record line, terminated by a line separator
     * @throws IllegalArgumentException if sizes and fields differ in length
     */
    private static String toSingleLoopLineMmCifString(Object record, Field[] fields, int[] sizes) {

        if (fields == null) {
            fields = getFields(record.getClass());
        }

        if (sizes.length != fields.length) {
            throw new IllegalArgumentException("The given sizes of fields differ from the number of declared fields");
        }

        StringBuilder str = new StringBuilder();

        for (int i = 0; i < fields.length; i++) {
            Field f = fields[i];
            f.setAccessible(true);

            String quoted = addMmCifQuoting(getFieldValue(f, record, f.getName()));

            // Manual left-justification: pad to the column width, then always
            // at least one separating space.
            str.append(quoted);
            appendSpaces(str, Math.max(1, sizes[i] - quoted.length() + 1));
        }

        str.append(newline);

        return str.toString();
    }

    /**
     * Reads the value of a bean field as the String to be written to mmCIF.
     * Returns {@link #MMCIF_MISSING_VALUE} when the value is null or empty,
     * when the field cannot be accessed, or when the value is not a String,
     * so that callers always have exactly one token to write per field.
     * @param f the field to read
     * @param bean the bean instance to read the field from
     * @param label the name used for the field in log messages
     * @return the field value, or {@link #MMCIF_MISSING_VALUE}; never null or empty
     */
    private static String getFieldValue(Field f, Object bean, String label) {
        Object obj;
        try {
            obj = f.get(bean);
        } catch (IllegalAccessException e) {
            logger.warn("Field {} is inaccessible", label);
            return MMCIF_MISSING_VALUE;
        }

        if (obj == null) {
            logger.debug("Field {} is null, will write it out as {}", label, MMCIF_MISSING_VALUE);
            return MMCIF_MISSING_VALUE;
        }

        if (!(obj instanceof String)) {
            logger.warn("Could not cast value to String for field {}", label);
            return MMCIF_MISSING_VALUE;
        }

        String val = (String) obj;
        if (val.isEmpty()) {
            // an empty token would silently drop the value from the record
            logger.debug("Field {} is empty, will write it out as {}", label, MMCIF_MISSING_VALUE);
            return MMCIF_MISSING_VALUE;
        }
        return val;
    }

    /**
     * Appends the given number of space characters; does nothing for counts
     * of zero or less.
     * @param sb the builder to append to
     * @param count number of spaces to append
     */
    private static void appendSpaces(StringBuilder sb, int count) {
        for (int i = 0; i < count; i++) {
            sb.append(' ');
        }
    }

    /**
     * Adds quoting to a String according to the STAR format (mmCIF) rules:
     * values containing a single quote are wrapped in double quotes, other
     * values containing a space or tab are wrapped in single quotes, and
     * everything else is returned unchanged.
     * @param val the raw value, not null
     * @return the value, quoted if needed
     */
    private static String addMmCifQuoting(String val) {

        if (val.indexOf('\'') >= 0) {
            // double quoting for strings containing single quotes (not strictly
            // necessary but it's what the PDB usually does)
            if (val.contains("\" ") || val.contains("\"\t")) {
                // a double quote followed by whitespace would end the quoted string early
                logger.warn("Value contains single quotes and a double quote followed by whitespace: {}. CIF output will likely be invalid.", val);
            }
            return "\"" + val + "\"";
        }

        if (val.indexOf(' ') >= 0 || val.indexOf('\t') >= 0) {
            // single quoting for strings containing whitespace
            return "'" + val + "'";
        }

        // TODO deal with all the other cases: e.g. multi-line quoting with ;;
        return val;
    }

    /**
     * Converts a SpaceGroup object to a {@link Symmetry} object.
     * Only the Hermann-Mauguin space group name is filled, from the space
     * group's short symbol.
     * @param sg the space group
     * @return a new Symmetry bean
     */
    public static Symmetry convertSpaceGroupToSymmetry(SpaceGroup sg) {
        Symmetry sym = new Symmetry();
        sym.setSpace_group_name_H_M(sg.getShortSymbol());
        // TODO do we need to fill any of the other values?
        return sym;
    }

    /**
     * Converts a CrystalCell object to a {@link Cell} object.
     * Lengths and angles are written with 3 decimals, always using a
     * decimal point regardless of the default locale.
     * @param c the crystal cell
     * @return a new Cell bean
     */
    public static Cell convertCrystalCellToCell(CrystalCell c) {
        Cell cell = new Cell();
        cell.setLength_a(formatCellValue(c.getA()));
        cell.setLength_b(formatCellValue(c.getB()));
        cell.setLength_c(formatCellValue(c.getC()));
        cell.setAngle_alpha(formatCellValue(c.getAlpha()));
        cell.setAngle_beta(formatCellValue(c.getBeta()));
        cell.setAngle_gamma(formatCellValue(c.getGamma()));

        return cell;
    }

    /**
     * Formats a cell parameter with 3 decimals in a locale-independent way.
     * @param value the cell length or angle
     * @return the formatted value
     */
    private static String formatCellValue(double value) {
        // Locale.US: the default locale may use a decimal comma, which is not valid in mmCIF numbers
        return String.format(Locale.US, "%.3f", value);
    }

    /**
     * Converts an Atom object to an {@link AtomSite} object, using the atom's
     * PDB serial as the atom id.
     * @param a the atom
     * @param model the model number to be written
     * @param chainId the author chain id (auth_asym_id)
     * @param internalChainId the internal chain id (label_asym_id)
     * @return a new AtomSite bean
     */
    public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainId, String internalChainId) {
        return convertAtomToAtomSite(a, model, chainId, internalChainId, a.getPDBserial());
    }

    /**
     * Converts an Atom object to an {@link AtomSite} object.
     * If the atom's group has a chain with a compound (entity), the entity id
     * and label_seq_id are taken from it; otherwise the entity id is "0" and
     * label_seq_id equals auth_seq_id.
     * @param a the atom
     * @param model the model number to be written
     * @param chainId the author chain id (auth_asym_id)
     * @param internalChainId the internal chain id (label_asym_id)
     * @param atomId the atom id to be written to AtomSite
     * @return a new AtomSite bean
     */
    public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainId, String internalChainId, int atomId) {

        /*
        ATOM 7  C CD  . GLU A 1 24 ? -10.109 15.374 38.853 1.00 50.05 ? ? ? ? ? ? 24 GLU A CD  1
        ATOM 8  O OE1 . GLU A 1 24 ? -9.659  14.764 37.849 1.00 49.80 ? ? ? ? ? ? 24 GLU A OE1 1
        ATOM 9  O OE2 . GLU A 1 24 ? -11.259 15.171 39.310 1.00 50.51 ? ? ? ? ? ? 24 GLU A OE2 1
        ATOM 10 N N   . LEU A 1 25 ? -5.907  18.743 37.412 1.00 41.55 ? ? ? ? ? ? 25 LEU A N   1
        ATOM 11 C CA  . LEU A 1 25 ? -5.168  19.939 37.026 1.00 37.55 ? ? ? ? ? ? 25 LEU A CA  1
         */

        Group g = a.getGroup();

        String record = g.getType().equals(GroupType.HETATM) ? "HETATM" : "ATOM";

        String authSeqId = Integer.toString(g.getResidueNumber().getSeqNum());

        // defaults used when no compound (entity) information is available
        String entityId = "0";
        String labelSeqId = authSeqId;
        Chain chain = g.getChain();
        if (chain != null && chain.getCompound() != null) {
            entityId = Integer.toString(chain.getCompound().getMolId());
            labelSeqId = Integer.toString(chain.getCompound().getAlignedResIndex(g, chain));
        }

        Character altLoc = a.getAltLoc();
        String altLocStr;
        if (altLoc == null || altLoc == ' ') {
            altLocStr = MMCIF_DEFAULT_VALUE;
        } else {
            altLocStr = altLoc.toString();
        }

        Element e = a.getElement();
        // Locale.ROOT: default-locale upper-casing can alter letters (e.g. 'i' in a Turkish locale)
        String eString = e.toString().toUpperCase(Locale.ROOT);
        if (e.equals(Element.R)) {
            eString = "X";
        }

        String insCode = MMCIF_MISSING_VALUE;
        if (g.getResidueNumber().getInsCode() != null) {
            insCode = Character.toString(g.getResidueNumber().getInsCode());
        }

        AtomSite atomSite = new AtomSite();
        atomSite.setGroup_PDB(record);
        atomSite.setId(Integer.toString(atomId));
        atomSite.setType_symbol(eString);
        atomSite.setLabel_atom_id(a.getName());
        atomSite.setLabel_alt_id(altLocStr);
        atomSite.setLabel_comp_id(g.getPDBName());
        atomSite.setLabel_asym_id(internalChainId);
        atomSite.setLabel_entity_id(entityId);
        atomSite.setLabel_seq_id(labelSeqId);
        atomSite.setPdbx_PDB_ins_code(insCode);
        atomSite.setCartn_x(FileConvert.d3.format(a.getX()));
        atomSite.setCartn_y(FileConvert.d3.format(a.getY()));
        atomSite.setCartn_z(FileConvert.d3.format(a.getZ()));
        atomSite.setOccupancy(FileConvert.d2.format(a.getOccupancy()));
        atomSite.setB_iso_or_equiv(FileConvert.d2.format(a.getTempFactor()));
        atomSite.setAuth_seq_id(authSeqId);
        atomSite.setAuth_comp_id(g.getPDBName());
        atomSite.setAuth_asym_id(chainId);
        atomSite.setAuth_atom_id(a.getName());
        atomSite.setPdbx_PDB_model_num(Integer.toString(model));

        return atomSite;
    }

    /**
     * Converts a Group into a List of {@link AtomSite} objects. The atoms of
     * the group come first, followed (recursively) by the atoms of its
     * alternate-location groups, if any.
     * @param g the group
     * @param model the model number to be written
     * @param chainId the author chain id (auth_asym_id)
     * @param internalChainId the internal chain id (label_asym_id)
     * @return the AtomSite beans for all atoms of the group
     */
    private static List<AtomSite> convertGroupToAtomSites(Group g, int model, String chainId, String internalChainId) {

        List<AtomSite> list = new ArrayList<AtomSite>();

        int groupsize = g.size();

        for (int atompos = 0; atompos < groupsize; atompos++) {
            Atom a = g.getAtom(atompos);
            if (a == null) {
                continue;
            }
            list.add(convertAtomToAtomSite(a, model, chainId, internalChainId));
        }

        if (g.hasAltLoc()) {
            for (Group alt : g.getAltLocs()) {
                list.addAll(convertGroupToAtomSites(alt, model, chainId, internalChainId));
            }
        }
        return list;
    }

    /**
     * Converts a Chain into a List of {@link AtomSite} objects.
     * Logs a warning if the chain has no compound (entity).
     * @param c the chain
     * @param model the model number to be written
     * @param chainId the author chain id (auth_asym_id)
     * @param internalChainId the internal chain id (label_asym_id)
     * @return the AtomSite beans for all atom groups of the chain
     */
    public static List<AtomSite> convertChainToAtomSites(Chain c, int model, String chainId, String internalChainId) {

        List<AtomSite> list = new ArrayList<AtomSite>();

        if (c.getCompound() == null) {
            logger.warn("No Compound (entity) found for chain {}: entity_id will be set to 0, label_seq_id will be the same as auth_seq_id", c.getChainID());
        }

        for (int h = 0; h < c.getAtomLength(); h++) {
            Group g = c.getAtomGroup(h);
            list.addAll(convertGroupToAtomSites(g, model, chainId, internalChainId));
        }

        return list;
    }

    /**
     * Converts a Structure into a List of {@link AtomSite} objects, covering
     * all chains of all models. Model numbers are written 1-based.
     * @param s the structure
     * @return the AtomSite beans for all atoms of the structure
     */
    public static List<AtomSite> convertStructureToAtomSites(Structure s) {
        List<AtomSite> list = new ArrayList<AtomSite>();

        for (int m = 0; m < s.nrModels(); m++) {
            for (Chain c : s.getChains(m)) {
                list.addAll(convertChainToAtomSites(c, m + 1, c.getChainID(), c.getInternalChainID()));
            }
        }
        return list;
    }

    /**
     * Finds the max length of each of the String values contained in each of the fields of the given list of beans.
     * Useful for producing mmCIF loop data that is aligned for all columns.
     * Lengths are measured on the values exactly as they will be written
     * (after missing-value substitution and quoting).
     * @param list list of objects. All objects should have the same class.
     * @param fields Set of fields for the record. If null, will be calculated from the class of the first record
     * @return the column width for each field, in field order
     * @throws IllegalArgumentException if the list is empty
     * @see #toMMCIF(List, Class)
     */
    private static <T> int[] getFieldSizes(List<T> list, Field[] fields) {

        if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");

        if (fields == null) {
            fields = getFields(list.get(0).getClass());
        }

        int[] sizes = new int[fields.length];

        for (T a : list) {
            for (int i = 0; i < fields.length; i++) {
                Field f = fields[i];
                f.setAccessible(true);

                int length = addMmCifQuoting(getFieldValue(f, a, f.getName())).length();
                if (length > sizes[i]) {
                    sizes[i] = length;
                }
            }
        }
        return sizes;
    }

    /**
     * Finds the max length of a list of strings.
     * Useful for producing mmCIF single-record data that is aligned for all values.
     * @param names the strings to measure
     * @return the length of the longest string, 0 if there are none
     * @see #toMMCIF(String, Object)
     */
    private static int getMaxStringLength(String[] names) {
        int size = 0;
        for (String s : names) {
            if (s.length() > size) {
                size = s.length();
            }
        }
        return size;
    }
}