/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io.mmcif;


import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.io.FileConvert;
import org.biojava.nbio.structure.io.mmcif.model.AbstractBean;
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
import org.biojava.nbio.structure.io.mmcif.model.CIFLabel;
import org.biojava.nbio.structure.io.mmcif.model.Cell;
import org.biojava.nbio.structure.io.mmcif.model.IgnoreField;
import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Some tools for mmCIF file writing.
 *
 * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf
 *
 * CIF categories are represented as a simple bean, typically extending {@link AbstractBean}.
 * By default, all fields from the bean are taken as the CIF labels. Fields
 * may be omitted by annotating them as {@link IgnoreField @IgnoreField}.
 * The CIF label for a field may be changed (for instance, for fields that
 * are not valid Java identifiers) by annotating the field with
 * {@link CIFLabel @CIFLabel}.
 *
 * @author Jose Duarte
 * @author Spencer Bliven
 */
public class MMCIFFileTools {

    private static final Logger logger = LoggerFactory.getLogger(MMCIFFileTools.class);

    private static final String newline = System.getProperty("line.separator");

    /**
     * The character to be printed out in cases where a value is not assigned in mmCIF files
     */
    public static final String MMCIF_MISSING_VALUE = "?";

    /**
     * The character to be printed out as a default value in mmCIF files, e.g. for the default alt_locs
     */
    public static final String MMCIF_DEFAULT_VALUE = ".";


    /**
     * Produces a mmCIF loop header string for the given categoryName and className.
     * className must be one of the beans in the {@link org.biojava.nbio.structure.io.mmcif.model} package.
     * The labels written are the ones returned by {@link #getFieldNames(Field[])}, i.e.
     * {@link CIFLabel @CIFLabel} annotations are honoured and
     * {@link IgnoreField @IgnoreField} fields are left out.
     * @param categoryName the category name that prefixes every label (e.g. "_atom_site")
     * @param className the fully qualified name of the bean class
     * @return the loop start token followed by one "category.label" line per field
     * @throws ClassNotFoundException if the given className can not be found
     */
    public static String toLoopMmCifHeaderString(String categoryName, String className) throws ClassNotFoundException {
        StringBuilder str = new StringBuilder();

        str.append(SimpleMMcifParser.LOOP_START+newline);

        Class<?> c = Class.forName(className);

        // use the CIF labels (not the raw Java field names) so that the header
        // agrees with what toMMCIF(String, Object) writes for the same bean
        for (String name : getFieldNames(getFields(c))) {
            str.append(categoryName).append('.').append(name).append(newline);
        }

        return str.toString();
    }

    /**
     * Converts a mmCIF bean (see {@link org.biojava.nbio.structure.io.mmcif.model}) to
     * a String representing it in mmCIF (single-record) format.
     * Fields that are inaccessible or do not hold a String are skipped entirely
     * (neither label nor value is written) and a warning is logged.
     * @param categoryName the category name that prefixes every label
     * @param o the bean to be written
     * @return one "category.label value" line per field, followed by a comment-character line
     */
    public static String toMMCIF(String categoryName, Object o) {

        StringBuilder sb = new StringBuilder();

        Field[] fields = getFields(o.getClass());
        String[] names = getFieldNames(fields);

        int maxFieldNameLength = getMaxStringLength(names);

        for (int i = 0; i < fields.length; i++) {
            String name = names[i];

            // read the value BEFORE writing the label: if the field has to be skipped
            // we must not leave a dangling label without value in the output
            String val = readFieldValue(fields[i], o, name, true);
            if (val == null) {
                continue;
            }

            sb.append(categoryName).append('.').append(name);

            // align all values in one column, 3 spaces after the longest label
            int spacing = maxFieldNameLength - name.length() + 3;
            for (int j = 0; j < spacing; j++) {
                sb.append(' ');
            }
            sb.append(addMmCifQuoting(val));
            sb.append(newline);
        }

        sb.append(SimpleMMcifParser.COMMENT_CHAR+newline);

        return sb.toString();
    }

    /**
     * Gets all fields for a particular class, filtering fields annotated
     * with {@link IgnoreField @IgnoreField}.
     *
     * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
     * on all fields.
     * @param c the bean class
     * @return the declared fields of c (inherited fields are not included) that
     *  are not annotated with {@link IgnoreField @IgnoreField}
     */
    public static Field[] getFields(Class<?> c) {
        Field[] allFields = c.getDeclaredFields();
        Field[] fields = new Field[allFields.length];
        int n = 0;
        for (Field f : allFields) {
            f.setAccessible(true);
            IgnoreField anno = f.getAnnotation(IgnoreField.class);
            if (anno == null) {
                fields[n] = f;
                n++;
            }
        }
        // trim the unused tail left by the ignored fields
        return Arrays.copyOf(fields, n);
    }

    /**
     * Gets the mmCIF record name for each field. This is generally just
     * the name of the field or the value specified by the {@link CIFLabel @CIFLabel} annotation.
     *
     * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
     * on all fields.
     * @param fields the fields to get the names for
     * @return an array, parallel to fields, with the mmCIF label of each field
     */
    public static String[] getFieldNames(Field[] fields) {
        String[] names = new String[fields.length];
        for (int i = 0; i < fields.length; i++) {
            Field f = fields[i];
            f.setAccessible(true);
            CIFLabel cifLabel = f.getAnnotation(CIFLabel.class);
            if (cifLabel != null) {
                names[i] = cifLabel.label();
            } else {
                names[i] = f.getName();
            }
        }
        return names;
    }

    /**
     * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model}) to
     * a String representing them in mmCIF loop format with one record per line.
     * The loop header is not included, see {@link #toLoopMmCifHeaderString(String, String)}.
     * @param list the beans to be written, must not be empty
     * @param klass the class of the beans, used to find the fields to be written
     * @return one line per bean with column-aligned values, followed by a comment-character line
     * @throws IllegalArgumentException if the list is empty
     */
    public static <T> String toMMCIF(List<T> list, Class<T> klass) {
        if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");

        Field[] fields = getFields(klass);
        int[] sizes = getFieldSizes(list,fields);

        StringBuilder sb = new StringBuilder();

        for (T o : list) {
            sb.append(toSingleLoopLineMmCifString(o, fields, sizes));
        }

        sb.append(SimpleMMcifParser.COMMENT_CHAR+newline);

        return sb.toString();
    }

    /**
     * Given a mmCIF bean produces a String representing it in mmCIF loop format as a single record line
     * @param record the bean to be written
     * @param fields Set of fields for the record. If null, will be calculated from the class of the record
     * @param sizes the size of each of the fields
     * @return the values of the record, each left-justified to its column size and
     *  followed by a space, terminated by a newline
     * @throws IllegalArgumentException if sizes and fields have different lengths
     */
    private static String toSingleLoopLineMmCifString(Object record, Field[] fields, int[] sizes) {

        StringBuilder str = new StringBuilder();

        if (fields == null) {
            fields = getFields(record.getClass());
        }

        if (sizes.length != fields.length) {
            throw new IllegalArgumentException("The given sizes of fields differ from the number of declared fields");
        }

        for (int i = 0; i < fields.length; i++) {
            Field f = fields[i];
            f.setAccessible(true);

            String val = readFieldValue(f, record, f.getName(), true);
            if (val == null) {
                // unreadable or non-String field: a warning was already logged
                continue;
            }

            // left-justify to the column width and add a separating space. Done by hand
            // rather than through String.format, which rejects a column width of 0
            String quoted = addMmCifQuoting(val);
            str.append(quoted);
            for (int k = quoted.length(); k < sizes[i]; k++) {
                str.append(' ');
            }
            str.append(' ');
        }

        str.append(newline);

        return str.toString();

    }

    /**
     * Reads the value of a bean field that is expected to hold a String.
     * @param f the field to read
     * @param bean the bean to read the field from
     * @param name the name to be used for the field in log messages
     * @param logNull whether to log (at debug level) that a null value is replaced
     * @return the String value of the field, {@link #MMCIF_MISSING_VALUE} if the field is null,
     *  or null if the field is inaccessible or does not hold a String (a warning is logged in both cases)
     */
    private static String readFieldValue(Field f, Object bean, String name, boolean logNull) {
        Object obj;
        try {
            obj = f.get(bean);
        } catch (IllegalAccessException e) {
            logger.warn("Field {} is inaccessible", name);
            return null;
        }

        if (obj == null) {
            if (logNull) {
                logger.debug("Field {} is null, will write it out as {}",name,MMCIF_MISSING_VALUE);
            }
            return MMCIF_MISSING_VALUE;
        }

        if (!(obj instanceof String)) {
            logger.warn("Could not cast value to String for field {}",name);
            return null;
        }

        return (String) obj;
    }

    /**
     * Adds quoting to a String according to the STAR format (mmCIF) rules.
     * <ul>
     * <li>empty values are written as {@link #MMCIF_MISSING_VALUE}: writing nothing
     * would drop a token from the record</li>
     * <li>values containing single quotes are enclosed in double quotes</li>
     * <li>values containing spaces (and no single quotes) are enclosed in single quotes</li>
     * <li>any other value is returned unchanged</li>
     * </ul>
     * @param val the raw value, must not be null
     * @return the value ready to be written to a mmCIF file
     */
    private static String addMmCifQuoting(String val) {

        if (val.isEmpty()) {
            return MMCIF_MISSING_VALUE;
        }

        boolean hasSingleQuote = val.contains("'");

        // TODO deal with these cases, e.g. with multi-line quoting with ;;
        if ((hasSingleQuote && val.contains("\"")) || val.contains("\n") || val.contains("\r")) {
            logger.warn("Value contains both single and double quotes or a line break, can't quote it properly: {}. CIF output will likely be invalid.",val);
        }

        if (hasSingleQuote) {
            // double quoting for strings containing single quotes (not strictly necessary but it's what the PDB usually does)
            return "\""+val+"\"";
        }

        if (val.contains(" ")) {
            // single quoting for strings containing spaces
            return "'"+val+"'";
        }

        return val;
    }

    /**
     * Converts a SpaceGroup object to a {@link Symmetry} object.
     * Only the Hermann-Mauguin space group name is filled in, from the short symbol
     * of the space group.
     * @param sg the space group to convert
     * @return a new Symmetry bean
     */
    public static Symmetry convertSpaceGroupToSymmetry(SpaceGroup sg) {
        Symmetry sym = new Symmetry();
        sym.setSpace_group_name_H_M(sg.getShortSymbol());
        // TODO do we need to fill any of the other values?
        return sym;
    }

    /**
     * Converts a CrystalCell object to a {@link Cell} object.
     * Lengths and angles are written with 3 decimals.
     * @param c the crystal cell to convert
     * @return a new Cell bean
     */
    public static Cell convertCrystalCellToCell(CrystalCell c) {
        Cell cell = new Cell();
        // fixed locale: the default locale may use a decimal comma, which is not a valid CIF number
        cell.setLength_a(String.format(Locale.US, "%.3f", c.getA()));
        cell.setLength_b(String.format(Locale.US, "%.3f", c.getB()));
        cell.setLength_c(String.format(Locale.US, "%.3f", c.getC()));
        cell.setAngle_alpha(String.format(Locale.US, "%.3f", c.getAlpha()));
        cell.setAngle_beta(String.format(Locale.US, "%.3f", c.getBeta()));
        cell.setAngle_gamma(String.format(Locale.US, "%.3f", c.getGamma()));

        return cell;
    }

    /**
     * Converts an Atom object to an {@link AtomSite} object, using the PDB serial
     * of the atom as the atom id.
     * @param a the atom to convert
     * @param model the model number, written as pdbx_PDB_model_num
     * @param chainId the author chain id, written as auth_asym_id
     * @param internalChainId the internal chain id, written as label_asym_id
     * @return a new AtomSite bean
     */
    public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainId, String internalChainId) {
        return convertAtomToAtomSite(a, model, chainId, internalChainId, a.getPDBserial());
    }

    /**
     * Converts an Atom object to an {@link AtomSite} object.
     * If the group of the atom has no chain or the chain has no entity information,
     * the entity id is set to "0" and label_seq_id to the author residue number.
     * @param a the atom to convert
     * @param model the model number, written as pdbx_PDB_model_num
     * @param chainId the author chain id, written as auth_asym_id
     * @param internalChainId the internal chain id, written as label_asym_id
     * @param atomId the atom id to be written to AtomSite
     * @return a new AtomSite bean
     */
    public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainId, String internalChainId, int atomId) {

        /*
        ATOM 7 C CD . GLU A 1 24 ? -10.109 15.374 38.853 1.00 50.05 ? ? ? ? ? ? 24 GLU A CD 1
        ATOM 8 O OE1 . GLU A 1 24 ? -9.659 14.764 37.849 1.00 49.80 ? ? ? ? ? ? 24 GLU A OE1 1
        ATOM 9 O OE2 . GLU A 1 24 ? -11.259 15.171 39.310 1.00 50.51 ? ? ? ? ? ? 24 GLU A OE2 1
        ATOM 10 N N . LEU A 1 25 ? -5.907 18.743 37.412 1.00 41.55 ? ? ? ? ? ? 25 LEU A N 1
        ATOM 11 C CA . LEU A 1 25 ? -5.168 19.939 37.026 1.00 37.55 ? ? ? ? ? ? 25 LEU A CA 1
        */

        Group g = a.getGroup();

        String record;
        if (g.getType().equals(GroupType.HETATM)) {
            record = "HETATM";
        } else {
            record = "ATOM";
        }

        // defaults used when there is no entity information available
        String entityId = "0";
        String labelSeqId = Integer.toString(g.getResidueNumber().getSeqNum());
        if (g.getChain() != null && g.getChain().getEntityInfo() != null) {
            entityId = Integer.toString(g.getChain().getEntityInfo().getMolId());
            labelSeqId = Integer.toString(g.getChain().getEntityInfo().getAlignedResIndex(g, g.getChain()));
        }

        Character altLoc = a.getAltLoc();
        String altLocStr;
        if (altLoc == null || altLoc == ' ') {
            altLocStr = MMCIF_DEFAULT_VALUE;
        } else {
            altLocStr = altLoc.toString();
        }

        Element e = a.getElement();
        // Locale.ROOT: the upper-casing must not depend on the default locale
        // (e.g. 'i' is upper-cased to a dotted capital I in Turkish locales)
        String eString = e.toString().toUpperCase(Locale.ROOT);
        if (e.equals(Element.R)) {
            eString = "X";
        }

        String insCode = MMCIF_MISSING_VALUE;
        if (g.getResidueNumber().getInsCode() != null) {
            insCode = Character.toString(g.getResidueNumber().getInsCode());
        }

        AtomSite atomSite = new AtomSite();
        atomSite.setGroup_PDB(record);
        atomSite.setId(Integer.toString(atomId));
        atomSite.setType_symbol(eString);
        atomSite.setLabel_atom_id(a.getName());
        atomSite.setLabel_alt_id(altLocStr);
        atomSite.setLabel_comp_id(g.getPDBName());
        atomSite.setLabel_asym_id(internalChainId);
        atomSite.setLabel_entity_id(entityId);
        atomSite.setLabel_seq_id(labelSeqId);
        atomSite.setPdbx_PDB_ins_code(insCode);
        atomSite.setCartn_x(FileConvert.d3.format(a.getX()));
        atomSite.setCartn_y(FileConvert.d3.format(a.getY()));
        atomSite.setCartn_z(FileConvert.d3.format(a.getZ()));
        atomSite.setOccupancy(FileConvert.d2.format(a.getOccupancy()));
        atomSite.setB_iso_or_equiv(FileConvert.d2.format(a.getTempFactor()));
        atomSite.setAuth_seq_id(Integer.toString(g.getResidueNumber().getSeqNum()));
        atomSite.setAuth_comp_id(g.getPDBName());
        atomSite.setAuth_asym_id(chainId);
        atomSite.setAuth_atom_id(a.getName());
        atomSite.setPdbx_PDB_model_num(Integer.toString(model));

        return atomSite;
    }

    /**
     * Converts a Group into a List of {@link AtomSite} objects.
     * The atoms of the alternate location groups of g (if any) are appended
     * after the atoms of g itself.
     * @param g the group to convert
     * @param model the model number
     * @param chainId the author chain id
     * @param internalChainId the internal chain id
     * @return the list of AtomSite beans, one per atom
     */
    private static List<AtomSite> convertGroupToAtomSites(Group g, int model, String chainId, String internalChainId) {

        List<AtomSite> list = new ArrayList<AtomSite>();

        int groupsize = g.size();

        for (int atompos = 0; atompos < groupsize; atompos++) {
            Atom a = g.getAtom(atompos);
            if (a == null) {
                continue;
            }

            list.add(convertAtomToAtomSite(a, model, chainId, internalChainId));
        }

        if (g.hasAltLoc()) {
            for (Group alt : g.getAltLocs()) {
                list.addAll(convertGroupToAtomSites(alt, model, chainId, internalChainId));
            }
        }
        return list;
    }

    /**
     * Converts a Chain into a List of {@link AtomSite} objects.
     * A warning is logged if the chain has no entity information.
     * @param c the chain to convert
     * @param model the model number
     * @param authorId the author chain id
     * @param asymId the internal chain id
     * @return the list of AtomSite beans for all atom groups of the chain
     */
    public static List<AtomSite> convertChainToAtomSites(Chain c, int model, String authorId, String asymId) {

        List<AtomSite> list = new ArrayList<AtomSite>();

        if (c.getEntityInfo() == null) {
            logger.warn("No Compound (entity) found for chain {}: entity_id will be set to 0, label_seq_id will be the same as auth_seq_id", c.getName());
        }

        for (int h = 0; h < c.getAtomLength(); h++) {
            Group g = c.getAtomGroup(h);

            list.addAll(convertGroupToAtomSites(g, model, authorId, asymId));
        }

        return list;
    }

    /**
     * Converts a Structure into a List of {@link AtomSite} objects.
     * Models are numbered starting at 1; the name of each chain is used as author
     * chain id and its id as internal chain id.
     * @param s the structure to convert
     * @return the list of AtomSite beans for all chains of all models
     */
    public static List<AtomSite> convertStructureToAtomSites(Structure s) {
        List<AtomSite> list = new ArrayList<AtomSite>();

        for (int m = 0; m < s.nrModels(); m++) {
            for (Chain c : s.getChains(m)) {
                list.addAll(convertChainToAtomSites(c, m+1, c.getName(), c.getId()));
            }
        }
        return list;
    }

    /**
     * Finds the max length of each of the String values contained in each of the fields of the given list of beans.
     * Useful for producing mmCIF loop data that is aligned for all columns.
     * The lengths are those of the values as they will be written, i.e. after quoting.
     * @param list list of objects. All objects should have the same class.
     * @param fields Set of fields for the record. If null, will be calculated from the class of the first record
     * @return an array, parallel to fields, with the max length of the values of each field
     * @throws IllegalArgumentException if the list is empty
     * @see #toMMCIF(List, Class)
     */
    private static <T> int[] getFieldSizes(List<T> list, Field[] fields) {

        if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");

        if (fields == null) {
            fields = getFields(list.get(0).getClass());
        }

        int[] sizes = new int[fields.length];

        for (T a : list) {
            for (int i = 0; i < fields.length; i++) {
                Field f = fields[i];
                f.setAccessible(true);

                // null values count with the length of the missing-value placeholder
                String val = readFieldValue(f, a, f.getName(), false);
                if (val == null) {
                    continue;
                }

                int length = addMmCifQuoting(val).length();
                if (length > sizes[i]) {
                    sizes[i] = length;
                }
            }
        }
        return sizes;
    }

    /**
     * Finds the max length of a list of strings
     * Useful for producing mmCIF single-record data that is aligned for all values.
     * @param names the strings to measure
     * @return the length of the longest string, 0 if names is empty
     * @see #toMMCIF(String, Object)
     */
    private static int getMaxStringLength(String[] names) {
        int size = 0;
        for (String s : names) {
            if (s.length() > size) {
                size = s.length();
            }
        }
        return size;
    }
}