001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmcif;
022
023
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.io.FileConvert;
import org.biojava.nbio.structure.io.mmcif.model.AbstractBean;
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
import org.biojava.nbio.structure.io.mmcif.model.CIFLabel;
import org.biojava.nbio.structure.io.mmcif.model.Cell;
import org.biojava.nbio.structure.io.mmcif.model.IgnoreField;
import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
046
047/**
048 * Some tools for mmCIF file writing.
049 *
050 * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf
051 *
052 * CIF categories are represented as a simple bean, typically extending {@link AbstractBean}.
053 * By default, all fields from the bean are taken as the CIF labels. Fields
054 * may be omitted by annotating them as {@link IgnoreField @IgnoreField}.
055 * The CIF label for a field may be changed (for instance, for fields that
056 * are not valid Java identifiers) by defining a function
057 * <tt>static Map<String,String> getCIFLabelMap()</tt>
058 * mapping from the field's name to the correct label.
059 * 
060 * @author Jose Duarte
061 * @author Spencer Bliven
062 */
063public class MMCIFFileTools {
064
065        private static final Logger logger = LoggerFactory.getLogger(MMCIFFileTools.class);
066
067        private static final String newline = System.getProperty("line.separator");
068
069        /**
070         * The character to be printed out in cases where a value is not assigned in mmCIF files
071         */
072        public static final String MMCIF_MISSING_VALUE = "?";
073
074        /**
075         * The character to be printed out as a default value in mmCIF files, e.g. for the default alt_locs
076         */
077        public static final String MMCIF_DEFAULT_VALUE = ".";
078
079
080        /**
081         * Produces a mmCIF loop header string for the given categoryName and className.
082         * className must be one of the beans in the {@link org.biojava.nbio.structure.io.mmcif.model} package
083         * @param categoryName
084         * @param className
085         * @return
086         * @throws ClassNotFoundException if the given className can not be found
087         */
088        public static String toLoopMmCifHeaderString(String categoryName, String className) throws ClassNotFoundException {
089                StringBuilder str = new StringBuilder();
090
091                str.append(SimpleMMcifParser.LOOP_START+newline);
092
093                Class<?> c = Class.forName(className);
094
095                for (Field f : getFields(c)) {
096                        str.append(categoryName+"."+f.getName()+newline);
097                }
098
099                return str.toString();
100        }
101
102        /**
103         * Converts a mmCIF bean (see {@link org.biojava.nbio.structure.io.mmcif.model} to
104         * a String representing it in mmCIF (single-record) format.
105         * @param categoryName
106         * @param o
107         * @return
108         */
109        public static String toMMCIF(String categoryName, Object o) {
110
111                StringBuilder sb = new StringBuilder();
112
113                Class<?> c = o.getClass();
114
115
116                Field[] fields = getFields(c);
117                String[] names = getFieldNames(fields);
118
119                int maxFieldNameLength = getMaxStringLength(names);
120
121                for (int i=0;i<fields.length;i++) {
122                        Field f = fields[i];
123                        String name = names[i];
124
125                        sb.append(categoryName).append(".").append(name);
126
127                        int spacing = maxFieldNameLength - name.length() + 3;
128
129                        try {
130                                Object obj = f.get(o);
131                                String val;
132                                if (obj==null) {
133                                        logger.debug("Field {} is null, will write it out as {}",name,MMCIF_MISSING_VALUE);
134                                        val = MMCIF_MISSING_VALUE;
135                                } else {
136                                        val = (String) obj;
137                                }
138                                for (int j=0;j<spacing;j++) sb.append(' ');
139                                sb.append(addMmCifQuoting(val));
140                                sb.append(newline);
141
142                        } catch (IllegalAccessException e) {
143                                logger.warn("Field {} is inaccessible", name);
144                                continue;
145                        } catch (ClassCastException e) {
146                                logger.warn("Could not cast value to String for field {}",name);
147                                continue;
148                        }
149
150                }
151
152                sb.append(SimpleMMcifParser.COMMENT_CHAR+newline);
153
154                return sb.toString();
155        }
156
157        /**
158         * Gets all fields for a particular class, filtering fields annotated
159         * with {@link IgnoreField @IgnoreField}.
160         * 
161         * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
162         * on all fields.
163         * @param c
164         * @return
165         */
166        public static Field[] getFields(Class<?> c) {
167                Field[] allFields = c.getDeclaredFields();
168                Field[] fields = new Field[allFields.length];
169                int n = 0;
170                for(Field f : allFields) {
171                        f.setAccessible(true);
172                        IgnoreField anno = f.getAnnotation(IgnoreField.class);
173                        if(anno == null) {
174                                fields[n] = f;
175                                n++;
176                        }
177                }
178                return Arrays.copyOf(fields, n);
179        }
180
181        /**
182         * Gets the mmCIF record name for each field. This is generally just
183         * the name of the field or the value specified by the {@link CIFLabel @CIFLabel} annotation.
184         * 
185         * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
186         * on all fields.
187         * @param fields
188         * @return
189         */
190        public static String[] getFieldNames(Field[] fields) {
191                String[] names = new String[fields.length];
192                for(int i=0;i<fields.length;i++) {
193                        Field f = fields[i];
194                        f.setAccessible(true);
195                        String rawName = fields[i].getName();
196                        CIFLabel cifLabel = f.getAnnotation(CIFLabel.class);
197                        if(cifLabel != null) {
198                                names[i] = cifLabel.label();
199                        } else {
200                                names[i] = rawName;
201                        }
202                }
203                return names;
204        }
205
206        /**
207         * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model} to
208         * a String representing them in mmCIF loop format with one record per line.
209         * @param list
210         * @return
211         */
212        public static <T> String toMMCIF(List<T> list, Class<T> klass) {
213                if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");
214
215                Field[] fields = getFields(klass);
216                int[] sizes = getFieldSizes(list,fields);
217
218                StringBuilder sb = new StringBuilder();
219
220                for (T o:list) {
221                        sb.append(toSingleLoopLineMmCifString(o, fields, sizes));
222                }
223
224                sb.append(SimpleMMcifParser.COMMENT_CHAR+newline);
225
226                return sb.toString();
227        }
228
229        /**
230         * Given a mmCIF bean produces a String representing it in mmCIF loop format as a single record line
231         * @param record
232         * @param fields Set of fields for the record. If null, will be calculated from the class of the record
233         * @param sizes the size of each of the fields
234         * @return
235         */
236        private static String toSingleLoopLineMmCifString(Object record, Field[] fields, int[] sizes) {
237
238                StringBuilder str = new StringBuilder();
239
240                Class<?> c = record.getClass();
241
242                if(fields == null)
243                        fields = getFields(c);
244                
245                if (sizes.length!=fields.length)
246                        throw new IllegalArgumentException("The given sizes of fields differ from the number of declared fields");
247
248                int i = -1;
249                for (Field f : fields) {
250                        i++;
251                        f.setAccessible(true);
252
253                        try {
254                                Object obj = f.get(record);
255                                String val;
256                                if (obj==null) {
257                                        logger.debug("Field {} is null, will write it out as {}",f.getName(),MMCIF_MISSING_VALUE);
258                                        val = MMCIF_MISSING_VALUE;
259                                } else {
260                                        val = (String) obj;
261                                }
262
263                                str.append(String.format("%-"+sizes[i]+"s ", addMmCifQuoting(val)));
264
265
266                        } catch (IllegalAccessException e) {
267                                logger.warn("Field {} is inaccessible", f.getName());
268                                continue;
269                        } catch (ClassCastException e) {
270                                logger.warn("Could not cast value to String for field {}",f.getName());
271                                continue;
272                        }
273                }
274
275                str.append(newline);
276
277                return str.toString();
278
279        }
280
281        /**
282         * Adds quoting to a String according to the STAR format (mmCIF) rules
283         * @param val
284         * @return
285         */
286        private static String addMmCifQuoting(String val) {
287                String newval;
288
289                if (val.contains("'")) {
290                        // double quoting for strings containing single quotes (not strictly necessary but it's what the PDB usually does)
291                        newval = "\""+val+"\"";
292                } else if (val.contains(" ")) {
293                        // single quoting for stings containing spaces
294                        newval = "'"+val+"'";
295                } else {
296                        if (val.contains(" ") && val.contains("'")) {
297                                // TODO deal with this case
298                                logger.warn("Value contains both spaces and single quotes, won't format it: {}. CIF ouptut will likely be invalid.",val);
299                        }
300                        newval = val;
301                }
302                // TODO deal with all the other cases: e.g. multi-line quoting with ;;
303
304                return newval;
305        }
306
307        /**
308         * Converts a SpaceGroup object to a {@link Symmetry} object.
309         * @param sg
310         * @return
311         */
312        public static Symmetry convertSpaceGroupToSymmetry(SpaceGroup sg) {
313                Symmetry sym = new Symmetry();
314                sym.setSpace_group_name_H_M(sg.getShortSymbol());
315                // TODO do we need to fill any of the other values?
316                return sym;
317        }
318
319        /**
320         * Converts a CrystalCell object to a {@link Cell} object.
321         * @param c
322         * @return
323         */
324        public static Cell convertCrystalCellToCell(CrystalCell c) {
325                Cell cell = new Cell();
326                cell.setLength_a(String.format("%.3f",c.getA()));
327                cell.setLength_b(String.format("%.3f",c.getB()));
328                cell.setLength_c(String.format("%.3f",c.getC()));
329                cell.setAngle_alpha(String.format("%.3f",c.getAlpha()));
330                cell.setAngle_beta(String.format("%.3f",c.getBeta()));
331                cell.setAngle_gamma(String.format("%.3f",c.getGamma()));
332
333                return cell;
334        }
335
336        /**
337         * Converts an Atom object to an {@link AtomSite} object.
338         * @param a
339         * @param model
340         * @param chainId
341         * @param internalChainId
342         * @return
343         */
344        public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainId, String internalChainId) {
345                return convertAtomToAtomSite(a, model, chainId, internalChainId, a.getPDBserial());
346        }
347
348        /**
349         * Converts an Atom object to an {@link AtomSite} object.
350         * @param a
351         * @param model
352         * @param chainId
353         * @param internalChainId
354         * @param atomId the atom id to be written to AtomSite
355         * @return
356         */
357        public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainId, String internalChainId, int atomId) {
358
359                /*
360                ATOM 7    C CD  . GLU A 1 24  ? -10.109 15.374 38.853 1.00 50.05 ? ? ? ? ? ? 24  GLU A CD  1
361                ATOM 8    O OE1 . GLU A 1 24  ? -9.659  14.764 37.849 1.00 49.80 ? ? ? ? ? ? 24  GLU A OE1 1
362                ATOM 9    O OE2 . GLU A 1 24  ? -11.259 15.171 39.310 1.00 50.51 ? ? ? ? ? ? 24  GLU A OE2 1
363                ATOM 10   N N   . LEU A 1 25  ? -5.907  18.743 37.412 1.00 41.55 ? ? ? ? ? ? 25  LEU A N   1
364                ATOM 11   C CA  . LEU A 1 25  ? -5.168  19.939 37.026 1.00 37.55 ? ? ? ? ? ? 25  LEU A CA  1
365                */
366
367                Group g = a.getGroup();
368
369                String record ;
370                if ( g.getType().equals(GroupType.HETATM) ) {
371                        record = "HETATM";
372                } else {
373                        record = "ATOM";
374                }
375
376                String entityId = "0";
377                String labelSeqId = Integer.toString(g.getResidueNumber().getSeqNum());
378                if (g.getChain()!=null && g.getChain().getEntityInfo()!=null) {
379                        entityId = Integer.toString(g.getChain().getEntityInfo().getMolId());
380                        labelSeqId = Integer.toString(g.getChain().getEntityInfo().getAlignedResIndex(g, g.getChain()));
381                }
382
383                Character  altLoc = a.getAltLoc()           ;
384                String altLocStr;
385                if (altLoc==null || altLoc == ' ') {
386                        altLocStr = MMCIF_DEFAULT_VALUE;
387                } else {
388                        altLocStr = altLoc.toString();
389                }
390
391                Element e = a.getElement();
392                String eString = e.toString().toUpperCase();
393                if ( e.equals(Element.R)) {
394                        eString = "X";
395                }
396
397                String insCode = MMCIF_MISSING_VALUE;
398                if (g.getResidueNumber().getInsCode()!=null ) {
399                        insCode = Character.toString(g.getResidueNumber().getInsCode());
400                }
401
402                AtomSite atomSite = new AtomSite();
403                atomSite.setGroup_PDB(record);
404                atomSite.setId(Integer.toString(atomId));
405                atomSite.setType_symbol(eString);
406                atomSite.setLabel_atom_id(a.getName());
407                atomSite.setLabel_alt_id(altLocStr);
408                atomSite.setLabel_comp_id(g.getPDBName());
409                atomSite.setLabel_asym_id(internalChainId);
410                atomSite.setLabel_entity_id(entityId);
411                atomSite.setLabel_seq_id(labelSeqId);
412                atomSite.setPdbx_PDB_ins_code(insCode);
413                atomSite.setCartn_x(FileConvert.d3.format(a.getX()));
414                atomSite.setCartn_y(FileConvert.d3.format(a.getY()));
415                atomSite.setCartn_z(FileConvert.d3.format(a.getZ()));
416                atomSite.setOccupancy(FileConvert.d2.format(a.getOccupancy()));
417                atomSite.setB_iso_or_equiv(FileConvert.d2.format(a.getTempFactor()));
418                atomSite.setAuth_seq_id(Integer.toString(g.getResidueNumber().getSeqNum()));
419                atomSite.setAuth_comp_id(g.getPDBName());
420                atomSite.setAuth_asym_id(chainId);
421                atomSite.setAuth_atom_id(a.getName());
422                atomSite.setPdbx_PDB_model_num(Integer.toString(model));
423
424                return atomSite;
425        }
426
427        /**
428         * Converts a Group into a List of {@link AtomSite} objects
429         * @param g
430         * @param model
431         * @param chainId
432         * @param internalChainId
433         * @return
434         */
435        private static List<AtomSite> convertGroupToAtomSites(Group g, int model, String chainId, String internalChainId) {
436
437                List<AtomSite> list = new ArrayList<AtomSite>();
438
439                int groupsize  = g.size();
440
441                for ( int atompos = 0 ; atompos < groupsize; atompos++) {
442                        Atom a = null ;
443
444                        a = g.getAtom(atompos);
445                        if ( a == null)
446                                continue ;
447
448                        list.add(convertAtomToAtomSite(a, model, chainId, internalChainId));
449
450                }
451                if ( g.hasAltLoc()){
452                        for (Group alt : g.getAltLocs() ) {
453                                list.addAll(convertGroupToAtomSites(alt, model, chainId, internalChainId));
454                        }
455                }
456                return list;
457        }
458
459        /**
460         * Converts a Chain into a List of {@link AtomSite} objects
461         * @param c
462         * @param model
463         * @param authorId
464         * @param asymId
465         * @return
466         */
467        public static List<AtomSite> convertChainToAtomSites(Chain c, int model, String authorId, String asymId) {
468
469                List<AtomSite> list = new ArrayList<AtomSite>();
470
471                if (c.getEntityInfo()==null) {
472                        logger.warn("No Compound (entity) found for chain {}: entity_id will be set to 0, label_seq_id will be the same as auth_seq_id", c.getName());
473                }
474
475                for ( int h=0; h<c.getAtomLength();h++){
476
477                        Group g= c.getAtomGroup(h);
478
479                        list.addAll(convertGroupToAtomSites(g, model, authorId, asymId));
480
481                }
482
483                return list;
484        }
485
486        /**
487         * Converts a Structure into a List of {@link AtomSite} objects
488         * @param s
489         * @return
490         */
491        public static List<AtomSite> convertStructureToAtomSites(Structure s) {
492                List<AtomSite> list = new ArrayList<AtomSite>();
493
494                for (int m=0;m<s.nrModels();m++) {
495                        for (Chain c:s.getChains(m)) {
496                                list.addAll(convertChainToAtomSites(c, m+1, c.getName(), c.getId()));
497                        }
498                }
499                return list;
500        }
501
502        /**
503         * Finds the max length of each of the String values contained in each of the fields of the given list of beans.
504         * Useful for producing mmCIF loop data that is aligned for all columns.
505         * @param list list of objects. All objects should have the same class.
506         * @param fields Set of fields for the record. If null, will be calculated from the class of the first record
507         * @return
508         * @see #toMMCIF(List, Class)
509         */
510        private static <T> int[] getFieldSizes(List<T> list, Field[] fields) {
511
512                if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");
513
514                if(fields == null)
515                        fields = getFields(list.get(0).getClass());
516
517                int[] sizes = new int [fields.length];
518
519
520                for (T a:list) {
521                        int i = -1;
522                        for (Field f : fields) {
523                                i++;
524
525                                f.setAccessible(true);
526
527                                try {
528                                        Object obj = f.get(a);
529                                        int length;
530                                        if (obj==null) {
531                                                length = MMCIF_MISSING_VALUE.length();
532                                        } else {
533                                                String val = (String) obj;
534                                                length = addMmCifQuoting(val).length();
535                                        }
536
537                                        if (length>sizes[i]) sizes[i] = length;
538
539                                } catch (IllegalAccessException e) {
540                                        logger.warn("Field {} is inaccessible", f.getName());
541                                        continue;
542                                } catch (ClassCastException e) {
543                                        logger.warn("Could not cast value to String for field {}",f.getName());
544                                        continue;
545                                }
546                        }
547                }
548                return sizes;
549        }
550
551        /**
552         * Finds the max length of a list of strings
553         * Useful for producing mmCIF single-record data that is aligned for all values.
554         * @param names
555         * @return
556         * @see #toMMCIF(String, Object)
557         */
558        private static int getMaxStringLength(String[] names) {
559                int size = 0;
560                for(String s : names) {
561                        if(s.length()>size) {
562                                size = s.length();
563                        }
564                }
565                return size;
566        }
567}