001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmcif;
022
023
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.io.FileConvert;
import org.biojava.nbio.structure.io.mmcif.model.AbstractBean;
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
import org.biojava.nbio.structure.io.mmcif.model.CIFLabel;
import org.biojava.nbio.structure.io.mmcif.model.Cell;
import org.biojava.nbio.structure.io.mmcif.model.IgnoreField;
import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
046
047/**
048 * Some tools for mmCIF file writing.
049 *
050 * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf
051 *
052 * CIF categories are represented as a simple bean, typically extending {@link AbstractBean}.
053 * By default, all fields from the bean are taken as the CIF labels. Fields
054 * may be omitted by annotating them as {@link IgnoreField @IgnoreField}.
 * The CIF label for a field may be changed (for instance, for fields that
 * are not valid Java identifiers) by annotating the field with
 * {@link CIFLabel @CIFLabel}, whose <tt>label</tt> value gives the
 * correct label.
059 * 
060 * @author Jose Duarte
061 * @author Spencer Bliven
062 */
063public class MMCIFFileTools {
064
        // SLF4J logger shared by all static helpers of this class
        private static final Logger logger = LoggerFactory.getLogger(MMCIFFileTools.class);

        // Platform line separator, used to terminate the lines of mmCIF text built by this class
        private static final String newline = System.getProperty("line.separator");

        /**
         * The string to be printed out in cases where a value is not assigned in mmCIF files
         */
        public static final String MMCIF_MISSING_VALUE = "?";

        /**
         * The string to be printed out as a default value in mmCIF files, e.g. for the default alt_locs
         */
        public static final String MMCIF_DEFAULT_VALUE = ".";
078
079
080        /**
081         * Produces a mmCIF loop header string for the given categoryName and className.
082         * className must be one of the beans in the {@link org.biojava.nbio.structure.io.mmcif.model} package
083         * @param categoryName
084         * @param className
085         * @return
086         * @throws ClassNotFoundException if the given className can not be found
087         */
088        public static String toLoopMmCifHeaderString(String categoryName, String className) throws ClassNotFoundException {
089                StringBuilder str = new StringBuilder();
090
091                str.append(SimpleMMcifParser.LOOP_START+newline);
092
093                Class<?> c = Class.forName(className);
094
095                for (Field f : getFields(c)) {
096                        str.append(categoryName+"."+f.getName()+newline);
097                }
098
099                return str.toString();
100        }
101
102        /**
103         * Converts a mmCIF bean (see {@link org.biojava.nbio.structure.io.mmcif.model} to
104         * a String representing it in mmCIF (single-record) format.
105         * @param categoryName
106         * @param o
107         * @return
108         */
109        public static String toMMCIF(String categoryName, Object o) {
110
111                StringBuilder sb = new StringBuilder();
112
113                Class<?> c = o.getClass();
114
115
116                Field[] fields = getFields(c);
117                String[] names = getFieldNames(fields);
118
119                int maxFieldNameLength = getMaxStringLength(names);
120
121                for (int i=0;i<fields.length;i++) {
122                        Field f = fields[i];
123                        String name = names[i];
124
125                        sb.append(categoryName+"."+name);
126
127                        int spacing = maxFieldNameLength - name.length() + 3;
128
129                        try {
130                                Object obj = f.get(o);
131                                String val;
132                                if (obj==null) {
133                                        logger.debug("Field {} is null, will write it out as {}",name,MMCIF_MISSING_VALUE);
134                                        val = MMCIF_MISSING_VALUE;
135                                } else {
136                                        val = (String) obj;
137                                }
138                                for (int j=0;j<spacing;j++) sb.append(' ');
139                                sb.append(addMmCifQuoting(val));
140                                sb.append(newline);
141
142                        } catch (IllegalAccessException e) {
143                                logger.warn("Field {} is inaccessible", name);
144                                continue;
145                        } catch (ClassCastException e) {
146                                logger.warn("Could not cast value to String for field {}",name);
147                                continue;
148                        }
149
150                }
151
152                sb.append(SimpleMMcifParser.COMMENT_CHAR+newline);
153
154                return sb.toString();
155        }
156
157        /**
158         * Gets all fields for a particular class, filtering fields annotated
159         * with {@link IgnoreField @IgnoreField}.
160         * 
161         * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
162         * on all fields.
163         * @param c
164         * @return
165         */
166        public static Field[] getFields(Class<?> c) {
167                Field[] allFields = c.getDeclaredFields();
168                Field[] fields = new Field[allFields.length];
169                int n = 0;
170                for(Field f : allFields) {
171                        f.setAccessible(true);
172                        IgnoreField anno = f.getAnnotation(IgnoreField.class);
173                        if(anno == null) {
174                                fields[n] = f;
175                                n++;
176                        }
177                }
178                return Arrays.copyOf(fields, n);
179        }
180
181        /**
182         * Gets the mmCIF record name for each field. This is generally just
183         * the name of the field or the value specified by the {@link CIFLabel @CIFLabel} annotation.
184         * 
185         * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
186         * on all fields.
187         * @param fields
188         * @return
189         */
190        public static String[] getFieldNames(Field[] fields) {
191                String[] names = new String[fields.length];
192                for(int i=0;i<fields.length;i++) {
193                        Field f = fields[i];
194                        f.setAccessible(true);
195                        String rawName = fields[i].getName();
196                        CIFLabel cifLabel = f.getAnnotation(CIFLabel.class);
197                        if(cifLabel != null) {
198                                names[i] = cifLabel.label();
199                        } else {
200                                names[i] = rawName;
201                        }
202                }
203                return names;
204        }
205
206        /**
207         * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model} to
208         * a String representing them in mmCIF loop format with one record per line.
209         * @param list
210         * @return
211         */
212        public static <T> String toMMCIF(List<T> list, Class<T> klass) {
213                if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");
214
215                Field[] fields = getFields(klass);
216                int[] sizes = getFieldSizes(list,fields);
217
218                StringBuilder sb = new StringBuilder();
219
220                for (T o:list) {
221                        sb.append(toSingleLoopLineMmCifString(o, fields, sizes));
222                }
223
224                sb.append(SimpleMMcifParser.COMMENT_CHAR+newline);
225
226                return sb.toString();
227        }
228        /**
229         * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model} to
230         * a String representing them in mmCIF loop format with one record per line.
231         * @param list
232         * @return
233         * @deprecated The {@link #toMMCIF(List, Class)} provides compile-time type safety
234         * @throws ClassCastException if not all list elements have the same type
235         */
236        @Deprecated
237        @SuppressWarnings("unchecked")
238        public static <T> String toMMCIF(List<T> list) {
239                Class<T> klass = (Class<T>)list.get(0).getClass();
240                for(T t : list) {
241                        if( klass != t.getClass() ) {
242                                throw new ClassCastException("Not all loop elements have the same fields");
243                        }
244                }
245                return toMMCIF(list,klass);
246        }
247
248        /**
249         * Given a mmCIF bean produces a String representing it in mmCIF loop format as a single record line
250         * @param record
251         * @param fields Set of fields for the record. If null, will be calculated from the class of the record
252         * @param sizes the size of each of the fields
253         * @return
254         */
255        private static String toSingleLoopLineMmCifString(Object record, Field[] fields, int[] sizes) {
256
257                StringBuilder str = new StringBuilder();
258
259                Class<?> c = record.getClass();
260
261                if(fields == null)
262                        fields = getFields(c);
263                
264                if (sizes.length!=fields.length)
265                        throw new IllegalArgumentException("The given sizes of fields differ from the number of declared fields");
266
267                int i = -1;
268                for (Field f : fields) {
269                        i++;
270                        f.setAccessible(true);
271
272                        try {
273                                Object obj = f.get(record);
274                                String val;
275                                if (obj==null) {
276                                        logger.debug("Field {} is null, will write it out as {}",f.getName(),MMCIF_MISSING_VALUE);
277                                        val = MMCIF_MISSING_VALUE;
278                                } else {
279                                        val = (String) obj;
280                                }
281
282                                str.append(String.format("%-"+sizes[i]+"s ", addMmCifQuoting(val)));
283
284
285                        } catch (IllegalAccessException e) {
286                                logger.warn("Field {} is inaccessible", f.getName());
287                                continue;
288                        } catch (ClassCastException e) {
289                                logger.warn("Could not cast value to String for field {}",f.getName());
290                                continue;
291                        }
292                }
293
294                str.append(newline);
295
296                return str.toString();
297
298        }
299
300        /**
301         * Adds quoting to a String according to the STAR format (mmCIF) rules
302         * @param val
303         * @return
304         */
305        private static String addMmCifQuoting(String val) {
306                String newval;
307
308                if (val.contains("'")) {
309                        // double quoting for strings containing single quotes (not strictly necessary but it's what the PDB usually does)
310                        newval = "\""+val+"\"";
311                } else if (val.contains(" ")) {
312                        // single quoting for stings containing spaces
313                        newval = "'"+val+"'";
314                } else {
315                        if (val.contains(" ") && val.contains("'")) {
316                                // TODO deal with this case
317                                logger.warn("Value contains both spaces and single quotes, won't format it: {}. CIF ouptut will likely be invalid.",val);
318                        }
319                        newval = val;
320                }
321                // TODO deal with all the other cases: e.g. multi-line quoting with ;;
322
323                return newval;
324        }
325
326        /**
327         * Converts a SpaceGroup object to a {@link Symmetry} object.
328         * @param sg
329         * @return
330         */
331        public static Symmetry convertSpaceGroupToSymmetry(SpaceGroup sg) {
332                Symmetry sym = new Symmetry();
333                sym.setSpace_group_name_H_M(sg.getShortSymbol());
334                // TODO do we need to fill any of the other values?
335                return sym;
336        }
337
338        /**
339         * Converts a CrystalCell object to a {@link Cell} object.
340         * @param c
341         * @return
342         */
343        public static Cell convertCrystalCellToCell(CrystalCell c) {
344                Cell cell = new Cell();
345                cell.setLength_a(String.format("%.3f",c.getA()));
346                cell.setLength_b(String.format("%.3f",c.getB()));
347                cell.setLength_c(String.format("%.3f",c.getC()));
348                cell.setAngle_alpha(String.format("%.3f",c.getAlpha()));
349                cell.setAngle_beta(String.format("%.3f",c.getBeta()));
350                cell.setAngle_gamma(String.format("%.3f",c.getGamma()));
351
352                return cell;
353        }
354
355        /**
356         * Converts an Atom object to an {@link AtomSite} object.
357         * @param a
358         * @param model
359         * @param chainId
360         * @param internalChainId
361         * @return
362         */
363        public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainId, String internalChainId) {
364                return convertAtomToAtomSite(a, model, chainId, internalChainId, a.getPDBserial());
365        }
366
367        /**
368         * Converts an Atom object to an {@link AtomSite} object.
369         * @param a
370         * @param model
371         * @param chainId
372         * @param internalChainId
373         * @param atomId the atom id to be written to AtomSite
374         * @return
375         */
376        public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainId, String internalChainId, int atomId) {
377
378                /*
379                ATOM 7    C CD  . GLU A 1 24  ? -10.109 15.374 38.853 1.00 50.05 ? ? ? ? ? ? 24  GLU A CD  1
380                ATOM 8    O OE1 . GLU A 1 24  ? -9.659  14.764 37.849 1.00 49.80 ? ? ? ? ? ? 24  GLU A OE1 1
381                ATOM 9    O OE2 . GLU A 1 24  ? -11.259 15.171 39.310 1.00 50.51 ? ? ? ? ? ? 24  GLU A OE2 1
382                ATOM 10   N N   . LEU A 1 25  ? -5.907  18.743 37.412 1.00 41.55 ? ? ? ? ? ? 25  LEU A N   1
383                ATOM 11   C CA  . LEU A 1 25  ? -5.168  19.939 37.026 1.00 37.55 ? ? ? ? ? ? 25  LEU A CA  1
384                */
385
386                Group g = a.getGroup();
387
388                String record ;
389                if ( g.getType().equals(GroupType.HETATM) ) {
390                        record = "HETATM";
391                } else {
392                        record = "ATOM";
393                }
394
395                String entityId = "0";
396                String labelSeqId = Integer.toString(g.getResidueNumber().getSeqNum());
397                if (g.getChain()!=null && g.getChain().getCompound()!=null) {
398                        entityId = Integer.toString(g.getChain().getCompound().getMolId());
399                        labelSeqId = Integer.toString(g.getChain().getCompound().getAlignedResIndex(g, g.getChain()));
400                }
401
402                Character  altLoc = a.getAltLoc()           ;
403                String altLocStr;
404                if (altLoc==null || altLoc == ' ') {
405                        altLocStr = MMCIF_DEFAULT_VALUE;
406                } else {
407                        altLocStr = altLoc.toString();
408                }
409
410                Element e = a.getElement();
411                String eString = e.toString().toUpperCase();
412                if ( e.equals(Element.R)) {
413                        eString = "X";
414                }
415
416                String insCode = MMCIF_MISSING_VALUE;
417                if (g.getResidueNumber().getInsCode()!=null ) {
418                        insCode = Character.toString(g.getResidueNumber().getInsCode());
419                }
420
421                AtomSite atomSite = new AtomSite();
422                atomSite.setGroup_PDB(record);
423                atomSite.setId(Integer.toString(atomId));
424                atomSite.setType_symbol(eString);
425                atomSite.setLabel_atom_id(a.getName());
426                atomSite.setLabel_alt_id(altLocStr);
427                atomSite.setLabel_comp_id(g.getPDBName());
428                atomSite.setLabel_asym_id(internalChainId);
429                atomSite.setLabel_entity_id(entityId);
430                atomSite.setLabel_seq_id(labelSeqId);
431                atomSite.setPdbx_PDB_ins_code(insCode);
432                atomSite.setCartn_x(FileConvert.d3.format(a.getX()));
433                atomSite.setCartn_y(FileConvert.d3.format(a.getY()));
434                atomSite.setCartn_z(FileConvert.d3.format(a.getZ()));
435                atomSite.setOccupancy(FileConvert.d2.format(a.getOccupancy()));
436                atomSite.setB_iso_or_equiv(FileConvert.d2.format(a.getTempFactor()));
437                atomSite.setAuth_seq_id(Integer.toString(g.getResidueNumber().getSeqNum()));
438                atomSite.setAuth_comp_id(g.getPDBName());
439                atomSite.setAuth_asym_id(chainId);
440                atomSite.setAuth_atom_id(a.getName());
441                atomSite.setPdbx_PDB_model_num(Integer.toString(model));
442
443                return atomSite;
444        }
445
446        /**
447         * Converts a Group into a List of {@link AtomSite} objects
448         * @param g
449         * @param model
450         * @param chainId
451         * @param internalChainId
452         * @return
453         */
454        private static List<AtomSite> convertGroupToAtomSites(Group g, int model, String chainId, String internalChainId) {
455
456                List<AtomSite> list = new ArrayList<AtomSite>();
457
458                int groupsize  = g.size();
459
460                for ( int atompos = 0 ; atompos < groupsize; atompos++) {
461                        Atom a = null ;
462
463                        a = g.getAtom(atompos);
464                        if ( a == null)
465                                continue ;
466
467                        list.add(convertAtomToAtomSite(a, model, chainId, internalChainId));
468
469                }
470                if ( g.hasAltLoc()){
471                        for (Group alt : g.getAltLocs() ) {
472                                list.addAll(convertGroupToAtomSites(alt, model, chainId, internalChainId));
473                        }
474                }
475                return list;
476        }
477
478        /**
479         * Converts a Chain into a List of {@link AtomSite} objects
480         * @param c
481         * @param model
482         * @param chainId
483         * @param internalChainId
484         * @return
485         */
486        public static List<AtomSite> convertChainToAtomSites(Chain c, int model, String chainId, String internalChainId) {
487
488                List<AtomSite> list = new ArrayList<AtomSite>();
489
490                if (c.getCompound()==null) {
491                        logger.warn("No Compound (entity) found for chain {}: entity_id will be set to 0, label_seq_id will be the same as auth_seq_id", c.getChainID());
492                }
493
494                for ( int h=0; h<c.getAtomLength();h++){
495
496                        Group g= c.getAtomGroup(h);
497
498                        list.addAll(convertGroupToAtomSites(g, model, chainId, internalChainId));
499
500                }
501
502                return list;
503        }
504
505        /**
506         * Converts a Structure into a List of {@link AtomSite} objects
507         * @param s
508         * @return
509         */
510        public static List<AtomSite> convertStructureToAtomSites(Structure s) {
511                List<AtomSite> list = new ArrayList<AtomSite>();
512
513                for (int m=0;m<s.nrModels();m++) {
514                        for (Chain c:s.getChains(m)) {
515                                list.addAll(convertChainToAtomSites(c, m+1, c.getChainID(), c.getInternalChainID()));
516                        }
517                }
518                return list;
519        }
520
521        /**
522         * Finds the max length of each of the String values contained in each of the fields of the given list of beans.
523         * Useful for producing mmCIF loop data that is aligned for all columns.
524         * @param list list of objects. All objects should have the same class.
525         * @param fields Set of fields for the record. If null, will be calculated from the class of the first record
526         * @return
527         * @see #toMMCIF(List, Class)
528         */
529        private static <T> int[] getFieldSizes(List<T> list, Field[] fields) {
530
531                if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");
532
533                if(fields == null)
534                        fields = getFields(list.get(0).getClass());
535
536                int[] sizes = new int [fields.length];
537
538
539                for (T a:list) {
540                        int i = -1;
541                        for (Field f : fields) {
542                                i++;
543
544                                f.setAccessible(true);
545
546                                try {
547                                        Object obj = f.get(a);
548                                        int length;
549                                        if (obj==null) {
550                                                length = MMCIF_MISSING_VALUE.length();
551                                        } else {
552                                                String val = (String) obj;
553                                                length = addMmCifQuoting(val).length();
554                                        }
555
556                                        if (length>sizes[i]) sizes[i] = length;
557
558                                } catch (IllegalAccessException e) {
559                                        logger.warn("Field {} is inaccessible", f.getName());
560                                        continue;
561                                } catch (ClassCastException e) {
562                                        logger.warn("Could not cast value to String for field {}",f.getName());
563                                        continue;
564                                }
565                        }
566                }
567                return sizes;
568        }
569
570        /**
571         * Finds the max length of a list of strings
572         * Useful for producing mmCIF single-record data that is aligned for all values.
573         * @param names
574         * @return
575         * @see #toMMCIF(String, Object)
576         */
577        private static int getMaxStringLength(String[] names) {
578                int size = 0;
579                for(String s : names) {
580                        if(s.length()>size) {
581                                size = s.length();
582                        }
583                }
584                return size;
585        }
586}