001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.io.mmcif;
022
023
024import java.lang.reflect.Field;
025import java.util.*;
026
027import org.biojava.nbio.structure.Atom;
028import org.biojava.nbio.structure.Chain;
029import org.biojava.nbio.structure.Element;
030import org.biojava.nbio.structure.EntityType;
031import org.biojava.nbio.structure.Group;
032import org.biojava.nbio.structure.GroupType;
033import org.biojava.nbio.structure.Structure;
034import org.biojava.nbio.structure.io.FileConvert;
035import org.biojava.nbio.structure.io.mmcif.model.AbstractBean;
036import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
037import org.biojava.nbio.structure.io.mmcif.model.CIFLabel;
038import org.biojava.nbio.structure.io.mmcif.model.Cell;
039import org.biojava.nbio.structure.io.mmcif.model.IgnoreField;
040import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
041import org.biojava.nbio.structure.xtal.CrystalCell;
042import org.biojava.nbio.structure.xtal.SpaceGroup;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045
046/**
047 * Some tools for mmCIF file writing.
048 *
049 * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf
050 *
051 * CIF categories are represented as a simple bean, typically extending {@link AbstractBean}.
052 * By default, all fields from the bean are taken as the CIF labels. Fields
053 * may be omitted by annotating them as {@link IgnoreField @IgnoreField}.
 * The CIF label for a field may be changed (for instance, for fields that
 * are not valid Java identifiers) by annotating the field with
 * {@link CIFLabel @CIFLabel}, whose label is then used instead of the
 * field's name.
058 *
059 * @author Jose Duarte
060 * @author Spencer Bliven
061 */
062public class MMCIFFileTools {
063
064        private static final Logger logger = LoggerFactory.getLogger(MMCIFFileTools.class);
065
066        private static final String newline = System.getProperty("line.separator");
067
068        /**
069         * The character to be printed out in cases where a value is not assigned in mmCIF files
070         */
071        public static final String MMCIF_MISSING_VALUE = "?";
072
073        /**
074         * The character to be printed out as a default value in mmCIF files, e.g. for the default alt_locs
075         */
076        public static final String MMCIF_DEFAULT_VALUE = ".";
077
078
079        /**
080         * Produces a mmCIF loop header string for the given categoryName and className.
081         * className must be one of the beans in the {@link org.biojava.nbio.structure.io.mmcif.model} package
082         * @param categoryName
083         * @param className
084         * @return
085         * @throws ClassNotFoundException if the given className can not be found
086         */
087        public static String toLoopMmCifHeaderString(String categoryName, String className) throws ClassNotFoundException {
088                StringBuilder str = new StringBuilder();
089
090                str.append(SimpleMMcifParser.LOOP_START+newline);
091
092                Class<?> c = Class.forName(className);
093
094                for (Field f : getFields(c)) {
095                        str.append(categoryName+"."+f.getName()+newline);
096                }
097
098                return str.toString();
099        }
100
101        /**
102         * Converts a mmCIF bean (see {@link org.biojava.nbio.structure.io.mmcif.model} to
103         * a String representing it in mmCIF (single-record) format.
104         * @param categoryName
105         * @param o
106         * @return
107         */
108        public static String toMMCIF(String categoryName, Object o) {
109
110                StringBuilder sb = new StringBuilder();
111
112                Class<?> c = o.getClass();
113
114
115                Field[] fields = getFields(c);
116                String[] names = getFieldNames(fields);
117
118                int maxFieldNameLength = getMaxStringLength(names);
119
120                for (int i=0;i<fields.length;i++) {
121                        Field f = fields[i];
122                        String name = names[i];
123
124                        sb.append(categoryName).append(".").append(name);
125
126                        int spacing = maxFieldNameLength - name.length() + 3;
127
128                        try {
129                                Object obj = f.get(o);
130                                String val;
131                                if (obj==null) {
132                                        logger.debug("Field {} is null, will write it out as {}",name,MMCIF_MISSING_VALUE);
133                                        val = MMCIF_MISSING_VALUE;
134                                } else {
135                                        val = (String) obj;
136                                }
137                                for (int j=0;j<spacing;j++) sb.append(' ');
138                                sb.append(addMmCifQuoting(val));
139                                sb.append(newline);
140
141                        } catch (IllegalAccessException e) {
142                                logger.warn("Field {} is inaccessible", name);
143                                continue;
144                        } catch (ClassCastException e) {
145                                logger.warn("Could not cast value to String for field {}",name);
146                                continue;
147                        }
148
149                }
150
151                sb.append(SimpleMMcifParser.COMMENT_CHAR+newline);
152
153                return sb.toString();
154        }
155
156        /**
157         * Gets all fields for a particular class, filtering fields annotated
158         * with {@link IgnoreField @IgnoreField}.
159         *
160         * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
161         * on all fields.
162         * @param c
163         * @return
164         */
165        public static Field[] getFields(Class<?> c) {
166                Field[] allFields = c.getDeclaredFields();
167                Field[] fields = new Field[allFields.length];
168                int n = 0;
169                for(Field f : allFields) {
170                        f.setAccessible(true);
171                        IgnoreField anno = f.getAnnotation(IgnoreField.class);
172                        if(anno == null) {
173                                fields[n] = f;
174                                n++;
175                        }
176                }
177                return Arrays.copyOf(fields, n);
178        }
179
180        /**
181         * Gets the mmCIF record name for each field. This is generally just
182         * the name of the field or the value specified by the {@link CIFLabel @CIFLabel} annotation.
183         *
184         * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)}
185         * on all fields.
186         * @param fields
187         * @return
188         */
189        public static String[] getFieldNames(Field[] fields) {
190                String[] names = new String[fields.length];
191                for(int i=0;i<fields.length;i++) {
192                        Field f = fields[i];
193                        f.setAccessible(true);
194                        String rawName = fields[i].getName();
195                        CIFLabel cifLabel = f.getAnnotation(CIFLabel.class);
196                        if(cifLabel != null) {
197                                names[i] = cifLabel.label();
198                        } else {
199                                names[i] = rawName;
200                        }
201                }
202                return names;
203        }
204
205        /**
206         * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model} to
207         * a String representing them in mmCIF loop format with one record per line.
208         * @param list
209         * @return
210         */
211        public static <T> String toMMCIF(List<T> list, Class<T> klass) {
212                if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");
213
214                Field[] fields = getFields(klass);
215                int[] sizes = getFieldSizes(list,fields);
216
217                StringBuilder sb = new StringBuilder();
218
219                for (T o:list) {
220                        sb.append(toSingleLoopLineMmCifString(o, fields, sizes));
221                }
222
223                sb.append(SimpleMMcifParser.COMMENT_CHAR+newline);
224
225                return sb.toString();
226        }
227
228        /**
229         * Given a mmCIF bean produces a String representing it in mmCIF loop format as a single record line
230         * @param record
231         * @param fields Set of fields for the record. If null, will be calculated from the class of the record
232         * @param sizes the size of each of the fields
233         * @return
234         */
235        private static String toSingleLoopLineMmCifString(Object record, Field[] fields, int[] sizes) {
236
237                StringBuilder str = new StringBuilder();
238
239                Class<?> c = record.getClass();
240
241                if(fields == null)
242                        fields = getFields(c);
243
244                if (sizes.length!=fields.length)
245                        throw new IllegalArgumentException("The given sizes of fields differ from the number of declared fields");
246
247                int i = -1;
248                for (Field f : fields) {
249                        i++;
250                        f.setAccessible(true);
251
252                        try {
253                                Object obj = f.get(record);
254                                String val;
255                                if (obj==null) {
256                                        logger.debug("Field {} is null, will write it out as {}",f.getName(),MMCIF_MISSING_VALUE);
257                                        val = MMCIF_MISSING_VALUE;
258                                } else {
259                                        val = (String) obj;
260                                }
261
262                                str.append(String.format("%-"+sizes[i]+"s ", addMmCifQuoting(val)));
263
264
265                        } catch (IllegalAccessException e) {
266                                logger.warn("Field {} is inaccessible", f.getName());
267                                continue;
268                        } catch (ClassCastException e) {
269                                logger.warn("Could not cast value to String for field {}",f.getName());
270                                continue;
271                        }
272                }
273
274                str.append(newline);
275
276                return str.toString();
277
278        }
279
280        /**
281         * Adds quoting to a String according to the STAR format (mmCIF) rules
282         * @param val
283         * @return
284         */
285        private static String addMmCifQuoting(String val) {
286                String newval;
287
288                if (val.contains("'")) {
289                        // double quoting for strings containing single quotes (not strictly necessary but it's what the PDB usually does)
290                        newval = "\""+val+"\"";
291                } else if (val.contains(" ")) {
292                        // single quoting for stings containing spaces
293                        newval = "'"+val+"'";
294                } else {
295                        if (val.contains(" ") && val.contains("'")) {
296                                // TODO deal with this case
297                                logger.warn("Value contains both spaces and single quotes, won't format it: {}. CIF ouptut will likely be invalid.",val);
298                        }
299                        newval = val;
300                }
301                // TODO deal with all the other cases: e.g. multi-line quoting with ;;
302
303                return newval;
304        }
305
306        /**
307         * Converts a SpaceGroup object to a {@link Symmetry} object.
308         * @param sg
309         * @return
310         */
311        public static Symmetry convertSpaceGroupToSymmetry(SpaceGroup sg) {
312                Symmetry sym = new Symmetry();
313                sym.setSpace_group_name_H_M(sg.getShortSymbol());
314                // TODO do we need to fill any of the other values?
315                return sym;
316        }
317
318        /**
319         * Converts a CrystalCell object to a {@link Cell} object.
320         * @param c
321         * @return
322         */
323        public static Cell convertCrystalCellToCell(CrystalCell c) {
324                Cell cell = new Cell();
325                cell.setLength_a(String.format("%.3f",c.getA()));
326                cell.setLength_b(String.format("%.3f",c.getB()));
327                cell.setLength_c(String.format("%.3f",c.getC()));
328                cell.setAngle_alpha(String.format("%.3f",c.getAlpha()));
329                cell.setAngle_beta(String.format("%.3f",c.getBeta()));
330                cell.setAngle_gamma(String.format("%.3f",c.getGamma()));
331
332                return cell;
333        }
334
335        /**
336         * Converts an Atom object to an {@link AtomSite} object.
337         * @param a
338         * @param model the model number for the output AtomSites
339         * @param chainName the chain identifier (author id) for the output AtomSites
340         * @param chainId the internal chain identifier (asym id) for the output AtomSites
341         * @return
342         */
343        public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainName, String chainId) {
344                return convertAtomToAtomSite(a, model, chainName, chainId, a.getPDBserial());
345        }
346
347        /**
348         * Converts an Atom object to an {@link AtomSite} object.
349         * @param a the atom
350         * @param model the model number for the output AtomSites
351         * @param chainName the chain identifier (author id) for the output AtomSites
352         * @param chainId the internal chain identifier (asym id) for the output AtomSites
353         * @param atomId the atom id to be written to AtomSite
354         * @return
355         */
356        public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainName, String chainId, int atomId) {
357
358                /*
359                ATOM 7    C CD  . GLU A 1 24  ? -10.109 15.374 38.853 1.00 50.05 ? ? ? ? ? ? 24  GLU A CD  1
360                ATOM 8    O OE1 . GLU A 1 24  ? -9.659  14.764 37.849 1.00 49.80 ? ? ? ? ? ? 24  GLU A OE1 1
361                ATOM 9    O OE2 . GLU A 1 24  ? -11.259 15.171 39.310 1.00 50.51 ? ? ? ? ? ? 24  GLU A OE2 1
362                ATOM 10   N N   . LEU A 1 25  ? -5.907  18.743 37.412 1.00 41.55 ? ? ? ? ? ? 25  LEU A N   1
363                ATOM 11   C CA  . LEU A 1 25  ? -5.168  19.939 37.026 1.00 37.55 ? ? ? ? ? ? 25  LEU A CA  1
364                */
365
366                Group g = a.getGroup();
367
368                String record ;
369                if ( g.getType().equals(GroupType.HETATM) ) {
370                        record = "HETATM";
371                } else {
372                        record = "ATOM";
373                }
374
375                String entityId = "0";
376                String labelSeqId = Integer.toString(g.getResidueNumber().getSeqNum());
377                if (g.getChain()!=null && g.getChain().getEntityInfo()!=null) {
378                        entityId = Integer.toString(g.getChain().getEntityInfo().getMolId());
379                        if (g.getChain().getEntityInfo().getType() == EntityType.POLYMER) {
380                                // this only makes sense for polymeric chains, non-polymer chains will never have seqres groups and there's no point in calling getAlignedResIndex
381                                labelSeqId = Integer.toString(g.getChain().getEntityInfo().getAlignedResIndex(g, g.getChain()));
382                        }
383                }
384
385                Character  altLoc = a.getAltLoc();
386                String altLocStr;
387                if (altLoc==null || altLoc == ' ') {
388                        altLocStr = MMCIF_DEFAULT_VALUE;
389                } else {
390                        altLocStr = altLoc.toString();
391                }
392
393                Element e = a.getElement();
394                String eString = e.toString().toUpperCase();
395                if ( e.equals(Element.R)) {
396                        eString = "X";
397                }
398
399                String insCode = MMCIF_MISSING_VALUE;
400                if (g.getResidueNumber().getInsCode()!=null ) {
401                        insCode = Character.toString(g.getResidueNumber().getInsCode());
402                }
403
404                AtomSite atomSite = new AtomSite();
405                atomSite.setGroup_PDB(record);
406                atomSite.setId(Integer.toString(atomId));
407                atomSite.setType_symbol(eString);
408                atomSite.setLabel_atom_id(a.getName());
409                atomSite.setLabel_alt_id(altLocStr);
410                atomSite.setLabel_comp_id(g.getPDBName());
411                atomSite.setLabel_asym_id(chainId);
412                atomSite.setLabel_entity_id(entityId);
413                atomSite.setLabel_seq_id(labelSeqId);
414                atomSite.setPdbx_PDB_ins_code(insCode);
415                atomSite.setCartn_x(FileConvert.d3.format(a.getX()));
416                atomSite.setCartn_y(FileConvert.d3.format(a.getY()));
417                atomSite.setCartn_z(FileConvert.d3.format(a.getZ()));
418                atomSite.setOccupancy(FileConvert.d2.format(a.getOccupancy()));
419                atomSite.setB_iso_or_equiv(FileConvert.d2.format(a.getTempFactor()));
420                atomSite.setAuth_seq_id(Integer.toString(g.getResidueNumber().getSeqNum()));
421                atomSite.setAuth_comp_id(g.getPDBName());
422                atomSite.setAuth_asym_id(chainName);
423                atomSite.setAuth_atom_id(a.getName());
424                atomSite.setPdbx_PDB_model_num(Integer.toString(model));
425
426                return atomSite;
427        }
428
429        /**
430         * Converts a Group into a List of {@link AtomSite} objects.
431         * Atoms in other altloc groups (different from the main group) are also included, removing possible duplicates
432         * via using the atom identifier to assess uniqueness.
433         * @param g the group
434         * @param model the model number for the output AtomSites
435         * @param chainName the chain identifier (author id) for the output AtomSites
436         * @param chainId the internal chain identifier (asym id) for the output AtomSites
437         * @return
438         */
439        public static List<AtomSite> convertGroupToAtomSites(Group g, int model, String chainName, String chainId) {
440
441                // The alt locs can have duplicates, since at parsing time we make sure that all alt loc groups have
442                // all atoms (see StructureTools#cleanUpAltLocs)
443                // Thus we have to remove duplicates here by using the atom id
444                // See issue https://github.com/biojava/biojava/issues/778 and TestAltLocs.testMmcifWritingAllAltlocs/testMmcifWritingPartialAltlocs
445                Map<Integer, AtomSite> uniqueAtomSites = new LinkedHashMap<>();
446
447                int groupsize  = g.size();
448
449                for ( int atompos = 0 ; atompos < groupsize; atompos++) {
450                        Atom a = g.getAtom(atompos);
451                        if ( a == null)
452                                continue ;
453
454                        uniqueAtomSites.put(a.getPDBserial(), convertAtomToAtomSite(a, model, chainName, chainId));
455                }
456
457                if ( g.hasAltLoc()){
458                        for (Group alt : g.getAltLocs() ) {
459                                for (AtomSite atomSite : convertGroupToAtomSites(alt, model, chainName, chainId)) {
460                                        uniqueAtomSites.put(Integer.parseInt(atomSite.getId()), atomSite);
461                                }
462                        }
463                }
464                return new ArrayList<>(uniqueAtomSites.values());
465        }
466
467        /**
468         * Converts a Chain into a List of {@link AtomSite} objects
469         * @param c the chain
470         * @param model the model number for the output AtomSites
471         * @param chainName the chain identifier (author id) for the output AtomSites
472         * @param chainId the internal chain identifier (asym id) for the output AtomSites
473         * @return
474         */
475        public static List<AtomSite> convertChainToAtomSites(Chain c, int model, String chainName, String chainId) {
476
477                List<AtomSite> list = new ArrayList<>();
478
479                if (c.getEntityInfo()==null) {
480                        logger.warn("No entity found for chain {}: entity_id will be set to 0, label_seq_id will be the same as auth_seq_id", c.getName());
481                }
482
483                for ( int h=0; h<c.getAtomLength();h++){
484
485                        Group g= c.getAtomGroup(h);
486
487                        list.addAll(convertGroupToAtomSites(g, model, chainName, chainId));
488
489                }
490
491                return list;
492        }
493
494        /**
495         * Converts a Structure into a List of {@link AtomSite} objects
496         * @param s
497         * @return
498         */
499        public static List<AtomSite> convertStructureToAtomSites(Structure s) {
500                List<AtomSite> list = new ArrayList<AtomSite>();
501
502                for (int m=0;m<s.nrModels();m++) {
503                        for (Chain c:s.getChains(m)) {
504                                list.addAll(convertChainToAtomSites(c, m+1, c.getName(), c.getId()));
505                        }
506                }
507                return list;
508        }
509
510        /**
511         * Finds the max length of each of the String values contained in each of the fields of the given list of beans.
512         * Useful for producing mmCIF loop data that is aligned for all columns.
513         * @param list list of objects. All objects should have the same class.
514         * @param fields Set of fields for the record. If null, will be calculated from the class of the first record
515         * @return
516         * @see #toMMCIF(List, Class)
517         */
518        private static <T> int[] getFieldSizes(List<T> list, Field[] fields) {
519
520                if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!");
521
522                if(fields == null)
523                        fields = getFields(list.get(0).getClass());
524
525                int[] sizes = new int [fields.length];
526
527
528                for (T a:list) {
529                        int i = -1;
530                        for (Field f : fields) {
531                                i++;
532
533                                f.setAccessible(true);
534
535                                try {
536                                        Object obj = f.get(a);
537                                        int length;
538                                        if (obj==null) {
539                                                length = MMCIF_MISSING_VALUE.length();
540                                        } else {
541                                                String val = (String) obj;
542                                                length = addMmCifQuoting(val).length();
543                                        }
544
545                                        if (length>sizes[i]) sizes[i] = length;
546
547                                } catch (IllegalAccessException e) {
548                                        logger.warn("Field {} is inaccessible", f.getName());
549                                        continue;
550                                } catch (ClassCastException e) {
551                                        logger.warn("Could not cast value to String for field {}",f.getName());
552                                        continue;
553                                }
554                        }
555                }
556                return sizes;
557        }
558
559        /**
560         * Finds the max length of a list of strings
561         * Useful for producing mmCIF single-record data that is aligned for all values.
562         * @param names
563         * @return
564         * @see #toMMCIF(String, Object)
565         */
566        private static int getMaxStringLength(String[] names) {
567                int size = 0;
568                for(String s : names) {
569                        if(s.length()>size) {
570                                size = s.length();
571                        }
572                }
573                return size;
574        }
575}