Source code

001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jan 4, 2006
021 *
022 */
023package org.biojava.nbio.structure;
024
025import java.io.File;
026import java.io.FileInputStream;
027import java.io.IOException;
028import java.io.InputStream;
029import java.util.ArrayList;
030import java.util.Arrays;
031import java.util.Collection;
032import java.util.Collections;
033import java.util.HashMap;
034import java.util.HashSet;
035import java.util.Iterator;
036import java.util.LinkedHashSet;
037import java.util.List;
038import java.util.Map;
039import java.util.Set;
040
041import javax.vecmath.Point3d;
042
043import org.biojava.nbio.structure.align.util.AtomCache;
044import org.biojava.nbio.structure.contact.AtomContactSet;
045import org.biojava.nbio.structure.contact.Grid;
046import org.biojava.nbio.structure.io.FileParsingParameters;
047import org.biojava.nbio.structure.io.PDBFileParser;
048import org.biojava.nbio.core.util.FileDownloadUtils;
049import org.slf4j.Logger;
050import org.slf4j.LoggerFactory;
051
052/**
053 * A class that provides some tool methods.
054 *
055 * @author Andreas Prlic
056 * @author Jules Jacobsen
057 * @since 1.0
058 */
059public class StructureTools {
060
061        private static final Logger logger = LoggerFactory.getLogger(StructureTools.class);
062
063        // Amino Acid backbone
064        /**
065         * The atom name of the backbone C-alpha atom. Note that this can be
066         * ambiguous depending on the context since Calcium atoms use the same name
067         * in PDB.
068         */
069        public static final String CA_ATOM_NAME = "CA";
070
071        /**
072         * The atom name for the backbone amide nitrogen
073         */
074        public static final String N_ATOM_NAME = "N";
075
076        /**
077         * The atom name for the backbone carbonyl
078         */
079        public static final String C_ATOM_NAME = "C";
080
081        /**
082         * The atom name for the backbone carbonyl oxygen
083         */
084        public static final String O_ATOM_NAME = "O";
085
086        /**
087         * The atom name of the side-chain C-beta atom
088         */
089        public static final String CB_ATOM_NAME = "CB";
090
091        // Nucleotide backbone
092        /**
093         * The atom name of the backbone C1' in RNA
094         */
095        public static final String C1_ATOM_NAME = "C1'";
096        /**
097         * The atom name of the backbone C2' in RNA
098         */
099        public static final String C2_ATOM_NAME = "C2'";
100        /**
101         * The atom name of the backbone C3' in RNA
102         */
103        public static final String C3_ATOM_NAME = "C3'";
104        /**
105         * The atom name of the backbone C4' in RNA
106         */
107        public static final String C4_ATOM_NAME = "C4'";
108        /**
109         * The atom name of the backbone O2' in RNA
110         */
111        public static final String O2_ATOM_NAME = "O2'";
112        /**
113         * The atom name of the backbone O3' in RNA
114         */
115        public static final String O3_ATOM_NAME = "O3'";
116        /**
117         * The atom name of the backbone O4' in RNA
118         */
119        public static final String O4_ATOM_NAME = "O4'";
120        /**
121         * The atom name of the backbone O4' in RNA
122         */
123        public static final String O5_ATOM_NAME = "O5'";
124        /**
125         * The atom name of the backbone O4' in RNA
126         */
127        public static final String OP1_ATOM_NAME = "OP1";
128        /**
129         * The atom name of the backbone O4' in RNA
130         */
131        public static final String OP2_ATOM_NAME = "OP2";
132        /**
133         * The atom name of the backbone phosphate in RNA
134         */
135        public static final String P_ATOM_NAME = "P";
136
137        /**
138         * The atom used as representative for nucleotides, equivalent to
139         * {@link #CA_ATOM_NAME} for proteins
140         */
141        public static final String NUCLEOTIDE_REPRESENTATIVE = P_ATOM_NAME;
142
143        /**
144         * The character to use for unknown compounds in sequence strings
145         */
146        public static final char UNKNOWN_GROUP_LABEL = 'X';
147
148        /**
149         * Below this ratio of aminoacid/nucleotide residues to the sequence total,
150         * we use simple majority of aminoacid/nucleotide residues to decide the
151         * character of the chain (protein/nucleotide)
152         */
153        public static final double RATIO_RESIDUES_TO_TOTAL = 0.95;
154
155        /**
156         * Threshold for plausible binding of a ligand to the selected substructure
157         */
158        public static final double DEFAULT_LIGAND_PROXIMITY_CUTOFF = 5;
159
160        // there is a file format change in PDB 3.0 and nucleotides are being
161        // renamed
162        private static final Map<String, Character> nucleotides30;
163        private static final Map<String, Character> nucleotides23;
164
165        // amino acid 3 and 1 letter code definitions
166        private static final Map<String, Character> aminoAcids;
167
168        private static final Set<Element> hBondDonorAcceptors;
169
170        private static final Set<String> NUCLEOTIDE_BACKBONE_ATOMS;
171        private static final Set<String> AMINOACID_BACKBONE_ATOMS;
172
173        static {
174                nucleotides30 = new HashMap<>();
175                nucleotides30.put("DA", 'A');
176                nucleotides30.put("DC", 'C');
177                nucleotides30.put("DG", 'G');
178                nucleotides30.put("DT", 'T');
179                nucleotides30.put("DI", 'I');
180                nucleotides30.put("A", 'A');
181                nucleotides30.put("G", 'G');
182                nucleotides30.put("C", 'C');
183                nucleotides30.put("U", 'U');
184                nucleotides30.put("I", 'I');
185
186                // the DNA linkers - the +C , +G, +A +T +U and +I have been replaced
187                // with these:
188                nucleotides30.put("TAF", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine
189                nucleotides30.put("TC1", UNKNOWN_GROUP_LABEL); // Furanosyl
190                nucleotides30.put("TFE", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine
191                nucleotides30.put("TFO", UNKNOWN_GROUP_LABEL); // Tenofovir (3'
192                // terminator)
193                nucleotides30.put("TGP", UNKNOWN_GROUP_LABEL); // Guanine variant
194                nucleotides30.put("THX", UNKNOWN_GROUP_LABEL); // 5' terminator
195                nucleotides30.put("TLC", UNKNOWN_GROUP_LABEL); // Thymine with dicyclic
196                // sugar
197                nucleotides30.put("TLN", UNKNOWN_GROUP_LABEL); // locked Thymine
198                nucleotides30.put("LCG", UNKNOWN_GROUP_LABEL); // locked Guanine
199                nucleotides30.put("TP1", UNKNOWN_GROUP_LABEL); // Thymine peptide
200                // nucleic acid, with
201                // added methyl
202                nucleotides30.put("CP1", UNKNOWN_GROUP_LABEL); // Cytidine peptide
203                // nucleic acid, with
204                // added methyl
205                nucleotides30.put("TPN", UNKNOWN_GROUP_LABEL); // Thymine peptide
206                // nucleic acid
207                nucleotides30.put("CPN", UNKNOWN_GROUP_LABEL); // Cytidine peptide
208                // nucleic acid
209                nucleotides30.put("GPN", UNKNOWN_GROUP_LABEL); // Guanine peptide
210                // nucleic acid
211                nucleotides30.put("APN", UNKNOWN_GROUP_LABEL); // Adenosine peptide
212                // nucleic acid
213                nucleotides30.put("TPC", UNKNOWN_GROUP_LABEL); // Thymine variant
214
215                // store nucleic acids (C, G, A, T, U, and I), and
216                // the modified versions of nucleic acids (+C, +G, +A, +T, +U, and +I),
217                // and
218                nucleotides23 = new HashMap<>();
219                String[] names = { "C", "G", "A", "T", "U", "I", "+C", "+G", "+A",
220                                "+T", "+U", "+I" };
221                for (String n : names) {
222                        nucleotides23.put(n, n.charAt(n.length() - 1));
223                }
224
225                aminoAcids = new HashMap<>();
226                aminoAcids.put("GLY", 'G');
227                aminoAcids.put("ALA", 'A');
228                aminoAcids.put("VAL", 'V');
229                aminoAcids.put("LEU", 'L');
230                aminoAcids.put("ILE", 'I');
231                aminoAcids.put("PHE", 'F');
232                aminoAcids.put("TYR", 'Y');
233                aminoAcids.put("TRP", 'W');
234                aminoAcids.put("PRO", 'P');
235                aminoAcids.put("HIS", 'H');
236                aminoAcids.put("LYS", 'K');
237                aminoAcids.put("ARG", 'R');
238                aminoAcids.put("SER", 'S');
239                aminoAcids.put("THR", 'T');
240                aminoAcids.put("GLU", 'E');
241                aminoAcids.put("GLN", 'Q');
242                aminoAcids.put("ASP", 'D');
243                aminoAcids.put("ASN", 'N');
244                aminoAcids.put("CYS", 'C');
245                aminoAcids.put("MET", 'M');
246                // MSE is only found as a molecular replacement for MET
247                aminoAcids.put("MSE", 'M');
248                // 'non-standard', genetically encoded
249                // http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html
250                // IUBMB recommended name is 'SEC' but the wwPDB currently use 'CSE'
251                // likewise 'PYL' (IUBMB) and 'PYH' (PDB)
252                aminoAcids.put("CSE", 'U');
253                aminoAcids.put("SEC", 'U');
254                aminoAcids.put("PYH", 'O');
255                aminoAcids.put("PYL", 'O');
256
257                hBondDonorAcceptors = new HashSet<>();
258                hBondDonorAcceptors.add(Element.N);
259                hBondDonorAcceptors.add(Element.O);
260                hBondDonorAcceptors.add(Element.S);
261
262                NUCLEOTIDE_BACKBONE_ATOMS = new HashSet<>(Arrays.asList(C1_ATOM_NAME, C2_ATOM_NAME, C3_ATOM_NAME, C4_ATOM_NAME, O2_ATOM_NAME, O3_ATOM_NAME, O4_ATOM_NAME, O5_ATOM_NAME, OP1_ATOM_NAME, OP2_ATOM_NAME, P_ATOM_NAME));
263                AMINOACID_BACKBONE_ATOMS = new HashSet<>(Arrays.asList(CA_ATOM_NAME, C_ATOM_NAME, N_ATOM_NAME, O_ATOM_NAME));
264
265        }
266
267        /**
268         * Count how many Atoms are contained within a Structure object.
269         *
270         * @param s
271         *            the structure object
272         * @return the number of Atoms in this Structure
273         */
274        public static int getNrAtoms(Structure s) {
275
276                int nrAtoms = 0;
277
278                Iterator<Group> iter = new GroupIterator(s);
279
280                while (iter.hasNext()) {
281                        Group g = iter.next();
282                        nrAtoms += g.size();
283                }
284
285                return nrAtoms;
286        }
287
288        /**
289         * Count how many groups are contained within a structure object.
290         *
291         * @param s
292         *            the structure object
293         * @return the number of groups in the structure
294         */
295        public static int getNrGroups(Structure s) {
296                int nrGroups = 0;
297
298                List<Chain> chains = s.getChains(0);
299                for (Chain c : chains) {
300                        nrGroups += c.getAtomLength();
301                }
302                return nrGroups;
303        }
304
305        /**
306         * Returns an array of the requested Atoms from the Structure object.
307         * Iterates over all groups and checks if the requested atoms are in this
308         * group, no matter if this is a {@link AminoAcid} or {@link HetatomImpl}
309         * group. If the group does not contain all requested atoms then no atoms
310         * are added for that group. For structures with more than one model, only
311         * model 0 will be used.
312         *
313         * @param s
314         *            the structure to get the atoms from
315         *
316         * @param atomNames
317         *            contains the atom names to be used.
318         * @return an Atom[] array
319         */
320        public static Atom[] getAtomArray(Structure s, String[] atomNames) {
321                List<Chain> chains = s.getModel(0);
322
323                List<Atom> atoms = new ArrayList<>();
324
325                extractAtoms(atomNames, chains, atoms);
326                return atoms.toArray(new Atom[0]);
327
328        }
329
330        /**
331         * Returns an array of the requested Atoms from the Structure object. In
332         * contrast to {@link #getAtomArray(Structure, String[])} this method
333         * iterates over all chains. Iterates over all chains and groups and checks
334         * if the requested atoms are in this group, no matter if this is a
335         * {@link AminoAcid} or {@link HetatomImpl} group. If the group does not
336         * contain all requested atoms then no atoms are added for that group. For
337         * structures with more than one model, only model 0 will be used.
338         *
339         * @param s
340         *            the structure to get the atoms from
341         *
342         * @param atomNames
343         *            contains the atom names to be used.
344         * @return an Atom[] array
345         */
346        public static Atom[] getAtomArrayAllModels(Structure s,
347                        String[] atomNames) {
348
349                List<Atom> atoms = new ArrayList<>();
350
351                for (int i = 0; i < s.nrModels(); i++) {
352                        List<Chain> chains = s.getModel(i);
353                        extractAtoms(atomNames, chains, atoms);
354                }
355                return atoms.toArray(new Atom[0]);
356
357        }
358
359        /**
360         * Convert all atoms of the structure (all models) into an Atom array
361         *
362         * @param s
363         *            input structure
364         * @return all atom array
365         */
366        public static Atom[] getAllAtomArray(Structure s) {
367                List<Atom> atoms = new ArrayList<>();
368
369                AtomIterator iter = new AtomIterator(s);
370                while (iter.hasNext()) {
371                        Atom a = iter.next();
372                        atoms.add(a);
373                }
374                return atoms.toArray(new Atom[0]);
375        }
376        /**
377         * Convert all atoms of the structure (specified model) into an Atom array
378         *
379         * @param s
380         *            input structure
381         * @return all atom array
382         */
383        public static Atom[] getAllAtomArray(Structure s, int model) {
384                List<Atom> atoms = new ArrayList<>();
385
386                AtomIterator iter = new AtomIterator(s,model);
387                while (iter.hasNext()) {
388                        Atom a = iter.next();
389                        atoms.add(a);
390                }
391                return atoms.toArray(new Atom[0]);
392
393        }
394
395        /**
396         * Returns and array of all atoms of the chain, including
397         * Hydrogens (if present) and all HETATOMs. Waters are not included.
398         *
399         * @param c
400         *            input chain
401         * @return all atom array
402         */
403        public static Atom[] getAllAtomArray(Chain c) {
404                List<Atom> atoms = new ArrayList<>();
405
406                for (Group g : c.getAtomGroups()) {
407                        if (g.isWater())
408                                continue;
409                        atoms.addAll(g.getAtoms());
410                }
411                return atoms.toArray(new Atom[0]);
412        }
413
414        /**
415         * List of groups from the structure not included in ca (e.g. ligands).
416         *
417         * Unaligned groups are searched from all chains referenced in ca, as well
418         * as any chains in the first model of the structure from ca[0], if any.
419         *
420         * @param ca an array of atoms
421         * @return
422         */
423        public static List<Group> getUnalignedGroups(Atom[] ca) {
424                Set<Chain> chains = new HashSet<>();
425                Set<Group> caGroups = new HashSet<>();
426
427                // Create list of all chains in this structure
428                Structure s = null;
429                if (ca.length > 0) {
430                        Group g = ca[0].getGroup();
431                        if (g != null) {
432                                Chain c = g.getChain();
433                                if (c != null) {
434                                        s = c.getStructure();
435                                }
436                        }
437                }
438                if (s != null) {
439                        // Add all chains from the structure
440                        chains.addAll(s.getChains(0));
441                }
442
443                // Add groups and chains from ca
444                for (Atom a : ca) {
445                        Group g = a.getGroup();
446                        if (g != null) {
447                                caGroups.add(g);
448
449                                Chain c = g.getChain();
450                                if (c != null) {
451                                        chains.add(c);
452                                }
453                        }
454                }
455
456                // Iterate through all chains, finding groups not in ca
457                List<Group> unadded = new ArrayList<>();
458                for (Chain c : chains) {
459                        for (Group g : c.getAtomGroups()) {
460                                if (!caGroups.contains(g)) {
461                                        unadded.add(g);
462                                }
463                        }
464                }
465                return unadded;
466        }
467
468        /**
469         * Finds all ligand groups from the target which fall within the cutoff distance
470         * of some atom from the query set.
471         *
472         * @param target Set of groups including the ligands
473         * @param query Atom selection
474         * @param cutoff Distance from query atoms to consider, in angstroms
475         * @return All groups from the target with at least one atom within cutoff of a query atom
476         * @see StructureTools#DEFAULT_LIGAND_PROXIMITY_CUTOFF
477         */
478        public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[] query, double cutoff) {
479                // Spatial hashing of the reduced structure
480                Grid grid = new Grid(cutoff);
481                grid.addAtoms(query);
482
483                List<Group> ligands = new ArrayList<>();
484                for(Group g :target ) {
485                        // don't worry about waters
486                        if(g.isWater()) {
487                                continue;
488                        }
489
490                        if(g.isPolymeric() ) {
491                                // Polymers aren't ligands
492                                continue;
493                        }
494
495                        // It is a ligand!
496
497                        // Check that it's within cutoff of something in reduced
498                        List<Atom> groupAtoms = g.getAtoms();
499                        if( ! grid.hasAnyContact(Calc.atomsToPoints(groupAtoms))) {
500                                continue;
501                        }
502
503                        ligands.add(g);
504                }
505                return ligands;
506        }
507
508        /**
509         * Adds a particular group to a structure. A new chain will be created if necessary.
510         *
511         * <p>When adding multiple groups, pass the return value of one call as the
512         * chainGuess parameter of the next call for efficiency.
513         * <pre>
514         * Chain guess = null;
515         * for(Group g : groups) {
516         *     guess = addGroupToStructure(s, g, guess );
517         * }
518         * </pre>
519         * @param s structure to receive the group
520         * @param g group to add
521         * @param chainGuess (optional) If not null, should be a chain from s. Used
522         *  to improve performance when adding many groups from the same chain
523         * @param clone Indicates whether the input group should be cloned before
524         *  being added to the new chain
525         * @return the chain g was added to
526         */
527        public static Chain addGroupToStructure(Structure s, Group g, int model, Chain chainGuess, boolean clone ) {
528                // Find or create the chain
529                String chainId = g.getChainId();
530                assert !chainId.isEmpty();
531                Chain chain;
532                if (chainGuess != null && chainGuess.getId().equals(chainId)) {
533                        // previously guessed chain
534                        chain = chainGuess;
535                } else {
536                        // Try to guess
537                        chain = s.getChain(chainId, model);
538                        if (chain == null) {
539                                // no chain found
540                                chain = new ChainImpl();
541                                chain.setId(chainId);
542
543                                Chain oldChain = g.getChain();
544                                chain.setName(oldChain.getName());
545
546                                EntityInfo oldEntityInfo = oldChain.getEntityInfo();
547
548                                EntityInfo newEntityInfo;
549                                if (oldEntityInfo == null) {
550                                        newEntityInfo = new EntityInfo();
551                                        s.addEntityInfo(newEntityInfo);
552                                } else {
553                                        newEntityInfo = s.getEntityById(oldEntityInfo.getMolId());
554                                        if (newEntityInfo == null) {
555                                                newEntityInfo = new EntityInfo(oldEntityInfo);
556                                                s.addEntityInfo(newEntityInfo);
557                                        }
558                                }
559                                newEntityInfo.addChain(chain);
560                                chain.setEntityInfo(newEntityInfo);
561
562                                // TODO Do the seqres need to be cloned too? -SB 2016-10-7
563                                chain.setSeqResGroups(oldChain.getSeqResGroups());
564                                chain.setSeqMisMatches(oldChain.getSeqMisMatches());
565
566                                s.addChain(chain, model);
567                        }
568                }
569
570                // Add cloned group
571                if (clone) {
572                        g = (Group) g.clone();
573                }
574                chain.addGroup(g);
575
576                return chain;
577        }
578
579        /**
580         * Add a list of groups to a new structure. Chains will be automatically
581         * created in the new structure as needed.
582         * @param s structure to receive the group
583         * @param groups groups to add
584         * @param model model number
585         * @param clone Indicates whether the input groups should be cloned before
586         *  being added to the new chain
587         */
588        public static void addGroupsToStructure(Structure s, Collection<Group> groups, int model, boolean clone) {
589                Chain chainGuess = null;
590                for(Group g : groups) {
591                        chainGuess = addGroupToStructure(s, g, model, chainGuess, clone);
592                }
593        }
594
595        /**
596         * Expand a set of atoms into all groups from the same structure.
597         *
598         * If the structure is set, only the first atom is used (assuming all
599         * atoms come from the same original structure).
600         * If the atoms aren't linked to a structure (for instance, for cloned atoms),
601         * searches all chains of all atoms for groups.
602         * @param atoms Sample of atoms
603         * @return All groups from all chains accessible from the input atoms
604         */
605        public static Set<Group> getAllGroupsFromSubset(Atom[] atoms) {
606                return getAllGroupsFromSubset(atoms,null);
607        }
608
609        /**
610         * Expand a set of atoms into all groups from the same structure.
611         *
612         * If the structure is set, only the first atom is used (assuming all
613         * atoms come from the same original structure).
614         * If the atoms aren't linked to a structure (for instance, for cloned atoms),
615         * searches all chains of all atoms for groups.
616         * @param atoms Sample of atoms
617         * @param types Type of groups to return (useful for getting only ligands, for instance).
618         *  Null gets all groups.
619         * @return All groups from all chains accessible from the input atoms
620         */
621        public static Set<Group> getAllGroupsFromSubset(Atom[] atoms,GroupType types) {
622                // Get the full structure
623                Structure s = null;
624                if (atoms.length > 0) {
625                        Group g = atoms[0].getGroup();
626                        if (g != null) {
627                                Chain c = g.getChain();
628                                if (c != null) {
629                                        s = c.getStructure();
630                                }
631                        }
632                }
633                // Collect all groups from the structure
634                Set<Chain> allChains = new HashSet<>();
635                if( s != null ) {
636                        allChains.addAll(s.getChains());
637                }
638                // In case the structure wasn't set, need to use ca chains too
639                for(Atom a : atoms) {
640                        Group g = a.getGroup();
641                        if(g != null) {
642                                Chain c = g.getChain();
643                                if( c != null ) {
644                                        allChains.add(c);
645                                }
646                        }
647                }
648
649                if(allChains.isEmpty() ) {
650                        return Collections.emptySet();
651                }
652
653                // Extract all ligand groups
654                Set<Group> full = new HashSet<>();
655                for(Chain c : allChains) {
656                        if(types == null) {
657                                full.addAll(c.getAtomGroups());
658                        } else {
659                                full.addAll(c.getAtomGroups(types));
660                        }
661                }
662
663                return full;
664        }
665
666
667        /**
668         * Returns and array of all non-Hydrogen atoms in the given Structure,
669         * optionally including HET atoms or not. Waters are not included.
670         *
671         * @param s
672         * @param hetAtoms
673         *            if true HET atoms are included in array, if false they are not
674         * @return
675         */
676        public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms) {
677                AtomIterator iter = new AtomIterator(s);
678                return getAllNonHAtomArray(s, hetAtoms, iter);
679        }
680        /**
681         * Returns and array of all non-Hydrogen atoms in the given Structure,
682         * optionally including HET atoms or not. Waters are not included.
683         *
684         * @param s
685         * @param hetAtoms
686         *            if true HET atoms are included in array, if false they are not
687         * @param modelNr Model number to draw atoms from
688         * @return
689         */
690        public static Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, int modelNr) {
691                AtomIterator iter = new AtomIterator(s,modelNr);
692                return getAllNonHAtomArray(s, hetAtoms, iter);
693        }
694
695        private static Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, AtomIterator iter) {
696                List<Atom> atoms = new ArrayList<>();
697
698                while (iter.hasNext()) {
699                        Atom a = iter.next();
700                        if (a.getElement() == Element.H)
701                                continue;
702
703                        Group g = a.getGroup();
704
705                        if (g.isWater())
706                                continue;
707
708                        if (!hetAtoms && g.getType().equals(GroupType.HETATM))
709                                continue;
710
711                        atoms.add(a);
712                }
713                return atoms.toArray(new Atom[0]);
714        }
715
716        /**
717         * Returns and array of all non-Hydrogen atoms in the given Chain,
718         * optionally including HET atoms or not Waters are not included.
719         *
720         * @param c
721         * @param hetAtoms
722         *            if true HET atoms are included in array, if false they are not
723         * @return
724         */
725        public static Atom[] getAllNonHAtomArray(Chain c, boolean hetAtoms) {
726                List<Atom> atoms = new ArrayList<>();
727
728                for (Group g : c.getAtomGroups()) {
729                        if (g.isWater())
730                                continue;
731                        for (Atom a : g.getAtoms()) {
732
733                                if (a.getElement() == Element.H)
734                                        continue;
735
736                                if (!hetAtoms && g.getType().equals(GroupType.HETATM))
737                                        continue;
738
739                                atoms.add(a);
740                        }
741                }
742                return atoms.toArray(new Atom[0]);
743        }
744
745        /**
746         * Returns and array of all non-Hydrogen atoms coordinates in the given Chain,
747         * optionally including HET atoms or not Waters are not included.
748         *
749         * @param c
750         * @param hetAtoms
751         *            if true HET atoms are included in array, if false they are not
752         * @return
753         */
754        public static Point3d[] getAllNonHCoordsArray(Chain c, boolean hetAtoms) {
755                List<Point3d> atoms = new ArrayList<>();
756
757                for (Group g : c.getAtomGroups()) {
758                        if (g.isWater())
759                                continue;
760                        for (Atom a : g.getAtoms()) {
761
762                                if (a.getElement() == Element.H)
763                                        continue;
764
765                                if (!hetAtoms && g.getType().equals(GroupType.HETATM))
766                                        continue;
767
768                                atoms.add(a.getCoordsAsPoint3d());
769                        }
770                }
771                return atoms.toArray(new Point3d[0]);
772        }
773
774        /**
775         * Adds to the given atoms list, all atoms of groups that contained all
776         * requested atomNames, i.e. if a group does not contain all of the
777         * requested atom names, its atoms won't be added.
778         *
779         * @param atomNames
780         * @param chains
781         * @param atoms
782         */
783        private static void extractAtoms(String[] atomNames, List<Chain> chains, List<Atom> atoms) {
784
785                for (Chain c : chains) {
786
787                        for (Group g : c.getAtomGroups()) {
788
789                                // a temp container for the atoms of this group
790                                List<Atom> thisGroupAtoms = new ArrayList<>();
791                                // flag to check if this group contains all the requested atoms.
792                                boolean thisGroupAllAtoms = true;
793                                for (String atomName : atomNames) {
794                                        Atom a = g.getAtom(atomName);
795
796                                        if (a == null) {
797                                                // this group does not have a required atom, skip it...
798                                                thisGroupAllAtoms = false;
799                                                break;
800                                        }
801                                        thisGroupAtoms.add(a);
802                                }
803                                if (thisGroupAllAtoms) {
804                                        // add the atoms of this group to the array.
805                                        atoms.addAll(thisGroupAtoms);
806                                }
807                        }
808                }
809        }
810
811        /**
812         * Returns an array of the requested Atoms from the Chain object. Iterates
813         * over all groups and checks if the requested atoms are in this group, no
814         * matter if this is a AminoAcid or Hetatom group. If the group does not
815         * contain all requested atoms then no atoms are added for that group.
816         *
817         * @param c
818         *            the Chain to get the atoms from
819         *
820         * @param atomNames
821         *            contains the atom names to be used.
822         * @return an Atom[] array
823         */
824        public static Atom[] getAtomArray(Chain c, String[] atomNames) {
825
826                List<Atom> atoms = new ArrayList<>();
827
828                for (Group g : c.getAtomGroups()) {
829
830                        // a temp container for the atoms of this group
831                        List<Atom> thisGroupAtoms = new ArrayList<>();
832                        // flag to check if this group contains all the requested atoms.
833                        boolean thisGroupAllAtoms = true;
834                        for (String atomName : atomNames) {
835                                Atom a = g.getAtom(atomName);
836                                if (a == null) {
837                                        logger.debug("Group {} ({}) does not have the required atom '{}'", g.getResidueNumber(), g.getPDBName(), atomName);
838                                        // this group does not have a required atom, skip it...
839                                        thisGroupAllAtoms = false;
840                                        break;
841                                }
842                                thisGroupAtoms.add(a);
843                        }
844
845                        if (thisGroupAllAtoms) {
846                                // add the atoms of this group to the array.
847                                atoms.addAll(thisGroupAtoms);
848                        }
849
850                }
851                return atoms.toArray(new Atom[0]);
852
853        }
854
855        /**
856         * Returns an Atom array of the C-alpha atoms. Any atom that is a carbon and
857         * has CA name will be returned.
858         *
859         * @param c
860         *            the structure object
861         * @return an Atom[] array
862         * @see #getRepresentativeAtomArray(Chain)
863         */
864        public static Atom[] getAtomCAArray(Chain c) {
865                List<Atom> atoms = new ArrayList<>();
866
867                for (Group g : c.getAtomGroups()) {
868                        if (g.hasAtom(CA_ATOM_NAME)
869                                        && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) {
870                                atoms.add(g.getAtom(CA_ATOM_NAME));
871                        }
872                }
873
874                return atoms.toArray(new Atom[0]);
875        }
876
877        /**
878         * Gets a representative atom for each group that is part of the chain
879         * backbone. Note that modified aminoacids won't be returned as part of the
880         * backbone if the {@link org.biojava.nbio.structure.chem.ReducedChemCompProvider} was used to load the
881         * structure.
882         *
883         * For amino acids, the representative is a CA carbon. For nucleotides, the
884         * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group
885         * types will be ignored.
886         *
887         * @param c
888         * @return representative Atoms of the chain backbone
889         * @since Biojava 4.1.0
890         */
891        public static Atom[] getRepresentativeAtomArray(Chain c) {
892                List<Atom> atoms = new ArrayList<>();
893
894                for (Group g : c.getAtomGroups()) {
895
896                        switch (g.getType()) {
897                        case AMINOACID:
898                                if (g.hasAtom(CA_ATOM_NAME)
899                                                && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) {
900                                        atoms.add(g.getAtom(CA_ATOM_NAME));
901                                }
902                                break;
903                        case NUCLEOTIDE:
904                                if (g.hasAtom(NUCLEOTIDE_REPRESENTATIVE)) {
905                                        atoms.add(g.getAtom(NUCLEOTIDE_REPRESENTATIVE));
906                                }
907                                break;
908                        default:
909                                // don't add
910                        }
911                }
912
913                return atoms.toArray(new Atom[0]);
914
915        }
916
917        /**
918         * Provides an equivalent copy of Atoms in a new array. Clones everything,
919         * starting with parent groups and chains. The chain will only contain
920         * groups that are part of the input array.
921         *
922         * @param ca
923         *            array of representative atoms, e.g. CA atoms
924         * @return Atom array
925         * @since Biojava 4.1.0
926         */
927        public static Atom[] cloneAtomArray(Atom[] ca) {
928                Atom[] newCA = new Atom[ca.length];
929
930                List<Chain> model = new ArrayList<>();
931                int apos = -1;
932                for (Atom a : ca) {
933                        apos++;
934                        Group parentG = a.getGroup();
935                        Chain parentC = parentG.getChain();
936
937                        Chain newChain = null;
938                        for (Chain c : model) {
939                                if (c.getName().equals(parentC.getName())) {
940                                        newChain = c;
941                                        break;
942                                }
943                        }
944                        if (newChain == null) {
945                                newChain = new ChainImpl();
946                                newChain.setId(parentC.getId());
947                                newChain.setName(parentC.getName());
948                                model.add(newChain);
949                        }
950
951                        Group parentN = (Group) parentG.clone();
952
953                        newCA[apos] = parentN.getAtom(a.getName());
954                        try {
955                                // if the group doesn't exist yet, this produces a StructureException
956                                newChain.getGroupByPDB(parentN.getResidueNumber());
957                        } catch (StructureException e) {
958                                // the group doesn't exist yet in the newChain, let's add it
959                                newChain.addGroup(parentN);
960                        }
961
962                }
963                return newCA;
964        }
965
966        /**
967         * Clone a set of representative Atoms, but returns the parent groups
968         *
969         * @param ca
970         *            Atom array
971         * @return Group array
972         */
973        public static Group[] cloneGroups(Atom[] ca) {
974                Group[] newGroup = new Group[ca.length];
975
976                List<Chain> model = new ArrayList<>();
977                int apos = -1;
978                for (Atom a : ca) {
979                        apos++;
980                        Group parentG = a.getGroup();
981                        Chain parentC = parentG.getChain();
982
983                        Chain newChain = null;
984                        for (Chain c : model) {
985                                if (c.getName().equals(parentC.getName())) {
986                                        newChain = c;
987                                        break;
988                                }
989                        }
990                        if (newChain == null) {
991                                newChain = new ChainImpl();
992                                newChain.setName(parentC.getName());
993                                model.add(newChain);
994                        }
995
996                        Group ng = (Group) parentG.clone();
997                        newGroup[apos] = ng;
998                        newChain.addGroup(ng);
999                }
1000                return newGroup;
1001        }
1002
1003        /**
1004         * Utility method for working with circular permutations. Creates a
1005         * duplicated and cloned set of Calpha atoms from the input array.
1006         *
1007         * @param ca2
1008         *            atom array
1009         * @return cloned and duplicated set of input array
1010         */
1011        public static Atom[] duplicateCA2(Atom[] ca2) {
1012                // we don't want to rotate input atoms, do we?
1013                Atom[] ca2clone = new Atom[ca2.length * 2];
1014
1015                int pos = 0;
1016
1017                Chain c = null;
1018                String prevChainId = "";
1019                for (Atom a : ca2) {
1020                        Group g = (Group) a.getGroup().clone(); // works because each group
1021                        // has only a single atom
1022
1023                        if (c == null) {
1024                                c = new ChainImpl();
1025                                Chain orig = a.getGroup().getChain();
1026                                c.setId(orig.getId());
1027                                c.setName(orig.getName());
1028                        } else {
1029                                Chain orig = a.getGroup().getChain();
1030                                if (!orig.getId().equals(prevChainId)) {
1031                                        c = new ChainImpl();
1032                                        c.setId(orig.getId());
1033                                        c.setName(orig.getName());
1034                                }
1035                        }
1036
1037                        c.addGroup(g);
1038                        ca2clone[pos] = g.getAtom(a.getName());
1039
1040                        pos++;
1041                }
1042
1043                // Duplicate ca2!
1044                c = null;
1045                prevChainId = "";
1046                for (Atom a : ca2) {
1047                        Group g = (Group) a.getGroup().clone();
1048
1049                        if (c == null) {
1050                                c = new ChainImpl();
1051                                Chain orig = a.getGroup().getChain();
1052                                c.setId(orig.getId());
1053                                c.setName(orig.getName());
1054                        } else {
1055                                Chain orig = a.getGroup().getChain();
1056                                if (!orig.getId().equals(prevChainId)) {
1057                                        c = new ChainImpl();
1058                                        c.setId(orig.getId());
1059                                        c.setName(orig.getName());
1060                                }
1061                        }
1062
1063                        c.addGroup(g);
1064                        ca2clone[pos] = g.getAtom(a.getName());
1065
1066                        pos++;
1067                }
1068
1069                return ca2clone;
1070
1071        }
1072
1073        /**
1074         * Return an Atom array of the C-alpha atoms. Any atom that is a carbon and
1075         * has CA name will be returned.
1076         *
1077         * @param s
1078         *            the structure object
1079         * @return an Atom[] array
1080         * @see #getRepresentativeAtomArray(Structure)
1081         */
1082        public static Atom[] getAtomCAArray(Structure s) {
1083
1084                List<Atom> atoms = new ArrayList<>();
1085
1086                for (Chain c : s.getChains()) {
1087                        for (Group g : c.getAtomGroups()) {
1088                                if (g.hasAtom(CA_ATOM_NAME)
1089                                                && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) {
1090                                        atoms.add(g.getAtom(CA_ATOM_NAME));
1091                                }
1092                        }
1093                }
1094
1095                return atoms.toArray(new Atom[atoms.size()]);
1096        }
1097
1098        /**
1099         * Gets a representative atom for each group that is part of the chain
1100         * backbone. Note that modified aminoacids won't be returned as part of the
1101         * backbone if the {@link org.biojava.nbio.structure.chem.ReducedChemCompProvider} was used to load the
1102         * structure.
1103         *
1104         * For amino acids, the representative is a CA carbon. For nucleotides, the
1105         * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group
1106         * types will be ignored.
1107         *
1108         * @param s
1109         *            Input structure
1110         * @return representative Atoms of the structure backbone
1111         * @since Biojava 4.1.0
1112         */
1113        public static Atom[] getRepresentativeAtomArray(Structure s) {
1114
1115                List<Atom> atoms = new ArrayList<>();
1116
1117                for (Chain c : s.getChains()) {
1118                        Atom[] chainAtoms = getRepresentativeAtomArray(c);
1119                        atoms.addAll(Arrays.asList(chainAtoms));
1120                }
1121
1122                return atoms.toArray(new Atom[0]);
1123        }
1124
1125        /**
1126         * Return an Atom array of the main chain atoms: CA, C, N, O Any group that
1127         * contains those atoms will be included, be it a standard aminoacid or not
1128         *
1129         * @param s
1130         *            the structure object
1131         * @return an Atom[] array
1132         */
1133        public static Atom[] getBackboneAtomArray(Structure s) {
1134                List<Atom> atoms = new ArrayList<>();
1135                for (Chain c : s.getChains()) {
1136                        for (Group g : c.getAtomGroups()) {
1137                                if (g.hasAminoAtoms()) {
1138                                        if (g.getType() == GroupType.NUCLEOTIDE) {
1139                                                addNucleotideAndAminoAtoms(atoms, g, NUCLEOTIDE_BACKBONE_ATOMS);
1140                                        } else {
1141                                                addNucleotideAndAminoAtoms(atoms, g, AMINOACID_BACKBONE_ATOMS);
1142                                        }
1143                                }
1144                        }
1145                }
1146                return atoms.toArray(new Atom[0]);
1147        }
1148
1149        /**
1150         * This method will be used to add the Nucleotide and Amino atoms to the backbone Atom arrays based on the pre-defined Atom names.
1151         * @param atoms
1152         * @param g
1153         * @param atomNames
1154         */
1155        private static void addNucleotideAndAminoAtoms(List<Atom> atoms, Group g, Set<String> atomNames) {
1156                for (Atom a : g.getAtoms()) {
1157                        if (atomNames.contains(a.getName())) {
1158                                atoms.add(a);
1159                        }
1160                }
1161        }
1162
1163
1164        /**
1165         * Convert three character amino acid codes into single character e.g.
1166         * convert CYS to C. Valid 3-letter codes will be those of the standard 20
1167         * amino acids plus MSE, CSE, SEC, PYH, PYL (see the {@link #aminoAcids}
1168         * map)
1169         *
1170         * @return the 1 letter code, or null if the given 3 letter code does not
1171         *         correspond to an amino acid code
1172         * @param groupCode3
1173         *            a three character amino acid representation String
1174         * @see #get1LetterCode(String)
1175         */
1176        public static Character get1LetterCodeAmino(String groupCode3) {
1177                return aminoAcids.get(groupCode3);
1178        }
1179
1180        /**
1181         * Convert a three letter amino acid or nucleotide code into a single
1182         * character code. If the code does not correspond to an amino acid or
1183         * nucleotide, returns {@link #UNKNOWN_GROUP_LABEL}.
1184         *
1185         * Returned null for nucleotides prior to version 4.0.1.
1186         *
1187         * @param groupCode3
1188         *            three letter representation
1189         * @return The 1-letter abbreviation
1190         */
1191        public static Character get1LetterCode(String groupCode3) {
1192
1193                Character code1;
1194
1195                // is it a standard amino acid ?
1196                code1 = get1LetterCodeAmino(groupCode3);
1197
1198                if (code1 == null) {
1199                        // hm groupCode3 is not standard
1200                        // perhaps it is a nucleotide?
1201                        groupCode3 = groupCode3.trim();
1202                        if (isNucleotide(groupCode3)) {
1203                                code1 = nucleotides30.get(groupCode3);
1204                                if (code1 == null) {
1205                                        code1 = nucleotides23.get(groupCode3);
1206                                }
1207                                if (code1 == null) {
1208                                        code1 = UNKNOWN_GROUP_LABEL;
1209                                }
1210                        } else {
1211                                // does not seem to be so let's assume it is
1212                                // nonstandard aminoacid and label it "X"
1213                                // logger.warning("unknown group name "+groupCode3 );
1214                                code1 = UNKNOWN_GROUP_LABEL;
1215                        }
1216                }
1217
1218                return code1;
1219
1220        }
1221
1222        /**
1223         * Test if the three-letter code of an ATOM entry corresponds to a
1224         * nucleotide or to an aminoacid.
1225         *
1226         * @param groupCode3
1227         *            3-character code for a group.
1228         *
1229         */
1230        public static boolean isNucleotide(String groupCode3) {
1231                String code = groupCode3.trim();
1232                return nucleotides30.containsKey(code)
1233                                || nucleotides23.containsKey(code);
1234        }
1235
1236        public static String convertAtomsToSeq(Atom[] atoms) {
1237
1238                StringBuilder buf = new StringBuilder();
1239                Group prevGroup = null;
1240                for (Atom a : atoms) {
1241                        Group g = a.getGroup();
1242                        if (prevGroup != null) {
1243                                if (prevGroup.equals(g)) {
1244                                        // we add each group only once.
1245                                        continue;
1246                                }
1247                        }
1248                        String code3 = g.getPDBName();
1249                        Character code1 = get1LetterCodeAmino(code3);
1250                        if (code1 == null)
1251                                code1 = UNKNOWN_GROUP_LABEL;
1252
1253                        buf.append(code1);
1254
1255                        prevGroup = g;
1256
1257                }
1258                return buf.toString();
1259        }
1260
1261        /**
1262         * Get a group represented by a ResidueNumber.
1263         *
1264         * @param struc
1265         *            a {@link Structure}
1266         * @param pdbResNum
1267         *            a {@link ResidueNumber}
1268         * @return a group in the structure that is represented by the pdbResNum.
1269         * @throws StructureException
1270         *             if the group cannot be found.
1271         */
1272        public static Group getGroupByPDBResidueNumber(Structure struc,
1273                        ResidueNumber pdbResNum) throws StructureException {
1274                if (struc == null || pdbResNum == null) {
1275                        throw new IllegalArgumentException("Null argument(s).");
1276                }
1277
1278                Chain chain = struc.getPolyChainByPDB(pdbResNum.getChainName());
1279
1280                return chain.getGroupByPDB(pdbResNum);
1281        }
1282
1283        /**
1284         * Returns the set of intra-chain contacts for the given chain for given
1285         * atom names, i.e. the contact map. Uses a spatial hashing algorithm that
1286         * speeds up the calculation without need of full distance matrix. The
1287         * parsing mode {@link FileParsingParameters#setAlignSeqRes(boolean)} needs
1288         * to be set to true for this to work.
1289         *
1290         * @param chain
1291         * @param atomNames
1292         *            the array with atom names to be used. Beware: CA will do both
1293         *            C-alphas an Calciums if null all non-H atoms of non-hetatoms
1294         *            will be used
1295         * @param cutoff
1296         * @return
1297         */
1298        public static AtomContactSet getAtomsInContact(Chain chain,
1299                        String[] atomNames, double cutoff) {
1300                Grid grid = new Grid(cutoff);
1301
1302                Atom[] atoms = null;
1303                if (atomNames == null) {
1304                        atoms = getAllNonHAtomArray(chain, false);
1305                } else {
1306                        atoms = getAtomArray(chain, atomNames);
1307                }
1308                // If tha
1309                if(atoms.length==0){
1310                        logger.warn("No atoms found for buidling grid!");
1311                        return new AtomContactSet(cutoff);
1312                }
1313                grid.addAtoms(atoms);
1314
1315                return grid.getAtomContacts();
1316        }
1317
1318        /**
1319         * Returns the set of intra-chain contacts for the given chain for all non-H
1320         * atoms of non-hetatoms, i.e. the contact map. Uses a spatial hashing
1321         * algorithm that speeds up the calculation without need of full distance
1322         * matrix. The parsing mode
1323         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1324         * true for this to work.
1325         *
1326         * @param chain
1327         * @param cutoff
1328         * @return
1329         */
1330        public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
1331                return getAtomsInContact(chain, (String[]) null, cutoff);
1332        }
1333
1334        /**
1335         * Returns the set of intra-chain contacts for the given chain for C-alpha
1336         * atoms (including non-standard aminoacids appearing as HETATM groups),
1337         * i.e. the contact map. Uses a spatial hashing algorithm that speeds up
1338         * the calculation without need of full distance matrix. The parsing mode
1339         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1340         * true for this to work.
1341         *
1342         * @param chain
1343         * @param cutoff
1344         * @return
1345         * @see #getRepresentativeAtomsInContact(Chain, double)
1346         */
1347        public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
1348                Grid grid = new Grid(cutoff);
1349
1350                Atom[] atoms = getAtomCAArray(chain);
1351
1352                grid.addAtoms(atoms);
1353
1354                return grid.getAtomContacts();
1355        }
1356
1357        /**
1358         * Returns the set of intra-chain contacts for the given chain for C-alpha
1359         * or C3' atoms (including non-standard aminoacids appearing as HETATM
1360         * groups), i.e. the contact map. Uses a spatial hashing algorithm that
1361         * speeds up the calculation without need of full distance matrix.
1362         *
1363         * @param chain
1364         * @param cutoff
1365         * @return
1366         * @since Biojava 4.1.0
1367         */
1368        public static AtomContactSet getRepresentativeAtomsInContact(Chain chain,
1369                        double cutoff) {
1370                Grid grid = new Grid(cutoff);
1371
1372                Atom[] atoms = getRepresentativeAtomArray(chain);
1373
1374                grid.addAtoms(atoms);
1375
1376                return grid.getAtomContacts();
1377        }
1378
1379        /**
1380         * Returns the set of inter-chain contacts between the two given chains for
1381         * the given atom names. Uses a spatial hashing algorithm that speeds up
1382         * the calculation without need of full distance matrix. The parsing mode
1383         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1384         * true for this to work.
1385         *
1386         * @param chain1
1387         * @param chain2
1388         * @param atomNames
1389         *            the array with atom names to be used. For Calphas use {"CA"},
1390         *            if null all non-H atoms will be used. Note HET atoms are
1391         *            ignored unless this parameter is null.
1392         * @param cutoff
1393         * @param hetAtoms
1394         *            if true HET atoms are included, if false they are not
1395         * @return
1396         */
1397        public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2,
1398                        String[] atomNames, double cutoff, boolean hetAtoms) {
1399                Grid grid = new Grid(cutoff);
1400                Atom[] atoms1 = null;
1401                Atom[] atoms2 = null;
1402                if (atomNames == null) {
1403                        atoms1 = getAllNonHAtomArray(chain1, hetAtoms);
1404                        atoms2 = getAllNonHAtomArray(chain2, hetAtoms);
1405                } else {
1406                        atoms1 = getAtomArray(chain1, atomNames);
1407                        atoms2 = getAtomArray(chain2, atomNames);
1408                }
1409                grid.addAtoms(atoms1, atoms2);
1410
1411                return grid.getAtomContacts();
1412        }
1413
1414        /**
1415         * Returns the set of inter-chain contacts between the two given chains for
1416         * all non-H atoms. Uses a spatial hashing algorithm that speeds up the
1417         * calculation without need of full distance matrix. The parsing mode
1418         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1419         * true for this to work.
1420         *
1421         * @param chain1
1422         * @param chain2
1423         * @param cutoff
1424         * @param hetAtoms
1425         *            if true HET atoms are included, if false they are not
1426         * @return
1427         */
1428        public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2,
1429                        double cutoff, boolean hetAtoms) {
1430                return getAtomsInContact(chain1, chain2, null, cutoff, hetAtoms);
1431        }
1432
1433        /**
1434         * Finds Groups in {@code structure} that contain at least one Atom that is
1435         * within {@code radius} Angstroms of {@code centroid}.
1436         *
1437         * @param structure
1438         *            The structure from which to find Groups
1439         * @param centroid
1440         *            The centroid of the shell
1441         * @param excludeResidues
1442         *            A list of ResidueNumbers to exclude
1443         * @param radius
1444         *            The radius from {@code centroid}, in Angstroms
1445         * @param includeWater
1446         *            Whether to include Groups whose <em>only</em> atoms are water
1447         * @param useAverageDistance
1448         *            When set to true, distances are the arithmetic mean (1-norm)
1449         *            of the distances of atoms that belong to the group and that
1450         *            are within the shell; otherwise, distances are the minimum of
1451         *            these values
1452         * @return A map of Groups within (or partially within) the shell, to their
1453         *         distances in Angstroms
1454         */
1455        public static Map<Group, Double> getGroupDistancesWithinShell(
1456                        Structure structure, Atom centroid,
1457                        Set<ResidueNumber> excludeResidues, double radius,
1458                        boolean includeWater, boolean useAverageDistance) {
1459
1460                // for speed, we avoid calculating square roots
1461                radius = radius * radius;
1462
1463                Map<Group, Double> distances = new HashMap<>();
1464
1465                // we only need this if we're averaging distances
1466                // note that we can't use group.getAtoms().size() because some the
1467                // group's atoms be outside the shell
1468                Map<Group, Integer> atomCounts = new HashMap<>();
1469
1470                for (Chain chain : structure.getChains()) {
1471                        groupLoop: for (Group chainGroup : chain.getAtomGroups()) {
1472
1473                                // exclude water
1474                                if (!includeWater && chainGroup.isWater())
1475                                        continue;
1476
1477                                // check blacklist of residue numbers
1478                                for (ResidueNumber rn : excludeResidues) {
1479                                        if (rn.equals(chainGroup.getResidueNumber()))
1480                                                continue groupLoop;
1481                                }
1482
1483                                for (Atom testAtom : chainGroup.getAtoms()) {
1484
1485                                        // use getDistanceFast as we are doing a lot of comparisons
1486                                        double dist = Calc.getDistanceFast(centroid, testAtom);
1487
1488                                        // if we're the shell
1489                                        if (dist <= radius) {
1490                                                if (!distances.containsKey(chainGroup))
1491                                                        distances.put(chainGroup, Double.POSITIVE_INFINITY);
1492                                                if (useAverageDistance) {
1493                                                        // sum the distance; we'll divide by the total
1494                                                        // number later
1495                                                        // here, we CANNOT use fastDistance (distance
1496                                                        // squared) because we want the arithmetic mean
1497                                                        distances.put(chainGroup, distances.get(chainGroup)
1498                                                                        + Math.sqrt(dist));
1499                                                        if (!atomCounts.containsKey(chainGroup))
1500                                                                atomCounts.put(chainGroup, 0);
1501                                                        atomCounts.put(chainGroup,
1502                                                                        atomCounts.get(chainGroup) + 1);
1503                                                } else {
1504                                                        // take the minimum distance among all atoms of
1505                                                        // chainGroup
1506                                                        // note that we can't break here because we might
1507                                                        // find a smaller distance
1508                                                        if (dist < distances.get(chainGroup)) {
1509                                                                distances.put(chainGroup, dist);
1510                                                        }
1511                                                }
1512                                        }
1513
1514                                }
1515                        }
1516                }
1517
1518                if (useAverageDistance) {
1519                        for (Map.Entry<Group, Double> entry : distances.entrySet()) {
1520                                int count = atomCounts.get(entry.getKey());
1521                                distances.put(entry.getKey(), entry.getValue() / count);
1522                        }
1523                } else {
1524                        // in this case we used getDistanceFast
1525                        distances.replaceAll((k, v) -> Math.sqrt(v));
1526                }
1527
1528                return distances;
1529
1530        }
1531
1532        public static Set<Group> getGroupsWithinShell(Structure structure,
1533                        Atom atom, Set<ResidueNumber> excludeResidues, double distance,
1534                        boolean includeWater) {
1535
1536                // square the distance to use as a comparison against getDistanceFast
1537                // which returns the square of a distance.
1538                distance = distance * distance;
1539
1540                Set<Group> returnSet = new LinkedHashSet<>();
1541                for (Chain chain : structure.getChains()) {
1542                        groupLoop: for (Group chainGroup : chain.getAtomGroups()) {
1543                                if (!includeWater && chainGroup.isWater())
1544                                        continue;
1545                                for (ResidueNumber rn : excludeResidues) {
1546                                        if (rn.equals(chainGroup.getResidueNumber()))
1547                                                continue groupLoop;
1548                                }
1549                                for (Atom atomB : chainGroup.getAtoms()) {
1550
1551                                        // use getDistanceFast as we are doing a lot of comparisons
1552                                        double dist = Calc.getDistanceFast(atom, atomB);
1553                                        if (dist <= distance) {
1554                                                returnSet.add(chainGroup);
1555                                                break;
1556                                        }
1557
1558                                }
1559                        }
1560                }
1561                return returnSet;
1562        }
1563
1564        /**
1565         * <p>
1566         * Returns a Set of Groups in a structure within the distance specified of a
1567         * given group.
1568         * </p>
1569         * <p>
1570         * Updated 18-Sep-2015 sroughley to return a Set so only a unique set of
1571         * Groups returned
1572         *
1573         * @param structure
1574         *            The structure to work with
1575         * @param group
1576         *            The 'query' group
1577         * @param distance
1578         *            The cutoff distance
1579         * @param includeWater
1580         *            Should water residues be included in the output?
1581         * @return {@link LinkedHashSet} of {@link Group}s within at least one atom
1582         *         with {@code distance} of at least one atom in {@code group}
1583         */
1584        public static Set<Group> getGroupsWithinShell(Structure structure,
1585                        Group group, double distance, boolean includeWater) {
1586
1587                Set<Group> returnList = new LinkedHashSet<>();
1588
1589                Set<ResidueNumber> excludeGroups = new HashSet<>();
1590                excludeGroups.add(group.getResidueNumber());
1591                for (Atom atom : group.getAtoms()) {
1592                        Set<Group> set = getGroupsWithinShell(structure, atom,
1593                                        excludeGroups, distance, includeWater);
1594                        returnList.addAll(set);
1595                }
1596
1597                return returnList;
1598        }
1599
1600        /**
1601         * Remove all models from a Structure and keep only the first
1602         *
1603         * @param s
1604         *            original Structure
1605         * @return a structure that contains only the first model
1606         * @since 3.0.5
1607         */
1608        public static Structure removeModels(Structure s) {
1609                if (s.nrModels() == 1)
1610                        return s;
1611
1612                Structure n = new StructureImpl();
1613                // go through whole substructure and clone ...
1614
1615                // copy structure data
1616
1617                n.setPdbId(s.getPdbId());
1618                n.setName(s.getName());
1619
1620                // TODO: do deep copying of data!
1621                n.setPDBHeader(s.getPDBHeader());
1622                n.setDBRefs(s.getDBRefs());
1623
1624                n.setSites(s.getSites());
1625
1626                n.setChains(s.getModel(0));
1627
1628                return n;
1629
1630        }
1631
1632        /**
1633         * Removes all polymeric and solvent groups from a list of groups
1634         *
1635         */
1636        public static List<Group> filterLigands(List<Group> allGroups) {
1637
1638                List<Group> groups = new ArrayList<>();
1639                for (Group g : allGroups) {
1640
1641                        if ( g.isPolymeric())
1642                                continue;
1643
1644                        if (!g.isWater()) {
1645                                groups.add(g);
1646                        }
1647                }
1648
1649                return groups;
1650        }
1651
1652        /**
1653         * Short version of {@link #getStructure(String, PDBFileParser, AtomCache)}
1654         * which creates new parsers when needed
1655         *
1656         * @param name
1657         * @return
1658         * @throws IOException
1659         * @throws StructureException
1660         */
1661        public static Structure getStructure(String name) throws IOException,
1662        StructureException {
1663                return StructureTools.getStructure(name, null, null);
1664        }
1665
1666        /**
1667         * Flexibly get a structure from an input String. The intent of this method
1668         * is to allow any reasonable string which could refer to a structure to be
1669         * correctly parsed. The following are currently supported:
1670         * <ol>
1671         * <li>Filename (if name refers to an existing file)
1672         * <li>PDB ID
1673         * <li>SCOP domains
1674         * <li>PDP domains
1675         * <li>Residue ranges
1676         * <li>Other formats supported by AtomCache
1677         * </ol>
1678         *
1679         * @param name
1680         *            Some reference to the protein structure
1681         * @param parser
1682         *            A clean PDBFileParser to use if it is a file. If null, a
1683         *            PDBFileParser will be instantiated if needed.
1684         * @param cache
1685         *            An AtomCache to use if the structure can be fetched from the
1686         *            PDB. If null, a AtomCache will be instantiated if needed.
1687         * @return A Structure object
1688         * @throws IOException
1689         *             if name is an existing file, but doesn't parse correctly
1690         * @throws StructureException
1691         *             if the format is unknown, or if AtomCache throws an
1692         *             exception.
1693         */
1694        public static Structure getStructure(String name, PDBFileParser parser,
1695                        AtomCache cache) throws IOException, StructureException {
1696                File f = new File(FileDownloadUtils.expandUserHome(name));
1697                if (f.exists()) {
1698                        if (parser == null) {
1699                                parser = new PDBFileParser();
1700                        }
1701                        InputStream inStream = new FileInputStream(f);
1702                        return parser.parsePDBFile(inStream);
1703                } else {
1704                        if (cache == null) {
1705                                cache = new AtomCache();
1706                        }
1707                        return cache.getStructure(name);
1708                }
1709        }
1710
1711        /**
1712         * Cleans up the structure's alternate location (altloc) groups. All alternate location groups should have all atoms (except
1713         * in the case of microheterogenity) or when a deuterium exists.
1714         * Ensure that all the alt loc groups have all the atoms in the main group.
1715         * @param structure The Structure to be cleaned up
1716         */
1717        public static void cleanUpAltLocs(Structure structure) {
1718                for (int i =0; i< structure.nrModels() ; i++){
1719                        for (Chain chain : structure.getModel(i)) {
1720                                for (Group group : chain.getAtomGroups()) {
1721                                        for (Group altLocGroup : group.getAltLocs()) {
1722                                                for ( Atom groupAtom : group.getAtoms()) {
1723                                                        // If this alt loc doesn't have this atom
1724                                                        if (! altLocGroup.hasAtom(groupAtom.getName())) {
1725                                                                // Fix for microheterogenity
1726                                                                if (altLocGroup.getPDBName().equals(group.getPDBName())) {
1727                                                                        // If it's a Hydrogen then we check for it's Deuterated brother
1728                                                                        if(!hasDeuteratedEquiv(groupAtom, altLocGroup)){
1729                                                                                altLocGroup.addAtom(groupAtom);
1730                                                                        }
1731                                                                }
1732                                                        }
1733                                                }
1734                                        }
1735                                }
1736                        }
1737                }
1738        }
1739
1740        /**
1741         * Check to see if an Deuterated atom has a non deuterated brother in the group.
1742         * @param atom the input atom that is putatively deuterium
1743         * @param currentGroup the group the atom is in
1744         * @return true if the atom is deuterated and it's hydrogen equive exists.
1745         */
1746        public static boolean hasNonDeuteratedEquiv(Atom atom, Group currentGroup) {
1747                // If it's deuterated and has a non-deuterated brother
1748                return atom.getElement() == Element.D && currentGroup.hasAtom(replaceFirstChar(atom.getName(), 'D', 'H'));
1749        }
1750
1751        /**
1752         * Check to see if a Hydrogen has a  Deuterated brother in the group.
1753         * @param atom the input atom that is putatively hydorgen
1754         * @param currentGroup the group the atom is in
1755         * @return true if the atom is hydrogen and it's Deuterium equiv exists.
1756         */
1757        public static boolean hasDeuteratedEquiv(Atom atom, Group currentGroup) {
1758                // If it's hydrogen and has a deuterated brother
1759                return atom.getElement() == Element.H && currentGroup.hasAtom(replaceFirstChar(atom.getName(), 'H', 'D'));
1760        }
1761
1762        private static String replaceFirstChar(String name, char c, char d) {
1763                if(name.charAt(0)==c){
1764                        return name.replaceFirst(String.valueOf(c), String.valueOf(d));
1765                }
1766                return name;
1767        }
1768
1769        /**
1770         * Remove all atoms but the representative atoms (C alphas or phosphates) from the given structure.
1771         * @param structure the structure
1772         * @since 5.4.0
1773         */
1774        public static void reduceToRepresentativeAtoms(Structure structure) {
1775                for (int modelIdx = 0; modelIdx<structure.nrModels(); modelIdx++) {
1776                        for (Chain c : structure.getPolyChains(modelIdx)) {
1777                                for (Group g : c.getAtomGroups()) {
1778                                        List<Atom> atoms = g.getAtoms();
1779                                        if (g.isAminoAcid()) {
1780                                                atoms.removeIf(a->!a.getName().equals(CA_ATOM_NAME));
1781                                        } else if (g.isNucleotide()) {
1782                                                atoms.removeIf(a->!a.getName().equals(NUCLEOTIDE_REPRESENTATIVE));
1783                                        }
1784                                        // else we keep all other atoms. We are concerned only about aminoacids and nucleotides that make up the bulk of the structures
1785                                }
1786                        }
1787                }
1788        }
1789}