Source code

001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jan 4, 2006
021 *
022 */
023package org.biojava.nbio.structure;
024
025import java.io.File;
026import java.io.FileInputStream;
027import java.io.IOException;
028import java.io.InputStream;
029import java.util.ArrayList;
030import java.util.Arrays;
031import java.util.Collection;
032import java.util.Collections;
033import java.util.HashMap;
034import java.util.HashSet;
035import java.util.Iterator;
036import java.util.LinkedHashSet;
037import java.util.List;
038import java.util.Map;
039import java.util.Set;
040
041import javax.vecmath.Point3d;
042
043import org.biojava.nbio.structure.align.util.AtomCache;
044import org.biojava.nbio.structure.contact.AtomContactSet;
045import org.biojava.nbio.structure.contact.Grid;
046import org.biojava.nbio.structure.io.FileParsingParameters;
047import org.biojava.nbio.structure.io.PDBFileParser;
048import org.biojava.nbio.core.util.FileDownloadUtils;
049import org.slf4j.Logger;
050import org.slf4j.LoggerFactory;
051
052/**
053 * A class that provides some tool methods.
054 *
055 * @author Andreas Prlic
056 * @author Jules Jacobsen
057 * @since 1.0
058 */
059public class StructureTools {
060
061        private static final Logger logger = LoggerFactory.getLogger(StructureTools.class);
062
063        // Amino Acid backbone
064        /**
065         * The atom name of the backbone C-alpha atom. Note that this can be
066         * ambiguous depending on the context since Calcium atoms use the same name
067         * in PDB.
068         */
069        public static final String CA_ATOM_NAME = "CA";
070
071        /**
072         * The atom name for the backbone amide nitrogen
073         */
074        public static final String N_ATOM_NAME = "N";
075
076        /**
077         * The atom name for the backbone carbonyl
078         */
079        public static final String C_ATOM_NAME = "C";
080
081        /**
082         * The atom name for the backbone carbonyl oxygen
083         */
084        public static final String O_ATOM_NAME = "O";
085
086        /**
087         * The atom name of the side-chain C-beta atom
088         */
089        public static final String CB_ATOM_NAME = "CB";
090
091        // Nucleotide backbone
092        /**
093         * The atom name of the backbone C1' in RNA
094         */
095        public static final String C1_ATOM_NAME = "C1'";
096        /**
097         * The atom name of the backbone C2' in RNA
098         */
099        public static final String C2_ATOM_NAME = "C2'";
100        /**
101         * The atom name of the backbone C3' in RNA
102         */
103        public static final String C3_ATOM_NAME = "C3'";
104        /**
105         * The atom name of the backbone C4' in RNA
106         */
107        public static final String C4_ATOM_NAME = "C4'";
108        /**
109         * The atom name of the backbone O2' in RNA
110         */
111        public static final String O2_ATOM_NAME = "O2'";
112        /**
113         * The atom name of the backbone O3' in RNA
114         */
115        public static final String O3_ATOM_NAME = "O3'";
116        /**
117         * The atom name of the backbone O4' in RNA
118         */
119        public static final String O4_ATOM_NAME = "O4'";
120        /**
121         * The atom name of the backbone O4' in RNA
122         */
123        public static final String O5_ATOM_NAME = "O5'";
124        /**
125         * The atom name of the backbone O4' in RNA
126         */
127        public static final String OP1_ATOM_NAME = "OP1";
128        /**
129         * The atom name of the backbone O4' in RNA
130         */
131        public static final String OP2_ATOM_NAME = "OP2";
132        /**
133         * The atom name of the backbone phosphate in RNA
134         */
135        public static final String P_ATOM_NAME = "P";
136
137        /**
138         * The atom used as representative for nucleotides, equivalent to
139         * {@link #CA_ATOM_NAME} for proteins
140         */
141        public static final String NUCLEOTIDE_REPRESENTATIVE = P_ATOM_NAME;
142
143        /**
144         * The character to use for unknown compounds in sequence strings
145         */
146        public static final char UNKNOWN_GROUP_LABEL = 'X';
147
148        /**
149         * Below this ratio of aminoacid/nucleotide residues to the sequence total,
150         * we use simple majority of aminoacid/nucleotide residues to decide the
151         * character of the chain (protein/nucleotide)
152         */
153        public static final double RATIO_RESIDUES_TO_TOTAL = 0.95;
154
155        /**
156         * Threshold for plausible binding of a ligand to the selected substructure
157         */
158        public static final double DEFAULT_LIGAND_PROXIMITY_CUTOFF = 5;
159
160        // there is a file format change in PDB 3.0 and nucleotides are being
161        // renamed
162        private static final Map<String, Character> nucleotides30;
163        private static final Map<String, Character> nucleotides23;
164
165        // amino acid 3 and 1 letter code definitions
166        private static final Map<String, Character> aminoAcids;
167
168        private static final Set<Element> hBondDonorAcceptors;
169
170        static {
171                nucleotides30 = new HashMap<String, Character>();
172                nucleotides30.put("DA", 'A');
173                nucleotides30.put("DC", 'C');
174                nucleotides30.put("DG", 'G');
175                nucleotides30.put("DT", 'T');
176                nucleotides30.put("DI", 'I');
177                nucleotides30.put("A", 'A');
178                nucleotides30.put("G", 'G');
179                nucleotides30.put("C", 'C');
180                nucleotides30.put("U", 'U');
181                nucleotides30.put("I", 'I');
182
183                // the DNA linkers - the +C , +G, +A +T +U and +I have been replaced
184                // with these:
185                nucleotides30.put("TAF", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine
186                nucleotides30.put("TC1", UNKNOWN_GROUP_LABEL); // Furanosyl
187                nucleotides30.put("TFE", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine
188                nucleotides30.put("TFO", UNKNOWN_GROUP_LABEL); // Tenofovir (3'
189                // terminator)
190                nucleotides30.put("TGP", UNKNOWN_GROUP_LABEL); // Guanine variant
191                nucleotides30.put("THX", UNKNOWN_GROUP_LABEL); // 5' terminator
192                nucleotides30.put("TLC", UNKNOWN_GROUP_LABEL); // Thymine with dicyclic
193                // sugar
194                nucleotides30.put("TLN", UNKNOWN_GROUP_LABEL); // locked Thymine
195                nucleotides30.put("LCG", UNKNOWN_GROUP_LABEL); // locked Guanine
196                nucleotides30.put("TP1", UNKNOWN_GROUP_LABEL); // Thymine peptide
197                // nucleic acid, with
198                // added methyl
199                nucleotides30.put("CP1", UNKNOWN_GROUP_LABEL); // Cytidine peptide
200                // nucleic acid, with
201                // added methyl
202                nucleotides30.put("TPN", UNKNOWN_GROUP_LABEL); // Thymine peptide
203                // nucleic acid
204                nucleotides30.put("CPN", UNKNOWN_GROUP_LABEL); // Cytidine peptide
205                // nucleic acid
206                nucleotides30.put("GPN", UNKNOWN_GROUP_LABEL); // Guanine peptide
207                // nucleic acid
208                nucleotides30.put("APN", UNKNOWN_GROUP_LABEL); // Adenosine peptide
209                // nucleic acid
210                nucleotides30.put("TPC", UNKNOWN_GROUP_LABEL); // Thymine variant
211
212                // store nucleic acids (C, G, A, T, U, and I), and
213                // the modified versions of nucleic acids (+C, +G, +A, +T, +U, and +I),
214                // and
215                nucleotides23 = new HashMap<String, Character>();
216                String[] names = { "C", "G", "A", "T", "U", "I", "+C", "+G", "+A",
217                                "+T", "+U", "+I" };
218                for (String n : names) {
219                        nucleotides23.put(n, n.charAt(n.length() - 1));
220                }
221
222                aminoAcids = new HashMap<String, Character>();
223                aminoAcids.put("GLY", 'G');
224                aminoAcids.put("ALA", 'A');
225                aminoAcids.put("VAL", 'V');
226                aminoAcids.put("LEU", 'L');
227                aminoAcids.put("ILE", 'I');
228                aminoAcids.put("PHE", 'F');
229                aminoAcids.put("TYR", 'Y');
230                aminoAcids.put("TRP", 'W');
231                aminoAcids.put("PRO", 'P');
232                aminoAcids.put("HIS", 'H');
233                aminoAcids.put("LYS", 'K');
234                aminoAcids.put("ARG", 'R');
235                aminoAcids.put("SER", 'S');
236                aminoAcids.put("THR", 'T');
237                aminoAcids.put("GLU", 'E');
238                aminoAcids.put("GLN", 'Q');
239                aminoAcids.put("ASP", 'D');
240                aminoAcids.put("ASN", 'N');
241                aminoAcids.put("CYS", 'C');
242                aminoAcids.put("MET", 'M');
243                // MSE is only found as a molecular replacement for MET
244                aminoAcids.put("MSE", 'M');
245                // 'non-standard', genetically encoded
246                // http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html
247                // IUBMB recommended name is 'SEC' but the wwPDB currently use 'CSE'
248                // likewise 'PYL' (IUBMB) and 'PYH' (PDB)
249                aminoAcids.put("CSE", 'U');
250                aminoAcids.put("SEC", 'U');
251                aminoAcids.put("PYH", 'O');
252                aminoAcids.put("PYL", 'O');
253
254                hBondDonorAcceptors = new HashSet<Element>();
255                hBondDonorAcceptors.add(Element.N);
256                hBondDonorAcceptors.add(Element.O);
257                hBondDonorAcceptors.add(Element.S);
258
259        }
260
261        /**
262         * Count how many Atoms are contained within a Structure object.
263         *
264         * @param s
265         *            the structure object
266         * @return the number of Atoms in this Structure
267         */
268        public static int getNrAtoms(Structure s) {
269
270                int nrAtoms = 0;
271
272                Iterator<Group> iter = new GroupIterator(s);
273
274                while (iter.hasNext()) {
275                        Group g = iter.next();
276                        nrAtoms += g.size();
277                }
278
279                return nrAtoms;
280        }
281
282        /**
283         * Count how many groups are contained within a structure object.
284         *
285         * @param s
286         *            the structure object
287         * @return the number of groups in the structure
288         */
289        public static int getNrGroups(Structure s) {
290                int nrGroups = 0;
291
292                List<Chain> chains = s.getChains(0);
293                for (Chain c : chains) {
294                        nrGroups += c.getAtomLength();
295                }
296                return nrGroups;
297        }
298
299        /**
300         * Returns an array of the requested Atoms from the Structure object.
301         * Iterates over all groups and checks if the requested atoms are in this
302         * group, no matter if this is a {@link AminoAcid} or {@link HetatomImpl}
303         * group. If the group does not contain all requested atoms then no atoms
304         * are added for that group. For structures with more than one model, only
305         * model 0 will be used.
306         *
307         * @param s
308         *            the structure to get the atoms from
309         *
310         * @param atomNames
311         *            contains the atom names to be used.
312         * @return an Atom[] array
313         */
314        public static Atom[] getAtomArray(Structure s, String[] atomNames) {
315                List<Chain> chains = s.getModel(0);
316
317                List<Atom> atoms = new ArrayList<Atom>();
318
319                extractAtoms(atomNames, chains, atoms);
320                return atoms.toArray(new Atom[0]);
321
322        }
323
324        /**
325         * Returns an array of the requested Atoms from the Structure object. In
326         * contrast to {@link #getAtomArray(Structure, String[])} this method
327         * iterates over all chains. Iterates over all chains and groups and checks
328         * if the requested atoms are in this group, no matter if this is a
329         * {@link AminoAcid} or {@link HetatomImpl} group. If the group does not
330         * contain all requested atoms then no atoms are added for that group. For
331         * structures with more than one model, only model 0 will be used.
332         *
333         * @param s
334         *            the structure to get the atoms from
335         *
336         * @param atomNames
337         *            contains the atom names to be used.
338         * @return an Atom[] array
339         */
340        public static Atom[] getAtomArrayAllModels(Structure s,
341                        String[] atomNames) {
342
343                List<Atom> atoms = new ArrayList<>();
344
345                for (int i = 0; i < s.nrModels(); i++) {
346                        List<Chain> chains = s.getModel(i);
347                        extractAtoms(atomNames, chains, atoms);
348                }
349                return atoms.toArray(new Atom[0]);
350
351        }
352
353        /**
354         * Convert all atoms of the structure (all models) into an Atom array
355         *
356         * @param s
357         *            input structure
358         * @return all atom array
359         */
360        public static Atom[] getAllAtomArray(Structure s) {
361                List<Atom> atoms = new ArrayList<>();
362
363                AtomIterator iter = new AtomIterator(s);
364                while (iter.hasNext()) {
365                        Atom a = iter.next();
366                        atoms.add(a);
367                }
368                return atoms.toArray(new Atom[0]);
369        }
370        /**
371         * Convert all atoms of the structure (specified model) into an Atom array
372         *
373         * @param s
374         *            input structure
375         * @return all atom array
376         */
377        public static Atom[] getAllAtomArray(Structure s, int model) {
378                List<Atom> atoms = new ArrayList<>();
379
380                AtomIterator iter = new AtomIterator(s,model);
381                while (iter.hasNext()) {
382                        Atom a = iter.next();
383                        atoms.add(a);
384                }
385                return atoms.toArray(new Atom[0]);
386
387        }
388
389        /**
390         * Returns and array of all atoms of the chain, including
391         * Hydrogens (if present) and all HETATOMs. Waters are not included.
392         *
393         * @param c
394         *            input chain
395         * @return all atom array
396         */
397        public static Atom[] getAllAtomArray(Chain c) {
398                List<Atom> atoms = new ArrayList<>();
399
400                for (Group g : c.getAtomGroups()) {
401                        if (g.isWater())
402                                continue;
403                        atoms.addAll(g.getAtoms());
404                }
405                return atoms.toArray(new Atom[0]);
406        }
407
408        /**
409         * List of groups from the structure not included in ca (e.g. ligands).
410         *
411         * Unaligned groups are searched from all chains referenced in ca, as well
412         * as any chains in the first model of the structure from ca[0], if any.
413         *
414         * @param ca an array of atoms
415         * @return
416         */
417        public static List<Group> getUnalignedGroups(Atom[] ca) {
418                Set<Chain> chains = new HashSet<>();
419                Set<Group> caGroups = new HashSet<>();
420
421                // Create list of all chains in this structure
422                Structure s = null;
423                if (ca.length > 0) {
424                        Group g = ca[0].getGroup();
425                        if (g != null) {
426                                Chain c = g.getChain();
427                                if (c != null) {
428                                        s = c.getStructure();
429                                }
430                        }
431                }
432                if (s != null) {
433                        // Add all chains from the structure
434                        chains.addAll(s.getChains(0));
435                }
436
437                // Add groups and chains from ca
438                for (Atom a : ca) {
439                        Group g = a.getGroup();
440                        if (g != null) {
441                                caGroups.add(g);
442
443                                Chain c = g.getChain();
444                                if (c != null) {
445                                        chains.add(c);
446                                }
447                        }
448                }
449
450                // Iterate through all chains, finding groups not in ca
451                List<Group> unadded = new ArrayList<>();
452                for (Chain c : chains) {
453                        for (Group g : c.getAtomGroups()) {
454                                if (!caGroups.contains(g)) {
455                                        unadded.add(g);
456                                }
457                        }
458                }
459                return unadded;
460        }
461
462        /**
463         * Finds all ligand groups from the target which fall within the cutoff distance
464         * of some atom from the query set.
465         *
466         * @param target Set of groups including the ligands
467         * @param query Atom selection
468         * @param cutoff Distance from query atoms to consider, in angstroms
469         * @return All groups from the target with at least one atom within cutoff of a query atom
470         * @see StructureTools#DEFAULT_LIGAND_PROXIMITY_CUTOFF
471         */
472        public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[] query, double cutoff) {
473                // Spatial hashing of the reduced structure
474                Grid grid = new Grid(cutoff);
475                grid.addAtoms(query);
476
477                List<Group> ligands = new ArrayList<>();
478                for(Group g :target ) {
479                        // don't worry about waters
480                        if(g.isWater()) {
481                                continue;
482                        }
483
484                        if(g.isPolymeric() ) {
485                                // Polymers aren't ligands
486                                continue;
487                        }
488
489                        // It is a ligand!
490
491                        // Check that it's within cutoff of something in reduced
492                        List<Atom> groupAtoms = g.getAtoms();
493                        if( ! grid.hasAnyContact(Calc.atomsToPoints(groupAtoms))) {
494                                continue;
495                        }
496
497                        ligands.add(g);
498                }
499                return ligands;
500        }
501
502        /**
503         * Adds a particular group to a structure. A new chain will be created if necessary.
504         *
505         * <p>When adding multiple groups, pass the return value of one call as the
506         * chainGuess parameter of the next call for efficiency.
507         * <pre>
508         * Chain guess = null;
509         * for(Group g : groups) {
510         *     guess = addGroupToStructure(s, g, guess );
511         * }
512         * </pre>
513         * @param s structure to receive the group
514         * @param g group to add
515         * @param chainGuess (optional) If not null, should be a chain from s. Used
516         *  to improve performance when adding many groups from the same chain
517         * @param clone Indicates whether the input group should be cloned before
518         *  being added to the new chain
519         * @return the chain g was added to
520         */
521        public static Chain addGroupToStructure(Structure s, Group g, int model, Chain chainGuess, boolean clone ) {
522                // Find or create the chain
523                String chainId = g.getChainId();
524                assert !chainId.isEmpty();
525                Chain chain;
526                if (chainGuess != null && chainGuess.getId().equals(chainId)) {
527                        // previously guessed chain
528                        chain = chainGuess;
529                } else {
530                        // Try to guess
531                        chain = s.getChain(chainId, model);
532                        if (chain == null) {
533                                // no chain found
534                                chain = new ChainImpl();
535                                chain.setId(chainId);
536
537                                Chain oldChain = g.getChain();
538                                chain.setName(oldChain.getName());
539
540                                EntityInfo oldEntityInfo = oldChain.getEntityInfo();
541
542                                EntityInfo newEntityInfo;
543                                if (oldEntityInfo == null) {
544                                        newEntityInfo = new EntityInfo();
545                                        s.addEntityInfo(newEntityInfo);
546                                } else {
547                                        newEntityInfo = s.getEntityById(oldEntityInfo.getMolId());
548                                        if (newEntityInfo == null) {
549                                                newEntityInfo = new EntityInfo(oldEntityInfo);
550                                                s.addEntityInfo(newEntityInfo);
551                                        }
552                                }
553                                newEntityInfo.addChain(chain);
554                                chain.setEntityInfo(newEntityInfo);
555
556                                // TODO Do the seqres need to be cloned too? -SB 2016-10-7
557                                chain.setSeqResGroups(oldChain.getSeqResGroups());
558                                chain.setSeqMisMatches(oldChain.getSeqMisMatches());
559
560                                s.addChain(chain, model);
561                        }
562                }
563
564                // Add cloned group
565                if (clone) {
566                        g = (Group) g.clone();
567                }
568                chain.addGroup(g);
569
570                return chain;
571        }
572
573        /**
574         * Add a list of groups to a new structure. Chains will be automatically
575         * created in the new structure as needed.
576         * @param s structure to receive the group
577         * @param groups groups to add
578         * @param model model number
579         * @param clone Indicates whether the input groups should be cloned before
580         *  being added to the new chain
581         */
582        public static void addGroupsToStructure(Structure s, Collection<Group> groups, int model, boolean clone) {
583                Chain chainGuess = null;
584                for(Group g : groups) {
585                        chainGuess = addGroupToStructure(s, g, model, chainGuess, clone);
586                }
587        }
588
589        /**
590         * Expand a set of atoms into all groups from the same structure.
591         *
592         * If the structure is set, only the first atom is used (assuming all
593         * atoms come from the same original structure).
594         * If the atoms aren't linked to a structure (for instance, for cloned atoms),
595         * searches all chains of all atoms for groups.
596         * @param atoms Sample of atoms
597         * @return All groups from all chains accessible from the input atoms
598         */
599        public static Set<Group> getAllGroupsFromSubset(Atom[] atoms) {
600                return getAllGroupsFromSubset(atoms,null);
601        }
602
603        /**
604         * Expand a set of atoms into all groups from the same structure.
605         *
606         * If the structure is set, only the first atom is used (assuming all
607         * atoms come from the same original structure).
608         * If the atoms aren't linked to a structure (for instance, for cloned atoms),
609         * searches all chains of all atoms for groups.
610         * @param atoms Sample of atoms
611         * @param types Type of groups to return (useful for getting only ligands, for instance).
612         *  Null gets all groups.
613         * @return All groups from all chains accessible from the input atoms
614         */
615        public static Set<Group> getAllGroupsFromSubset(Atom[] atoms,GroupType types) {
616                // Get the full structure
617                Structure s = null;
618                if (atoms.length > 0) {
619                        Group g = atoms[0].getGroup();
620                        if (g != null) {
621                                Chain c = g.getChain();
622                                if (c != null) {
623                                        s = c.getStructure();
624                                }
625                        }
626                }
627                // Collect all groups from the structure
628                Set<Chain> allChains = new HashSet<>();
629                if( s != null ) {
630                        allChains.addAll(s.getChains());
631                }
632                // In case the structure wasn't set, need to use ca chains too
633                for(Atom a : atoms) {
634                        Group g = a.getGroup();
635                        if(g != null) {
636                                Chain c = g.getChain();
637                                if( c != null ) {
638                                        allChains.add(c);
639                                }
640                        }
641                }
642
643                if(allChains.isEmpty() ) {
644                        return Collections.emptySet();
645                }
646
647                // Extract all ligand groups
648                Set<Group> full = new HashSet<>();
649                for(Chain c : allChains) {
650                        if(types == null) {
651                                full.addAll(c.getAtomGroups());
652                        } else {
653                                full.addAll(c.getAtomGroups(types));
654                        }
655                }
656
657                return full;
658        }
659
660
661        /**
662         * Returns and array of all non-Hydrogen atoms in the given Structure,
663         * optionally including HET atoms or not. Waters are not included.
664         *
665         * @param s
666         * @param hetAtoms
667         *            if true HET atoms are included in array, if false they are not
668         * @return
669         */
670        public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms) {
671                AtomIterator iter = new AtomIterator(s);
672                return getAllNonHAtomArray(s, hetAtoms, iter);
673        }
674        /**
675         * Returns and array of all non-Hydrogen atoms in the given Structure,
676         * optionally including HET atoms or not. Waters are not included.
677         *
678         * @param s
679         * @param hetAtoms
680         *            if true HET atoms are included in array, if false they are not
681         * @param modelNr Model number to draw atoms from
682         * @return
683         */
684        public static Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, int modelNr) {
685                AtomIterator iter = new AtomIterator(s,modelNr);
686                return getAllNonHAtomArray(s, hetAtoms, iter);
687        }
688
689        private static Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, AtomIterator iter) {
690                List<Atom> atoms = new ArrayList<>();
691
692                while (iter.hasNext()) {
693                        Atom a = iter.next();
694                        if (a.getElement() == Element.H)
695                                continue;
696
697                        Group g = a.getGroup();
698
699                        if (g.isWater())
700                                continue;
701
702                        if (!hetAtoms && g.getType().equals(GroupType.HETATM))
703                                continue;
704
705                        atoms.add(a);
706                }
707                return atoms.toArray(new Atom[0]);
708        }
709
710        /**
711         * Returns and array of all non-Hydrogen atoms in the given Chain,
712         * optionally including HET atoms or not Waters are not included.
713         *
714         * @param c
715         * @param hetAtoms
716         *            if true HET atoms are included in array, if false they are not
717         * @return
718         */
719        public static Atom[] getAllNonHAtomArray(Chain c, boolean hetAtoms) {
720                List<Atom> atoms = new ArrayList<>();
721
722                for (Group g : c.getAtomGroups()) {
723                        if (g.isWater())
724                                continue;
725                        for (Atom a : g.getAtoms()) {
726
727                                if (a.getElement() == Element.H)
728                                        continue;
729
730                                if (!hetAtoms && g.getType().equals(GroupType.HETATM))
731                                        continue;
732
733                                atoms.add(a);
734                        }
735                }
736                return atoms.toArray(new Atom[0]);
737        }
738
739        /**
740         * Returns and array of all non-Hydrogen atoms coordinates in the given Chain,
741         * optionally including HET atoms or not Waters are not included.
742         *
743         * @param c
744         * @param hetAtoms
745         *            if true HET atoms are included in array, if false they are not
746         * @return
747         */
748        public static Point3d[] getAllNonHCoordsArray(Chain c, boolean hetAtoms) {
749                List<Point3d> atoms = new ArrayList<>();
750
751                for (Group g : c.getAtomGroups()) {
752                        if (g.isWater())
753                                continue;
754                        for (Atom a : g.getAtoms()) {
755
756                                if (a.getElement() == Element.H)
757                                        continue;
758
759                                if (!hetAtoms && g.getType().equals(GroupType.HETATM))
760                                        continue;
761
762                                atoms.add(a.getCoordsAsPoint3d());
763                        }
764                }
765                return atoms.toArray(new Point3d[0]);
766        }
767
768        /**
769         * Adds to the given atoms list, all atoms of groups that contained all
770         * requested atomNames, i.e. if a group does not contain all of the
771         * requested atom names, its atoms won't be added.
772         *
773         * @param atomNames
774         * @param chains
775         * @param atoms
776         */
777        private static void extractAtoms(String[] atomNames, List<Chain> chains, List<Atom> atoms) {
778
779                for (Chain c : chains) {
780
781                        for (Group g : c.getAtomGroups()) {
782
783                                // a temp container for the atoms of this group
784                                List<Atom> thisGroupAtoms = new ArrayList<Atom>();
785                                // flag to check if this group contains all the requested atoms.
786                                boolean thisGroupAllAtoms = true;
787                                for (String atomName : atomNames) {
788                                        Atom a = g.getAtom(atomName);
789
790                                        if (a == null) {
791                                                // this group does not have a required atom, skip it...
792                                                thisGroupAllAtoms = false;
793                                                break;
794                                        }
795                                        thisGroupAtoms.add(a);
796                                }
797                                if (thisGroupAllAtoms) {
798                                        // add the atoms of this group to the array.
799                                        atoms.addAll(thisGroupAtoms);
800                                }
801                        }
802                }
803        }
804
805        /**
806         * Returns an array of the requested Atoms from the Chain object. Iterates
807         * over all groups and checks if the requested atoms are in this group, no
808         * matter if this is a AminoAcid or Hetatom group. If the group does not
809         * contain all requested atoms then no atoms are added for that group.
810         *
811         * @param c
812         *            the Chain to get the atoms from
813         *
814         * @param atomNames
815         *            contains the atom names to be used.
816         * @return an Atom[] array
817         */
818        public static Atom[] getAtomArray(Chain c, String[] atomNames) {
819
820                List<Atom> atoms = new ArrayList<>();
821
822                for (Group g : c.getAtomGroups()) {
823
824                        // a temp container for the atoms of this group
825                        List<Atom> thisGroupAtoms = new ArrayList<>();
826                        // flag to check if this group contains all the requested atoms.
827                        boolean thisGroupAllAtoms = true;
828                        for (String atomName : atomNames) {
829                                Atom a = g.getAtom(atomName);
830                                if (a == null) {
831                                        logger.debug("Group " + g.getResidueNumber() + " ("
832                                                        + g.getPDBName()
833                                                        + ") does not have the required atom '" + atomName
834                                                        + "'");
835                                        // this group does not have a required atom, skip it...
836                                        thisGroupAllAtoms = false;
837                                        break;
838                                }
839                                thisGroupAtoms.add(a);
840                        }
841
842                        if (thisGroupAllAtoms) {
843                                // add the atoms of this group to the array.
844                                atoms.addAll(thisGroupAtoms);
845                        }
846
847                }
848                return atoms.toArray(new Atom[0]);
849
850        }
851
852        /**
853         * Returns an Atom array of the C-alpha atoms. Any atom that is a carbon and
854         * has CA name will be returned.
855         *
856         * @param c
857         *            the structure object
858         * @return an Atom[] array
859         * @see #getRepresentativeAtomArray(Chain)
860         */
861        public static Atom[] getAtomCAArray(Chain c) {
862                List<Atom> atoms = new ArrayList<>();
863
864                for (Group g : c.getAtomGroups()) {
865                        if (g.hasAtom(CA_ATOM_NAME)
866                                        && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) {
867                                atoms.add(g.getAtom(CA_ATOM_NAME));
868                        }
869                }
870
871                return atoms.toArray(new Atom[0]);
872        }
873
874        /**
875         * Gets a representative atom for each group that is part of the chain
876         * backbone. Note that modified aminoacids won't be returned as part of the
877         * backbone if the {@link org.biojava.nbio.structure.chem.ReducedChemCompProvider} was used to load the
878         * structure.
879         *
880         * For amino acids, the representative is a CA carbon. For nucleotides, the
881         * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group
882         * types will be ignored.
883         *
884         * @param c
885         * @return representative Atoms of the chain backbone
886         * @since Biojava 4.1.0
887         */
888        public static Atom[] getRepresentativeAtomArray(Chain c) {
889                List<Atom> atoms = new ArrayList<>();
890
891                for (Group g : c.getAtomGroups()) {
892
893                        switch (g.getType()) {
894                        case AMINOACID:
895                                if (g.hasAtom(CA_ATOM_NAME)
896                                                && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) {
897                                        atoms.add(g.getAtom(CA_ATOM_NAME));
898                                }
899                                break;
900                        case NUCLEOTIDE:
901                                if (g.hasAtom(NUCLEOTIDE_REPRESENTATIVE)) {
902                                        atoms.add(g.getAtom(NUCLEOTIDE_REPRESENTATIVE));
903                                }
904                                break;
905                        default:
906                                // don't add
907                        }
908                }
909
910                return atoms.toArray(new Atom[0]);
911
912        }
913
914        /**
915         * Provides an equivalent copy of Atoms in a new array. Clones everything,
916         * starting with parent groups and chains. The chain will only contain
917         * groups that are part of the input array.
918         *
919         * @param ca
920         *            array of representative atoms, e.g. CA atoms
921         * @return Atom array
922         * @since Biojava 4.1.0
923         */
924        public static Atom[] cloneAtomArray(Atom[] ca) {
925                Atom[] newCA = new Atom[ca.length];
926
927                List<Chain> model = new ArrayList<Chain>();
928                int apos = -1;
929                for (Atom a : ca) {
930                        apos++;
931                        Group parentG = a.getGroup();
932                        Chain parentC = parentG.getChain();
933
934                        Chain newChain = null;
935                        for (Chain c : model) {
936                                if (c.getName().equals(parentC.getName())) {
937                                        newChain = c;
938                                        break;
939                                }
940                        }
941                        if (newChain == null) {
942                                newChain = new ChainImpl();
943                                newChain.setId(parentC.getId());
944                                newChain.setName(parentC.getName());
945                                model.add(newChain);
946                        }
947
948                        Group parentN = (Group) parentG.clone();
949
950                        newCA[apos] = parentN.getAtom(a.getName());
951                        try {
952                                // if the group doesn't exist yet, this produces a StructureException
953                                newChain.getGroupByPDB(parentN.getResidueNumber());
954                        } catch (StructureException e) {
955                                // the group doesn't exist yet in the newChain, let's add it
956                                newChain.addGroup(parentN);
957                        }
958
959                }
960                return newCA;
961        }
962
963        /**
964         * Clone a set of representative Atoms, but returns the parent groups
965         *
966         * @param ca
967         *            Atom array
968         * @return Group array
969         */
970        public static Group[] cloneGroups(Atom[] ca) {
971                Group[] newGroup = new Group[ca.length];
972
973                List<Chain> model = new ArrayList<Chain>();
974                int apos = -1;
975                for (Atom a : ca) {
976                        apos++;
977                        Group parentG = a.getGroup();
978                        Chain parentC = parentG.getChain();
979
980                        Chain newChain = null;
981                        for (Chain c : model) {
982                                if (c.getName().equals(parentC.getName())) {
983                                        newChain = c;
984                                        break;
985                                }
986                        }
987                        if (newChain == null) {
988                                newChain = new ChainImpl();
989                                newChain.setName(parentC.getName());
990                                model.add(newChain);
991                        }
992
993                        Group ng = (Group) parentG.clone();
994                        newGroup[apos] = ng;
995                        newChain.addGroup(ng);
996                }
997                return newGroup;
998        }
999
1000        /**
1001         * Utility method for working with circular permutations. Creates a
1002         * duplicated and cloned set of Calpha atoms from the input array.
1003         *
1004         * @param ca2
1005         *            atom array
1006         * @return cloned and duplicated set of input array
1007         */
1008        public static Atom[] duplicateCA2(Atom[] ca2) {
1009                // we don't want to rotate input atoms, do we?
1010                Atom[] ca2clone = new Atom[ca2.length * 2];
1011
1012                int pos = 0;
1013
1014                Chain c = null;
1015                String prevChainId = "";
1016                for (Atom a : ca2) {
1017                        Group g = (Group) a.getGroup().clone(); // works because each group
1018                        // has only a single atom
1019
1020                        if (c == null) {
1021                                c = new ChainImpl();
1022                                Chain orig = a.getGroup().getChain();
1023                                c.setId(orig.getId());
1024                                c.setName(orig.getName());
1025                        } else {
1026                                Chain orig = a.getGroup().getChain();
1027                                if (!orig.getId().equals(prevChainId)) {
1028                                        c = new ChainImpl();
1029                                        c.setId(orig.getId());
1030                                        c.setName(orig.getName());
1031                                }
1032                        }
1033
1034                        c.addGroup(g);
1035                        ca2clone[pos] = g.getAtom(a.getName());
1036
1037                        pos++;
1038                }
1039
1040                // Duplicate ca2!
1041                c = null;
1042                prevChainId = "";
1043                for (Atom a : ca2) {
1044                        Group g = (Group) a.getGroup().clone();
1045
1046                        if (c == null) {
1047                                c = new ChainImpl();
1048                                Chain orig = a.getGroup().getChain();
1049                                c.setId(orig.getId());
1050                                c.setName(orig.getName());
1051                        } else {
1052                                Chain orig = a.getGroup().getChain();
1053                                if (!orig.getId().equals(prevChainId)) {
1054                                        c = new ChainImpl();
1055                                        c.setId(orig.getId());
1056                                        c.setName(orig.getName());
1057                                }
1058                        }
1059
1060                        c.addGroup(g);
1061                        ca2clone[pos] = g.getAtom(a.getName());
1062
1063                        pos++;
1064                }
1065
1066                return ca2clone;
1067
1068        }
1069
1070        /**
1071         * Return an Atom array of the C-alpha atoms. Any atom that is a carbon and
1072         * has CA name will be returned.
1073         *
1074         * @param s
1075         *            the structure object
1076         * @return an Atom[] array
1077         * @see #getRepresentativeAtomArray(Structure)
1078         */
1079        public static Atom[] getAtomCAArray(Structure s) {
1080
1081                List<Atom> atoms = new ArrayList<Atom>();
1082
1083                for (Chain c : s.getChains()) {
1084                        for (Group g : c.getAtomGroups()) {
1085                                if (g.hasAtom(CA_ATOM_NAME)
1086                                                && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) {
1087                                        atoms.add(g.getAtom(CA_ATOM_NAME));
1088                                }
1089                        }
1090                }
1091
1092                return atoms.toArray(new Atom[atoms.size()]);
1093        }
1094
1095        /**
1096         * Gets a representative atom for each group that is part of the chain
1097         * backbone. Note that modified aminoacids won't be returned as part of the
1098         * backbone if the {@link org.biojava.nbio.structure.chem.ReducedChemCompProvider} was used to load the
1099         * structure.
1100         *
1101         * For amino acids, the representative is a CA carbon. For nucleotides, the
1102         * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group
1103         * types will be ignored.
1104         *
1105         * @param s
1106         *            Input structure
1107         * @return representative Atoms of the structure backbone
1108         * @since Biojava 4.1.0
1109         */
1110        public static Atom[] getRepresentativeAtomArray(Structure s) {
1111
1112                List<Atom> atoms = new ArrayList<>();
1113
1114                for (Chain c : s.getChains()) {
1115                        Atom[] chainAtoms = getRepresentativeAtomArray(c);
1116                        atoms.addAll(Arrays.asList(chainAtoms));
1117                }
1118
1119                return atoms.toArray(new Atom[0]);
1120        }
1121
1122        /**
1123         * Return an Atom array of the main chain atoms: CA, C, N, O Any group that
1124         * contains those atoms will be included, be it a standard aminoacid or not
1125         *
1126         * @param s
1127         *            the structure object
1128         * @return an Atom[] array
1129         */
1130        public static Atom[] getBackboneAtomArray(Structure s) {
1131
1132                List<Atom> atoms = new ArrayList<>();
1133
1134                for (Chain c : s.getChains()) {
1135                        for (Group g : c.getAtomGroups()) {
1136                                if (g.hasAminoAtoms()) {
1137                                        // this means we will only take atoms grom groups that have
1138                                        // complete backbones
1139                                        for (Atom a : g.getAtoms()) {
1140                                                switch (g.getType()) {
1141                                                case NUCLEOTIDE:
1142                                                        // Nucleotide backbone
1143                                                        if (a.getName().equals(C1_ATOM_NAME))
1144                                                                atoms.add(a);
1145                                                        if (a.getName().equals(C2_ATOM_NAME))
1146                                                                atoms.add(a);
1147                                                        if (a.getName().equals(C3_ATOM_NAME))
1148                                                                atoms.add(a);
1149                                                        if (a.getName().equals(C4_ATOM_NAME))
1150                                                                atoms.add(a);
1151                                                        if (a.getName().equals(O2_ATOM_NAME))
1152                                                                atoms.add(a);
1153                                                        if (a.getName().equals(O3_ATOM_NAME))
1154                                                                atoms.add(a);
1155                                                        if (a.getName().equals(O4_ATOM_NAME))
1156                                                                atoms.add(a);
1157                                                        if (a.getName().equals(O5_ATOM_NAME))
1158                                                                atoms.add(a);
1159                                                        if (a.getName().equals(OP1_ATOM_NAME))
1160                                                                atoms.add(a);
1161                                                        if (a.getName().equals(OP2_ATOM_NAME))
1162                                                                atoms.add(a);
1163                                                        if (a.getName().equals(P_ATOM_NAME))
1164                                                                atoms.add(a);
1165                                                        // TODO Allow C4* names as well as C4'? -SB 3/2015
1166                                                        break;
1167                                                case AMINOACID:
1168                                                default:
1169                                                        // we do it this way instead of with g.getAtom() to
1170                                                        // be sure we always use the same order as original
1171                                                        if (a.getName().equals(CA_ATOM_NAME))
1172                                                                atoms.add(a);
1173                                                        if (a.getName().equals(C_ATOM_NAME))
1174                                                                atoms.add(a);
1175                                                        if (a.getName().equals(N_ATOM_NAME))
1176                                                                atoms.add(a);
1177                                                        if (a.getName().equals(O_ATOM_NAME))
1178                                                                atoms.add(a);
1179                                                        break;
1180                                                }
1181                                        }
1182                                }
1183                        }
1184
1185                }
1186
1187                return atoms.toArray(new Atom[0]);
1188        }
1189
1190        /**
1191         * Convert three character amino acid codes into single character e.g.
1192         * convert CYS to C. Valid 3-letter codes will be those of the standard 20
1193         * amino acids plus MSE, CSE, SEC, PYH, PYL (see the {@link #aminoAcids}
1194         * map)
1195         *
1196         * @return the 1 letter code, or null if the given 3 letter code does not
1197         *         correspond to an amino acid code
1198         * @param groupCode3
1199         *            a three character amino acid representation String
1200         * @see #get1LetterCode(String)
1201         */
1202        public static Character get1LetterCodeAmino(String groupCode3) {
1203                return aminoAcids.get(groupCode3);
1204        }
1205
1206        /**
1207         * Convert a three letter amino acid or nucleotide code into a single
1208         * character code. If the code does not correspond to an amino acid or
1209         * nucleotide, returns {@link #UNKNOWN_GROUP_LABEL}.
1210         *
1211         * Returned null for nucleotides prior to version 4.0.1.
1212         *
1213         * @param groupCode3
1214         *            three letter representation
1215         * @return The 1-letter abbreviation
1216         */
1217        public static Character get1LetterCode(String groupCode3) {
1218
1219                Character code1;
1220
1221                // is it a standard amino acid ?
1222                code1 = get1LetterCodeAmino(groupCode3);
1223
1224                if (code1 == null) {
1225                        // hm groupCode3 is not standard
1226                        // perhaps it is a nucleotide?
1227                        groupCode3 = groupCode3.trim();
1228                        if (isNucleotide(groupCode3)) {
1229                                code1 = nucleotides30.get(groupCode3);
1230                                if (code1 == null) {
1231                                        code1 = nucleotides23.get(groupCode3);
1232                                }
1233                                if (code1 == null) {
1234                                        code1 = UNKNOWN_GROUP_LABEL;
1235                                }
1236                        } else {
1237                                // does not seem to be so let's assume it is
1238                                // nonstandard aminoacid and label it "X"
1239                                // logger.warning("unknown group name "+groupCode3 );
1240                                code1 = UNKNOWN_GROUP_LABEL;
1241                        }
1242                }
1243
1244                return code1;
1245
1246        }
1247
1248        /**
1249         * Test if the three-letter code of an ATOM entry corresponds to a
1250         * nucleotide or to an aminoacid.
1251         *
1252         * @param groupCode3
1253         *            3-character code for a group.
1254         *
1255         */
1256        public static boolean isNucleotide(String groupCode3) {
1257                String code = groupCode3.trim();
1258                return nucleotides30.containsKey(code)
1259                                || nucleotides23.containsKey(code);
1260        }
1261
1262        public static String convertAtomsToSeq(Atom[] atoms) {
1263
1264                StringBuilder buf = new StringBuilder();
1265                Group prevGroup = null;
1266                for (Atom a : atoms) {
1267                        Group g = a.getGroup();
1268                        if (prevGroup != null) {
1269                                if (prevGroup.equals(g)) {
1270                                        // we add each group only once.
1271                                        continue;
1272                                }
1273                        }
1274                        String code3 = g.getPDBName();
1275                        Character code1 = get1LetterCodeAmino(code3);
1276                        if (code1 == null)
1277                                code1 = UNKNOWN_GROUP_LABEL;
1278
1279                        buf.append(code1);
1280
1281                        prevGroup = g;
1282
1283                }
1284                return buf.toString();
1285        }
1286
1287        /**
1288         * Get a group represented by a ResidueNumber.
1289         *
1290         * @param struc
1291         *            a {@link Structure}
1292         * @param pdbResNum
1293         *            a {@link ResidueNumber}
1294         * @return a group in the structure that is represented by the pdbResNum.
1295         * @throws StructureException
1296         *             if the group cannot be found.
1297         */
1298        public static Group getGroupByPDBResidueNumber(Structure struc,
1299                        ResidueNumber pdbResNum) throws StructureException {
1300                if (struc == null || pdbResNum == null) {
1301                        throw new IllegalArgumentException("Null argument(s).");
1302                }
1303
1304                Chain chain = struc.getPolyChainByPDB(pdbResNum.getChainName());
1305
1306                return chain.getGroupByPDB(pdbResNum);
1307        }
1308
1309        /**
1310         * Returns the set of intra-chain contacts for the given chain for given
1311         * atom names, i.e. the contact map. Uses a spatial hashing algorithm that
1312         * speeds up the calculation without need of full distance matrix. The
1313         * parsing mode {@link FileParsingParameters#setAlignSeqRes(boolean)} needs
1314         * to be set to true for this to work.
1315         *
1316         * @param chain
1317         * @param atomNames
1318         *            the array with atom names to be used. Beware: CA will do both
1319         *            C-alphas an Calciums if null all non-H atoms of non-hetatoms
1320         *            will be used
1321         * @param cutoff
1322         * @return
1323         */
1324        public static AtomContactSet getAtomsInContact(Chain chain,
1325                        String[] atomNames, double cutoff) {
1326                Grid grid = new Grid(cutoff);
1327
1328                Atom[] atoms = null;
1329                if (atomNames == null) {
1330                        atoms = getAllNonHAtomArray(chain, false);
1331                } else {
1332                        atoms = getAtomArray(chain, atomNames);
1333                }
1334                // If tha
1335                if(atoms.length==0){
1336                        logger.warn("No atoms found for buidling grid!");
1337                        return new AtomContactSet(cutoff);
1338                }
1339                grid.addAtoms(atoms);
1340
1341                return grid.getAtomContacts();
1342        }
1343
1344        /**
1345         * Returns the set of intra-chain contacts for the given chain for all non-H
1346         * atoms of non-hetatoms, i.e. the contact map. Uses a spatial hashing
1347         * algorithm that speeds up the calculation without need of full distance
1348         * matrix. The parsing mode
1349         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1350         * true for this to work.
1351         *
1352         * @param chain
1353         * @param cutoff
1354         * @return
1355         */
1356        public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
1357                return getAtomsInContact(chain, (String[]) null, cutoff);
1358        }
1359
1360        /**
1361         * Returns the set of intra-chain contacts for the given chain for C-alpha
1362         * atoms (including non-standard aminoacids appearing as HETATM groups),
1363         * i.e. the contact map. Uses a spatial hashing algorithm that speeds up
1364         * the calculation without need of full distance matrix. The parsing mode
1365         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1366         * true for this to work.
1367         *
1368         * @param chain
1369         * @param cutoff
1370         * @return
1371         * @see #getRepresentativeAtomsInContact(Chain, double)
1372         */
1373        public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
1374                Grid grid = new Grid(cutoff);
1375
1376                Atom[] atoms = getAtomCAArray(chain);
1377
1378                grid.addAtoms(atoms);
1379
1380                return grid.getAtomContacts();
1381        }
1382
1383        /**
1384         * Returns the set of intra-chain contacts for the given chain for C-alpha
1385         * or C3' atoms (including non-standard aminoacids appearing as HETATM
1386         * groups), i.e. the contact map. Uses a spatial hashing algorithm that
1387         * speeds up the calculation without need of full distance matrix.
1388         *
1389         * @param chain
1390         * @param cutoff
1391         * @return
1392         * @since Biojava 4.1.0
1393         */
1394        public static AtomContactSet getRepresentativeAtomsInContact(Chain chain,
1395                        double cutoff) {
1396                Grid grid = new Grid(cutoff);
1397
1398                Atom[] atoms = getRepresentativeAtomArray(chain);
1399
1400                grid.addAtoms(atoms);
1401
1402                return grid.getAtomContacts();
1403        }
1404
1405        /**
1406         * Returns the set of inter-chain contacts between the two given chains for
1407         * the given atom names. Uses a spatial hashing algorithm that speeds up
1408         * the calculation without need of full distance matrix. The parsing mode
1409         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1410         * true for this to work.
1411         *
1412         * @param chain1
1413         * @param chain2
1414         * @param atomNames
1415         *            the array with atom names to be used. For Calphas use {"CA"},
1416         *            if null all non-H atoms will be used. Note HET atoms are
1417         *            ignored unless this parameter is null.
1418         * @param cutoff
1419         * @param hetAtoms
1420         *            if true HET atoms are included, if false they are not
1421         * @return
1422         */
1423        public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2,
1424                        String[] atomNames, double cutoff, boolean hetAtoms) {
1425                Grid grid = new Grid(cutoff);
1426                Atom[] atoms1 = null;
1427                Atom[] atoms2 = null;
1428                if (atomNames == null) {
1429                        atoms1 = getAllNonHAtomArray(chain1, hetAtoms);
1430                        atoms2 = getAllNonHAtomArray(chain2, hetAtoms);
1431                } else {
1432                        atoms1 = getAtomArray(chain1, atomNames);
1433                        atoms2 = getAtomArray(chain2, atomNames);
1434                }
1435                grid.addAtoms(atoms1, atoms2);
1436
1437                return grid.getAtomContacts();
1438        }
1439
1440        /**
1441         * Returns the set of inter-chain contacts between the two given chains for
1442         * all non-H atoms. Uses a spatial hashing algorithm that speeds up the
1443         * calculation without need of full distance matrix. The parsing mode
1444         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1445         * true for this to work.
1446         *
1447         * @param chain1
1448         * @param chain2
1449         * @param cutoff
1450         * @param hetAtoms
1451         *            if true HET atoms are included, if false they are not
1452         * @return
1453         */
1454        public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2,
1455                        double cutoff, boolean hetAtoms) {
1456                return getAtomsInContact(chain1, chain2, null, cutoff, hetAtoms);
1457        }
1458
1459        /**
1460         * Finds Groups in {@code structure} that contain at least one Atom that is
1461         * within {@code radius} Angstroms of {@code centroid}.
1462         *
1463         * @param structure
1464         *            The structure from which to find Groups
1465         * @param centroid
1466         *            The centroid of the shell
1467         * @param excludeResidues
1468         *            A list of ResidueNumbers to exclude
1469         * @param radius
1470         *            The radius from {@code centroid}, in Angstroms
1471         * @param includeWater
1472         *            Whether to include Groups whose <em>only</em> atoms are water
1473         * @param useAverageDistance
1474         *            When set to true, distances are the arithmetic mean (1-norm)
1475         *            of the distances of atoms that belong to the group and that
1476         *            are within the shell; otherwise, distances are the minimum of
1477         *            these values
1478         * @return A map of Groups within (or partially within) the shell, to their
1479         *         distances in Angstroms
1480         */
1481        public static Map<Group, Double> getGroupDistancesWithinShell(
1482                        Structure structure, Atom centroid,
1483                        Set<ResidueNumber> excludeResidues, double radius,
1484                        boolean includeWater, boolean useAverageDistance) {
1485
1486                // for speed, we avoid calculating square roots
1487                radius = radius * radius;
1488
1489                Map<Group, Double> distances = new HashMap<Group, Double>();
1490
1491                // we only need this if we're averaging distances
1492                // note that we can't use group.getAtoms().size() because some the
1493                // group's atoms be outside the shell
1494                Map<Group, Integer> atomCounts = new HashMap<Group, Integer>();
1495
1496                for (Chain chain : structure.getChains()) {
1497                        groupLoop: for (Group chainGroup : chain.getAtomGroups()) {
1498
1499                                // exclude water
1500                                if (!includeWater && chainGroup.isWater())
1501                                        continue;
1502
1503                                // check blacklist of residue numbers
1504                                for (ResidueNumber rn : excludeResidues) {
1505                                        if (rn.equals(chainGroup.getResidueNumber()))
1506                                                continue groupLoop;
1507                                }
1508
1509                                for (Atom testAtom : chainGroup.getAtoms()) {
1510
1511                                        // use getDistanceFast as we are doing a lot of comparisons
1512                                        double dist = Calc.getDistanceFast(centroid, testAtom);
1513
1514                                        // if we're the shell
1515                                        if (dist <= radius) {
1516                                                if (!distances.containsKey(chainGroup))
1517                                                        distances.put(chainGroup, Double.POSITIVE_INFINITY);
1518                                                if (useAverageDistance) {
1519                                                        // sum the distance; we'll divide by the total
1520                                                        // number later
1521                                                        // here, we CANNOT use fastDistance (distance
1522                                                        // squared) because we want the arithmetic mean
1523                                                        distances.put(chainGroup, distances.get(chainGroup)
1524                                                                        + Math.sqrt(dist));
1525                                                        if (!atomCounts.containsKey(chainGroup))
1526                                                                atomCounts.put(chainGroup, 0);
1527                                                        atomCounts.put(chainGroup,
1528                                                                        atomCounts.get(chainGroup) + 1);
1529                                                } else {
1530                                                        // take the minimum distance among all atoms of
1531                                                        // chainGroup
1532                                                        // note that we can't break here because we might
1533                                                        // find a smaller distance
1534                                                        if (dist < distances.get(chainGroup)) {
1535                                                                distances.put(chainGroup, dist);
1536                                                        }
1537                                                }
1538                                        }
1539
1540                                }
1541                        }
1542                }
1543
1544                if (useAverageDistance) {
1545                        for (Map.Entry<Group, Double> entry : distances.entrySet()) {
1546                                int count = atomCounts.get(entry.getKey());
1547                                distances.put(entry.getKey(), entry.getValue() / count);
1548                        }
1549                } else {
1550                        // in this case we used getDistanceFast
1551                        distances.replaceAll((k, v) -> Math.sqrt(v));
1552                }
1553
1554                return distances;
1555
1556        }
1557
1558        public static Set<Group> getGroupsWithinShell(Structure structure,
1559                        Atom atom, Set<ResidueNumber> excludeResidues, double distance,
1560                        boolean includeWater) {
1561
1562                // square the distance to use as a comparison against getDistanceFast
1563                // which returns the square of a distance.
1564                distance = distance * distance;
1565
1566                Set<Group> returnSet = new LinkedHashSet<>();
1567                for (Chain chain : structure.getChains()) {
1568                        groupLoop: for (Group chainGroup : chain.getAtomGroups()) {
1569                                if (!includeWater && chainGroup.isWater())
1570                                        continue;
1571                                for (ResidueNumber rn : excludeResidues) {
1572                                        if (rn.equals(chainGroup.getResidueNumber()))
1573                                                continue groupLoop;
1574                                }
1575                                for (Atom atomB : chainGroup.getAtoms()) {
1576
1577                                        // use getDistanceFast as we are doing a lot of comparisons
1578                                        double dist = Calc.getDistanceFast(atom, atomB);
1579                                        if (dist <= distance) {
1580                                                returnSet.add(chainGroup);
1581                                                break;
1582                                        }
1583
1584                                }
1585                        }
1586                }
1587                return returnSet;
1588        }
1589
1590        /**
1591         * <p>
1592         * Returns a Set of Groups in a structure within the distance specified of a
1593         * given group.
1594         * </p>
1595         * <p>
1596         * Updated 18-Sep-2015 sroughley to return a Set so only a unique set of
1597         * Groups returned
1598         *
1599         * @param structure
1600         *            The structure to work with
1601         * @param group
1602         *            The 'query' group
1603         * @param distance
1604         *            The cutoff distance
1605         * @param includeWater
1606         *            Should water residues be included in the output?
1607         * @return {@link LinkedHashSet} of {@link Group}s within at least one atom
1608         *         with {@code distance} of at least one atom in {@code group}
1609         */
1610        public static Set<Group> getGroupsWithinShell(Structure structure,
1611                        Group group, double distance, boolean includeWater) {
1612
1613                Set<Group> returnList = new LinkedHashSet<>();
1614
1615                Set<ResidueNumber> excludeGroups = new HashSet<>();
1616                excludeGroups.add(group.getResidueNumber());
1617                for (Atom atom : group.getAtoms()) {
1618                        Set<Group> set = getGroupsWithinShell(structure, atom,
1619                                        excludeGroups, distance, includeWater);
1620                        returnList.addAll(set);
1621                }
1622
1623                return returnList;
1624        }
1625
1626        /**
1627         * Remove all models from a Structure and keep only the first
1628         *
1629         * @param s
1630         *            original Structure
1631         * @return a structure that contains only the first model
1632         * @since 3.0.5
1633         */
1634        public static Structure removeModels(Structure s) {
1635                if (s.nrModels() == 1)
1636                        return s;
1637
1638                Structure n = new StructureImpl();
1639                // go through whole substructure and clone ...
1640
1641                // copy structure data
1642
1643                n.setPdbId(s.getPdbId());
1644                n.setName(s.getName());
1645
1646                // TODO: do deep copying of data!
1647                n.setPDBHeader(s.getPDBHeader());
1648                n.setDBRefs(s.getDBRefs());
1649
1650                n.setSites(s.getSites());
1651
1652                n.setChains(s.getModel(0));
1653
1654                return n;
1655
1656        }
1657
1658        /**
1659         * Removes all polymeric and solvent groups from a list of groups
1660         *
1661         */
1662        public static List<Group> filterLigands(List<Group> allGroups) {
1663
1664                List<Group> groups = new ArrayList<>();
1665                for (Group g : allGroups) {
1666
1667                        if ( g.isPolymeric())
1668                                continue;
1669
1670                        if (!g.isWater()) {
1671                                groups.add(g);
1672                        }
1673                }
1674
1675                return groups;
1676        }
1677
1678        /**
1679         * Short version of {@link #getStructure(String, PDBFileParser, AtomCache)}
1680         * which creates new parsers when needed
1681         *
1682         * @param name
1683         * @return
1684         * @throws IOException
1685         * @throws StructureException
1686         */
1687        public static Structure getStructure(String name) throws IOException,
1688        StructureException {
1689                return StructureTools.getStructure(name, null, null);
1690        }
1691
1692        /**
1693         * Flexibly get a structure from an input String. The intent of this method
1694         * is to allow any reasonable string which could refer to a structure to be
1695         * correctly parsed. The following are currently supported:
1696         * <ol>
1697         * <li>Filename (if name refers to an existing file)
1698         * <li>PDB ID
1699         * <li>SCOP domains
1700         * <li>PDP domains
1701         * <li>Residue ranges
1702         * <li>Other formats supported by AtomCache
1703         * </ol>
1704         *
1705         * @param name
1706         *            Some reference to the protein structure
1707         * @param parser
1708         *            A clean PDBFileParser to use if it is a file. If null, a
1709         *            PDBFileParser will be instantiated if needed.
1710         * @param cache
1711         *            An AtomCache to use if the structure can be fetched from the
1712         *            PDB. If null, a AtomCache will be instantiated if needed.
1713         * @return A Structure object
1714         * @throws IOException
1715         *             if name is an existing file, but doesn't parse correctly
1716         * @throws StructureException
1717         *             if the format is unknown, or if AtomCache throws an
1718         *             exception.
1719         */
1720        public static Structure getStructure(String name, PDBFileParser parser,
1721                        AtomCache cache) throws IOException, StructureException {
1722                File f = new File(FileDownloadUtils.expandUserHome(name));
1723                if (f.exists()) {
1724                        if (parser == null) {
1725                                parser = new PDBFileParser();
1726                        }
1727                        InputStream inStream = new FileInputStream(f);
1728                        return parser.parsePDBFile(inStream);
1729                } else {
1730                        if (cache == null) {
1731                                cache = new AtomCache();
1732                        }
1733                        return cache.getStructure(name);
1734                }
1735        }
1736
1737        /**
1738         * Cleans up the structure's alternate location (altloc) groups. All alternate location groups should have all atoms (except
1739         * in the case of microheterogenity) or when a deuterium exists.
1740         * Ensure that all the alt loc groups have all the atoms in the main group.
1741         * @param structure The Structure to be cleaned up
1742         */
1743        public static void cleanUpAltLocs(Structure structure) {
1744                for (int i =0; i< structure.nrModels() ; i++){
1745                        for (Chain chain : structure.getModel(i)) {
1746                                for (Group group : chain.getAtomGroups()) {
1747                                        for (Group altLocGroup : group.getAltLocs()) {
1748                                                for ( Atom groupAtom : group.getAtoms()) {
1749                                                        // If this alt loc doesn't have this atom
1750                                                        if (! altLocGroup.hasAtom(groupAtom.getName())) {
1751                                                                // Fix for microheterogenity
1752                                                                if (altLocGroup.getPDBName().equals(group.getPDBName())) {
1753                                                                        // If it's a Hydrogen then we check for it's Deuterated brother
1754                                                                        if(!hasDeuteratedEquiv(groupAtom, altLocGroup)){
1755                                                                                altLocGroup.addAtom(groupAtom);
1756                                                                        }
1757                                                                }
1758                                                        }
1759                                                }
1760                                        }
1761                                }
1762                        }
1763                }
1764        }
1765
1766        /**
1767         * Check to see if an Deuterated atom has a non deuterated brother in the group.
1768         * @param atom the input atom that is putatively deuterium
1769         * @param currentGroup the group the atom is in
1770         * @return true if the atom is deuterated and it's hydrogen equive exists.
1771         */
1772        public static boolean hasNonDeuteratedEquiv(Atom atom, Group currentGroup) {
1773                // If it's deuterated and has a non-deuterated brother
1774                return atom.getElement() == Element.D && currentGroup.hasAtom(replaceFirstChar(atom.getName(), 'D', 'H'));
1775        }
1776
1777        /**
1778         * Check to see if a Hydrogen has a  Deuterated brother in the group.
1779         * @param atom the input atom that is putatively hydorgen
1780         * @param currentGroup the group the atom is in
1781         * @return true if the atom is hydrogen and it's Deuterium equiv exists.
1782         */
1783        public static boolean hasDeuteratedEquiv(Atom atom, Group currentGroup) {
1784                // If it's hydrogen and has a deuterated brother
1785                return atom.getElement() == Element.H && currentGroup.hasAtom(replaceFirstChar(atom.getName(), 'H', 'D'));
1786        }
1787
1788        private static String replaceFirstChar(String name, char c, char d) {
1789                if(name.charAt(0)==c){
1790                        return name.replaceFirst(String.valueOf(c), String.valueOf(d));
1791                }
1792                return name;
1793        }
1794
1795        /**
1796         * Remove all atoms but the representative atoms (C alphas or phosphates) from the given structure.
1797         * @param structure the structure
1798         * @since 5.4.0
1799         */
1800        public static void reduceToRepresentativeAtoms(Structure structure) {
1801                for (int modelIdx = 0; modelIdx<structure.nrModels(); modelIdx++) {
1802                        for (Chain c : structure.getPolyChains(modelIdx)) {
1803                                for (Group g : c.getAtomGroups()) {
1804                                        List<Atom> atoms = g.getAtoms();
1805                                        if (g.isAminoAcid()) {
1806                                                atoms.removeIf(a->!a.getName().equals(CA_ATOM_NAME));
1807                                        } else if (g.isNucleotide()) {
1808                                                atoms.removeIf(a->!a.getName().equals(NUCLEOTIDE_REPRESENTATIVE));
1809                                        }
1810                                        // else we keep all other atoms. We are concerned only about aminoacids and nucleotides that make up the bulk of the structures
1811                                }
1812                        }
1813                }
1814        }
1815}