Source code

001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jan 4, 2006
021 *
022 */
023package org.biojava.nbio.structure;
024
025import java.io.File;
026import java.io.FileInputStream;
027import java.io.IOException;
028import java.io.InputStream;
029import java.util.ArrayList;
030import java.util.Collection;
031import java.util.Collections;
032import java.util.HashMap;
033import java.util.HashSet;
034import java.util.Iterator;
035import java.util.LinkedHashSet;
036import java.util.List;
037import java.util.Map;
038import java.util.Set;
039
040import javax.vecmath.Point3d;
041
042import org.biojava.nbio.structure.align.util.AtomCache;
043import org.biojava.nbio.structure.contact.AtomContactSet;
044import org.biojava.nbio.structure.contact.Grid;
045import org.biojava.nbio.structure.io.FileParsingParameters;
046import org.biojava.nbio.structure.io.PDBFileParser;
047import org.biojava.nbio.core.util.FileDownloadUtils;
048import org.slf4j.Logger;
049import org.slf4j.LoggerFactory;
050
051/**
052 * A class that provides some tool methods.
053 *
054 * @author Andreas Prlic
055 * @author Jules Jacobsen
056 * @since 1.0
057 */
058public class StructureTools {
059
060        private static final Logger logger = LoggerFactory
061                        .getLogger(StructureTools.class);
062
063        // Amino Acid backbone
064        /**
065         * The atom name of the backbone C-alpha atom. Note that this can be
066         * ambiguous depending on the context since Calcium atoms use the same name
067         * in PDB.
068         */
069        public static final String CA_ATOM_NAME = "CA";
070
071        /**
072         * The atom name for the backbone amide nitrogen
073         */
074        public static final String N_ATOM_NAME = "N";
075
076        /**
077         * The atom name for the backbone carbonyl
078         */
079        public static final String C_ATOM_NAME = "C";
080
081        /**
082         * The atom name for the backbone carbonyl oxygen
083         */
084        public static final String O_ATOM_NAME = "O";
085
086        /**
087         * The atom name of the side-chain C-beta atom
088         */
089        public static final String CB_ATOM_NAME = "CB";
090
091        // Nucleotide backbone
092        /**
093         * The atom name of the backbone C1' in RNA
094         */
095        public static final String C1_ATOM_NAME = "C1'";
096        /**
097         * The atom name of the backbone C2' in RNA
098         */
099        public static final String C2_ATOM_NAME = "C2'";
100        /**
101         * The atom name of the backbone C3' in RNA
102         */
103        public static final String C3_ATOM_NAME = "C3'";
104        /**
105         * The atom name of the backbone C4' in RNA
106         */
107        public static final String C4_ATOM_NAME = "C4'";
108        /**
109         * The atom name of the backbone O2' in RNA
110         */
111        public static final String O2_ATOM_NAME = "O2'";
112        /**
113         * The atom name of the backbone O3' in RNA
114         */
115        public static final String O3_ATOM_NAME = "O3'";
116        /**
117         * The atom name of the backbone O4' in RNA
118         */
119        public static final String O4_ATOM_NAME = "O4'";
120        /**
121         * The atom name of the backbone O4' in RNA
122         */
123        public static final String O5_ATOM_NAME = "O5'";
124        /**
125         * The atom name of the backbone O4' in RNA
126         */
127        public static final String OP1_ATOM_NAME = "OP1";
128        /**
129         * The atom name of the backbone O4' in RNA
130         */
131        public static final String OP2_ATOM_NAME = "OP2";
132        /**
133         * The atom name of the backbone phosphate in RNA
134         */
135        public static final String P_ATOM_NAME = "P";
136
137        /**
138         * The atom used as representative for nucleotides, equivalent to
139         * {@link #CA_ATOM_NAME} for proteins
140         */
141        public static final String NUCLEOTIDE_REPRESENTATIVE = P_ATOM_NAME;
142
143        /**
144         * The character to use for unknown compounds in sequence strings
145         */
146        public static final char UNKNOWN_GROUP_LABEL = 'X';
147
148        /**
149         * Below this ratio of aminoacid/nucleotide residues to the sequence total,
150         * we use simple majority of aminoacid/nucleotide residues to decide the
151         * character of the chain (protein/nucleotide)
152         */
153        public static final double RATIO_RESIDUES_TO_TOTAL = 0.95;
154
155        /**
156         * Threshold for plausible binding of a ligand to the selected substructure
157         */
158        public static final double DEFAULT_LIGAND_PROXIMITY_CUTOFF = 5;
159
160        // there is a file format change in PDB 3.0 and nucleotides are being
161        // renamed
162        private static final Map<String, Character> nucleotides30;
163        private static final Map<String, Character> nucleotides23;
164
165        // amino acid 3 and 1 letter code definitions
166        private static final Map<String, Character> aminoAcids;
167
168        private static final Set<Element> hBondDonorAcceptors;
169
170        static {
171                nucleotides30 = new HashMap<String, Character>();
172                nucleotides30.put("DA", 'A');
173                nucleotides30.put("DC", 'C');
174                nucleotides30.put("DG", 'G');
175                nucleotides30.put("DT", 'T');
176                nucleotides30.put("DI", 'I');
177                nucleotides30.put("A", 'A');
178                nucleotides30.put("G", 'G');
179                nucleotides30.put("C", 'C');
180                nucleotides30.put("U", 'U');
181                nucleotides30.put("I", 'I');
182
183                // the DNA linkers - the +C , +G, +A +T +U and +I have been replaced
184                // with these:
185                nucleotides30.put("TAF", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine
186                nucleotides30.put("TC1", UNKNOWN_GROUP_LABEL); // Furanosyl
187                nucleotides30.put("TFE", UNKNOWN_GROUP_LABEL); // Fluorinated Thymine
188                nucleotides30.put("TFO", UNKNOWN_GROUP_LABEL); // Tenofovir (3'
189                // terminator)
190                nucleotides30.put("TGP", UNKNOWN_GROUP_LABEL); // Guanine variant
191                nucleotides30.put("THX", UNKNOWN_GROUP_LABEL); // 5' terminator
192                nucleotides30.put("TLC", UNKNOWN_GROUP_LABEL); // Thymine with dicyclic
193                // sugar
194                nucleotides30.put("TLN", UNKNOWN_GROUP_LABEL); // locked Thymine
195                nucleotides30.put("LCG", UNKNOWN_GROUP_LABEL); // locked Guanine
196                nucleotides30.put("TP1", UNKNOWN_GROUP_LABEL); // Thymine peptide
197                // nucleic acid, with
198                // added methyl
199                nucleotides30.put("CP1", UNKNOWN_GROUP_LABEL); // Cytidine peptide
200                // nucleic acid, with
201                // added methyl
202                nucleotides30.put("TPN", UNKNOWN_GROUP_LABEL); // Thymine peptide
203                // nucleic acid
204                nucleotides30.put("CPN", UNKNOWN_GROUP_LABEL); // Cytidine peptide
205                // nucleic acid
206                nucleotides30.put("GPN", UNKNOWN_GROUP_LABEL); // Guanine peptide
207                // nucleic acid
208                nucleotides30.put("APN", UNKNOWN_GROUP_LABEL); // Adenosine peptide
209                // nucleic acid
210                nucleotides30.put("TPC", UNKNOWN_GROUP_LABEL); // Thymine variant
211
212                // store nucleic acids (C, G, A, T, U, and I), and
213                // the modified versions of nucleic acids (+C, +G, +A, +T, +U, and +I),
214                // and
215                nucleotides23 = new HashMap<String, Character>();
216                String[] names = { "C", "G", "A", "T", "U", "I", "+C", "+G", "+A",
217                                "+T", "+U", "+I" };
218                for (String n : names) {
219                        nucleotides23.put(n, n.charAt(n.length() - 1));
220                }
221
222                aminoAcids = new HashMap<String, Character>();
223                aminoAcids.put("GLY", 'G');
224                aminoAcids.put("ALA", 'A');
225                aminoAcids.put("VAL", 'V');
226                aminoAcids.put("LEU", 'L');
227                aminoAcids.put("ILE", 'I');
228                aminoAcids.put("PHE", 'F');
229                aminoAcids.put("TYR", 'Y');
230                aminoAcids.put("TRP", 'W');
231                aminoAcids.put("PRO", 'P');
232                aminoAcids.put("HIS", 'H');
233                aminoAcids.put("LYS", 'K');
234                aminoAcids.put("ARG", 'R');
235                aminoAcids.put("SER", 'S');
236                aminoAcids.put("THR", 'T');
237                aminoAcids.put("GLU", 'E');
238                aminoAcids.put("GLN", 'Q');
239                aminoAcids.put("ASP", 'D');
240                aminoAcids.put("ASN", 'N');
241                aminoAcids.put("CYS", 'C');
242                aminoAcids.put("MET", 'M');
243                // MSE is only found as a molecular replacement for MET
244                aminoAcids.put("MSE", 'M');
245                // 'non-standard', genetically encoded
246                // http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html
247                // IUBMB recommended name is 'SEC' but the wwPDB currently use 'CSE'
248                // likewise 'PYL' (IUBMB) and 'PYH' (PDB)
249                aminoAcids.put("CSE", 'U');
250                aminoAcids.put("SEC", 'U');
251                aminoAcids.put("PYH", 'O');
252                aminoAcids.put("PYL", 'O');
253
254                hBondDonorAcceptors = new HashSet<Element>();
255                hBondDonorAcceptors.add(Element.N);
256                hBondDonorAcceptors.add(Element.O);
257                hBondDonorAcceptors.add(Element.S);
258
259        }
260
261        /**
262         * Count how many Atoms are contained within a Structure object.
263         *
264         * @param s
265         *            the structure object
266         * @return the number of Atoms in this Structure
267         */
268        public static final int getNrAtoms(Structure s) {
269
270                int nrAtoms = 0;
271
272                Iterator<Group> iter = new GroupIterator(s);
273
274                while (iter.hasNext()) {
275                        Group g = iter.next();
276                        nrAtoms += g.size();
277                }
278
279                return nrAtoms;
280        }
281
282        /**
283         * Count how many groups are contained within a structure object.
284         *
285         * @param s
286         *            the structure object
287         * @return the number of groups in the structure
288         */
289        public static final int getNrGroups(Structure s) {
290                int nrGroups = 0;
291
292                List<Chain> chains = s.getChains(0);
293                for (Chain c : chains) {
294                        nrGroups += c.getAtomLength();
295                }
296                return nrGroups;
297        }
298
299        /**
300         * Returns an array of the requested Atoms from the Structure object.
301         * Iterates over all groups and checks if the requested atoms are in this
302         * group, no matter if this is a {@link AminoAcid} or {@link HetatomImpl}
303         * group. If the group does not contain all requested atoms then no atoms
304         * are added for that group. For structures with more than one model, only
305         * model 0 will be used.
306         *
307         * @param s
308         *            the structure to get the atoms from
309         *
310         * @param atomNames
311         *            contains the atom names to be used.
312         * @return an Atom[] array
313         */
314        public static final Atom[] getAtomArray(Structure s, String[] atomNames) {
315                List<Chain> chains = s.getModel(0);
316
317                List<Atom> atoms = new ArrayList<Atom>();
318
319                extractAtoms(atomNames, chains, atoms);
320                return atoms.toArray(new Atom[atoms.size()]);
321
322        }
323
324        /**
325         * Returns an array of the requested Atoms from the Structure object. In
326         * contrast to {@link #getAtomArray(Structure, String[])} this method
327         * iterates over all chains. Iterates over all chains and groups and checks
328         * if the requested atoms are in this group, no matter if this is a
329         * {@link AminoAcid} or {@link HetatomImpl} group. If the group does not
330         * contain all requested atoms then no atoms are added for that group. For
331         * structures with more than one model, only model 0 will be used.
332         *
333         * @param s
334         *            the structure to get the atoms from
335         *
336         * @param atomNames
337         *            contains the atom names to be used.
338         * @return an Atom[] array
339         */
340        public static final Atom[] getAtomArrayAllModels(Structure s,
341                        String[] atomNames) {
342
343                List<Atom> atoms = new ArrayList<Atom>();
344
345                for (int i = 0; i < s.nrModels(); i++) {
346                        List<Chain> chains = s.getModel(i);
347                        extractAtoms(atomNames, chains, atoms);
348                }
349                return atoms.toArray(new Atom[atoms.size()]);
350
351        }
352
353        /**
354         * Convert all atoms of the structure (all models) into an Atom array
355         *
356         * @param s
357         *            input structure
358         * @return all atom array
359         */
360        public static final Atom[] getAllAtomArray(Structure s) {
361                List<Atom> atoms = new ArrayList<Atom>();
362
363                AtomIterator iter = new AtomIterator(s);
364                while (iter.hasNext()) {
365                        Atom a = iter.next();
366                        atoms.add(a);
367                }
368                return atoms.toArray(new Atom[atoms.size()]);
369        }
370        /**
371         * Convert all atoms of the structure (specified model) into an Atom array
372         *
373         * @param s
374         *            input structure
375         * @return all atom array
376         */
377        public static final Atom[] getAllAtomArray(Structure s, int model) {
378                List<Atom> atoms = new ArrayList<Atom>();
379
380                AtomIterator iter = new AtomIterator(s,model);
381                while (iter.hasNext()) {
382                        Atom a = iter.next();
383                        atoms.add(a);
384                }
385                return atoms.toArray(new Atom[atoms.size()]);
386
387        }
388
389        /**
390         * Returns and array of all atoms of the chain, including
391         * Hydrogens (if present) and all HETATOMs. Waters are not included.
392         *
393         * @param c
394         *            input chain
395         * @return all atom array
396         */
397        public static final Atom[] getAllAtomArray(Chain c) {
398                List<Atom> atoms = new ArrayList<Atom>();
399
400                for (Group g : c.getAtomGroups()) {
401                        if (g.isWater())
402                                continue;
403                        for (Atom a : g.getAtoms()) {
404                                atoms.add(a);
405                        }
406                }
407                return atoms.toArray(new Atom[atoms.size()]);
408        }
409
410        /**
411         * List of groups from the structure not included in ca (e.g. ligands).
412         *
413         * Unaligned groups are searched from all chains referenced in ca, as well
414         * as any chains in the first model of the structure from ca[0], if any.
415         *
416         * @param ca an array of atoms
417         * @return
418         */
419        public static List<Group> getUnalignedGroups(Atom[] ca) {
420                Set<Chain> chains = new HashSet<Chain>();
421                Set<Group> caGroups = new HashSet<Group>();
422
423                // Create list of all chains in this structure
424                Structure s = null;
425                if (ca.length > 0) {
426                        Group g = ca[0].getGroup();
427                        if (g != null) {
428                                Chain c = g.getChain();
429                                if (c != null) {
430                                        s = c.getStructure();
431                                }
432                        }
433                }
434                if (s != null) {
435                        // Add all chains from the structure
436                        for (Chain c : s.getChains(0)) {
437                                chains.add(c);
438                        }
439                }
440
441                // Add groups and chains from ca
442                for (Atom a : ca) {
443                        Group g = a.getGroup();
444                        if (g != null) {
445                                caGroups.add(g);
446
447                                Chain c = g.getChain();
448                                if (c != null) {
449                                        chains.add(c);
450                                }
451                        }
452                }
453
454                // Iterate through all chains, finding groups not in ca
455                List<Group> unadded = new ArrayList<Group>();
456                for (Chain c : chains) {
457                        for (Group g : c.getAtomGroups()) {
458                                if (!caGroups.contains(g)) {
459                                        unadded.add(g);
460                                }
461                        }
462                }
463                return unadded;
464        }
465
466        /**
467         * Finds all ligand groups from the target which fall within the cutoff distance
468         * of some atom from the query set.
469         *
470         * @param target Set of groups including the ligands
471         * @param query Atom selection
472         * @param cutoff Distance from query atoms to consider, in angstroms
473         * @return All groups from the target with at least one atom within cutoff of a query atom
474         * @see StructureTools#DEFAULT_LIGAND_PROXIMITY_CUTOFF
475         */
476        public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[] query, double cutoff) {
477                // Geometric hashing of the reduced structure
478                Grid grid = new Grid(cutoff);
479                grid.addAtoms(query);
480
481                List<Group> ligands = new ArrayList<>();
482                for(Group g :target ) {
483                        // don't worry about waters
484                        if(g.isWater()) {
485                                continue;
486                        }
487
488                        if(g.isPolymeric() ) {
489                                // Polymers aren't ligands
490                                continue;
491                        }
492
493                        // It is a ligand!
494
495                        // Check that it's within cutoff of something in reduced
496                        List<Atom> groupAtoms = g.getAtoms();
497                        if( ! grid.hasAnyContact(Calc.atomsToPoints(groupAtoms))) {
498                                continue;
499                        }
500
501                        ligands.add(g);
502                }
503                return ligands;
504        }
505
506        /**
507         * Adds a particular group to a structure. A new chain will be created if necessary.
508         *
509         * <p>When adding multiple groups, pass the return value of one call as the
510         * chainGuess parameter of the next call for efficiency.
511         * <pre>
512         * Chain guess = null;
513         * for(Group g : groups) {
514         *     guess = addGroupToStructure(s, g, guess );
515         * }
516         * </pre>
517         * @param s structure to receive the group
518         * @param g group to add
519         * @param chainGuess (optional) If not null, should be a chain from s. Used
520         *  to improve performance when adding many groups from the same chain
521         * @param clone Indicates whether the input group should be cloned before
522         *  being added to the new chain
523         * @return the chain g was added to
524         */
525        public static Chain addGroupToStructure(Structure s, Group g, int model, Chain chainGuess, boolean clone ) {
526                synchronized(s) {
527                        // Find or create the chain
528                        String chainId = g.getChainId();
529                        assert !chainId.isEmpty();
530                        Chain chain;
531                        if(chainGuess != null && chainGuess.getId() == chainId) {
532                                // previously guessed chain
533                                chain = chainGuess;
534                        } else {
535                                // Try to guess
536                                chain = s.getChain(chainId, model);
537                                if(chain == null) {
538                                        // no chain found
539                                        chain = new ChainImpl();
540                                        chain.setId(chainId);
541
542                                        Chain oldChain = g.getChain();
543                                        chain.setName(oldChain.getName());
544
545                                        EntityInfo oldEntityInfo = oldChain.getEntityInfo();
546
547                                        EntityInfo newEntityInfo;
548                                        if(oldEntityInfo == null) {
549                                                newEntityInfo = new EntityInfo();
550                                                s.addEntityInfo(newEntityInfo);
551                                        } else {
552                                                newEntityInfo = s.getEntityById(oldEntityInfo.getMolId());
553                                                if( newEntityInfo == null ) {
554                                                        newEntityInfo = new EntityInfo(oldEntityInfo);
555                                                        s.addEntityInfo(newEntityInfo);
556                                                }
557                                        }
558                                        newEntityInfo.addChain(chain);
559                                        chain.setEntityInfo(newEntityInfo);
560
561                                        // TODO Do the seqres need to be cloned too? -SB 2016-10-7
562                                        chain.setSeqResGroups(oldChain.getSeqResGroups());
563                                        chain.setSeqMisMatches(oldChain.getSeqMisMatches());
564
565                                        s.addChain(chain,model);
566                                }
567                        }
568
569                        // Add cloned group
570                        if(clone) {
571                                g = (Group)g.clone();
572                        }
573                        chain.addGroup(g);
574
575                        return chain;
576                }
577        }
578
579        /**
580         * Add a list of groups to a new structure. Chains will be automatically
581         * created in the new structure as needed.
582         * @param s structure to receive the group
583         * @param g group to add
584         * @param clone Indicates whether the input groups should be cloned before
585         *  being added to the new chain
586         */
587        public static void addGroupsToStructure(Structure s, Collection<Group> groups, int model, boolean clone) {
588                Chain chainGuess = null;
589                for(Group g : groups) {
590                        chainGuess = addGroupToStructure(s, g, model, chainGuess, clone);
591                }
592        }
593
594        /**
595         * Expand a set of atoms into all groups from the same structure.
596         *
597         * If the structure is set, only the first atom is used (assuming all
598         * atoms come from the same original structure).
599         * If the atoms aren't linked to a structure (for instance, for cloned atoms),
600         * searches all chains of all atoms for groups.
601         * @param atoms Sample of atoms
602         * @return All groups from all chains accessible from the input atoms
603         */
604        public static Set<Group> getAllGroupsFromSubset(Atom[] atoms) {
605                return getAllGroupsFromSubset(atoms,null);
606        }
607        /**
608         * Expand a set of atoms into all groups from the same structure.
609         *
610         * If the structure is set, only the first atom is used (assuming all
611         * atoms come from the same original structure).
612         * If the atoms aren't linked to a structure (for instance, for cloned atoms),
613         * searches all chains of all atoms for groups.
614         * @param atoms Sample of atoms
615         * @param types Type of groups to return (useful for getting only ligands, for instance).
616         *  Null gets all groups.
617         * @return All groups from all chains accessible from the input atoms
618         */
619        public static Set<Group> getAllGroupsFromSubset(Atom[] atoms,GroupType types) {
620                // Get the full structure
621                Structure s = null;
622                if (atoms.length > 0) {
623                        Group g = atoms[0].getGroup();
624                        if (g != null) {
625                                Chain c = g.getChain();
626                                if (c != null) {
627                                        s = c.getStructure();
628                                }
629                        }
630                }
631                // Collect all groups from the structure
632                Set<Chain> allChains = new HashSet<>();
633                if( s != null ) {
634                        allChains.addAll(s.getChains());
635                }
636                // In case the structure wasn't set, need to use ca chains too
637                for(Atom a : atoms) {
638                        Group g = a.getGroup();
639                        if(g != null) {
640                                Chain c = g.getChain();
641                                if( c != null ) {
642                                        allChains.add(c);
643                                }
644                        }
645                }
646
647                if(allChains.isEmpty() ) {
648                        return Collections.emptySet();
649                }
650
651                // Extract all ligand groups
652                Set<Group> full = new HashSet<>();
653                for(Chain c : allChains) {
654                        if(types == null) {
655                                full.addAll(c.getAtomGroups());
656                        } else {
657                                full.addAll(c.getAtomGroups(types));
658                        }
659                }
660
661                return full;
662        }
663
664
665        /**
666         * Returns and array of all non-Hydrogen atoms in the given Structure,
667         * optionally including HET atoms or not. Waters are not included.
668         *
669         * @param s
670         * @param hetAtoms
671         *            if true HET atoms are included in array, if false they are not
672         * @return
673         */
674        public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms) {
675                AtomIterator iter = new AtomIterator(s);
676                return getAllNonHAtomArray(s, hetAtoms, iter);
677        }
678        /**
679         * Returns and array of all non-Hydrogen atoms in the given Structure,
680         * optionally including HET atoms or not. Waters are not included.
681         *
682         * @param s
683         * @param hetAtoms
684         *            if true HET atoms are included in array, if false they are not
685         * @param modelNr Model number to draw atoms from
686         * @return
687         */
688        public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, int modelNr) {
689                AtomIterator iter = new AtomIterator(s,modelNr);
690                return getAllNonHAtomArray(s, hetAtoms, iter);
691        }
692        private static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, AtomIterator iter) {
693                List<Atom> atoms = new ArrayList<Atom>();
694
695                while (iter.hasNext()) {
696                        Atom a = iter.next();
697                        if (a.getElement() == Element.H)
698                                continue;
699
700                        Group g = a.getGroup();
701
702                        if (g.isWater())
703                                continue;
704
705                        if (!hetAtoms && g.getType().equals(GroupType.HETATM))
706                                continue;
707
708                        atoms.add(a);
709                }
710                return atoms.toArray(new Atom[atoms.size()]);
711        }
712
713        /**
714         * Returns and array of all non-Hydrogen atoms in the given Chain,
715         * optionally including HET atoms or not Waters are not included.
716         *
717         * @param c
718         * @param hetAtoms
719         *            if true HET atoms are included in array, if false they are not
720         * @return
721         */
722        public static final Atom[] getAllNonHAtomArray(Chain c, boolean hetAtoms) {
723                List<Atom> atoms = new ArrayList<Atom>();
724
725                for (Group g : c.getAtomGroups()) {
726                        if (g.isWater())
727                                continue;
728                        for (Atom a : g.getAtoms()) {
729
730                                if (a.getElement() == Element.H)
731                                        continue;
732
733                                if (!hetAtoms && g.getType().equals(GroupType.HETATM))
734                                        continue;
735
736                                atoms.add(a);
737                        }
738                }
739                return atoms.toArray(new Atom[atoms.size()]);
740        }
741
742        /**
743         * Returns and array of all non-Hydrogen atoms coordinates in the given Chain,
744         * optionally including HET atoms or not Waters are not included.
745         *
746         * @param c
747         * @param hetAtoms
748         *            if true HET atoms are included in array, if false they are not
749         * @return
750         */
751        public static final Point3d[] getAllNonHCoordsArray(Chain c, boolean hetAtoms) {
752                List<Point3d> atoms = new ArrayList<Point3d>();
753
754                for (Group g : c.getAtomGroups()) {
755                        if (g.isWater())
756                                continue;
757                        for (Atom a : g.getAtoms()) {
758
759                                if (a.getElement() == Element.H)
760                                        continue;
761
762                                if (!hetAtoms && g.getType().equals(GroupType.HETATM))
763                                        continue;
764
765                                atoms.add(a.getCoordsAsPoint3d());
766                        }
767                }
768                return atoms.toArray(new Point3d[atoms.size()]);
769        }
770
771        /**
772         * Adds to the given atoms list, all atoms of groups that contained all
773         * requested atomNames, i.e. if a group does not contain all of the
774         * requested atom names, its atoms won't be added.
775         *
776         * @param atomNames
777         * @param chains
778         * @param atoms
779         */
780        private static void extractAtoms(String[] atomNames, List<Chain> chains,
781                        List<Atom> atoms) {
782
783                for (Chain c : chains) {
784
785                        for (Group g : c.getAtomGroups()) {
786
787                                // a temp container for the atoms of this group
788                                List<Atom> thisGroupAtoms = new ArrayList<Atom>();
789                                // flag to check if this group contains all the requested atoms.
790                                boolean thisGroupAllAtoms = true;
791                                for (String atomName : atomNames) {
792                                        Atom a = g.getAtom(atomName);
793
794                                        if (a == null) {
795                                                // this group does not have a required atom, skip it...
796                                                thisGroupAllAtoms = false;
797                                                break;
798                                        }
799                                        thisGroupAtoms.add(a);
800                                }
801                                if (thisGroupAllAtoms) {
802                                        // add the atoms of this group to the array.
803                                        for (Atom a : thisGroupAtoms) {
804                                                atoms.add(a);
805                                        }
806                                }
807
808                        }
809                }
810        }
811
812        /**
813         * Returns an array of the requested Atoms from the Chain object. Iterates
814         * over all groups and checks if the requested atoms are in this group, no
815         * matter if this is a AminoAcid or Hetatom group. If the group does not
816         * contain all requested atoms then no atoms are added for that group.
817         *
818         * @param c
819         *            the Chain to get the atoms from
820         *
821         * @param atomNames
822         *            contains the atom names to be used.
823         * @return an Atom[] array
824         */
825        public static final Atom[] getAtomArray(Chain c, String[] atomNames) {
826
827                List<Atom> atoms = new ArrayList<Atom>();
828
829                for (Group g : c.getAtomGroups()) {
830
831                        // a temp container for the atoms of this group
832                        List<Atom> thisGroupAtoms = new ArrayList<Atom>();
833                        // flag to check if this group contains all the requested atoms.
834                        boolean thisGroupAllAtoms = true;
835                        for (String atomName : atomNames) {
836                                Atom a = g.getAtom(atomName);
837                                if (a == null) {
838                                        logger.debug("Group " + g.getResidueNumber() + " ("
839                                                        + g.getPDBName()
840                                                        + ") does not have the required atom '" + atomName
841                                                        + "'");
842                                        // this group does not have a required atom, skip it...
843                                        thisGroupAllAtoms = false;
844                                        break;
845                                }
846                                thisGroupAtoms.add(a);
847                        }
848
849                        if (thisGroupAllAtoms) {
850                                // add the atoms of this group to the array.
851                                for (Atom a : thisGroupAtoms) {
852                                        atoms.add(a);
853                                }
854                        }
855
856                }
857                return atoms.toArray(new Atom[atoms.size()]);
858
859        }
860
861        /**
862         * Returns an Atom array of the C-alpha atoms. Any atom that is a carbon and
863         * has CA name will be returned.
864         *
865         * @param c
866         *            the structure object
867         * @return an Atom[] array
868         * @see #getRepresentativeAtomArray(Chain)
869         */
870        public static final Atom[] getAtomCAArray(Chain c) {
871                List<Atom> atoms = new ArrayList<Atom>();
872
873                for (Group g : c.getAtomGroups()) {
874                        if (g.hasAtom(CA_ATOM_NAME)
875                                        && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) {
876                                atoms.add(g.getAtom(CA_ATOM_NAME));
877                        }
878                }
879
880                return atoms.toArray(new Atom[atoms.size()]);
881        }
882
883        /**
884         * Gets a representative atom for each group that is part of the chain
885         * backbone. Note that modified aminoacids won't be returned as part of the
886         * backbone if the {@link org.biojava.nbio.structure.io.mmcif.ReducedChemCompProvider} was used to load the
887         * structure.
888         *
889         * For amino acids, the representative is a CA carbon. For nucleotides, the
890         * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group
891         * types will be ignored.
892         *
893         * @param c
894         * @return representative Atoms of the chain backbone
895         * @since Biojava 4.1.0
896         */
897        public static final Atom[] getRepresentativeAtomArray(Chain c) {
898                List<Atom> atoms = new ArrayList<Atom>();
899
900                for (Group g : c.getAtomGroups()) {
901
902                        switch (g.getType()) {
903                        case AMINOACID:
904                                if (g.hasAtom(CA_ATOM_NAME)
905                                                && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) {
906                                        atoms.add(g.getAtom(CA_ATOM_NAME));
907                                }
908                                break;
909                        case NUCLEOTIDE:
910                                if (g.hasAtom(NUCLEOTIDE_REPRESENTATIVE)) {
911                                        atoms.add(g.getAtom(NUCLEOTIDE_REPRESENTATIVE));
912                                }
913                                break;
914                        default:
915                                // don't add
916                        }
917                }
918
919                return atoms.toArray(new Atom[atoms.size()]);
920
921        }
922
923        /**
924         * Provides an equivalent copy of Atoms in a new array. Clones everything,
925         * starting with parent groups and chains. The chain will only contain
926         * groups that are part of the input array.
927         *
928         * @param ca
929         *            array of representative atoms, e.g. CA atoms
930         * @return Atom array
931         * @since Biojava 4.1.0
932         */
933        public static final Atom[] cloneAtomArray(Atom[] ca) {
934                Atom[] newCA = new Atom[ca.length];
935
936                List<Chain> model = new ArrayList<Chain>();
937                int apos = -1;
938                for (Atom a : ca) {
939                        apos++;
940                        Group parentG = a.getGroup();
941                        Chain parentC = parentG.getChain();
942
943                        Chain newChain = null;
944                        for (Chain c : model) {
945                                if (c.getName().equals(parentC.getName())) {
946                                        newChain = c;
947                                        break;
948                                }
949                        }
950                        if (newChain == null) {
951                                newChain = new ChainImpl();
952                                newChain.setId(parentC.getId());
953                                newChain.setName(parentC.getName());
954                                model.add(newChain);
955                        }
956
957                        Group parentN = (Group) parentG.clone();
958
959                        newCA[apos] = parentN.getAtom(a.getName());
960                        try {
961                                // if the group doesn't exist yet, this produces a StructureException
962                                newChain.getGroupByPDB(parentN.getResidueNumber());
963                        } catch (StructureException e) {
964                                // the group doesn't exist yet in the newChain, let's add it
965                                newChain.addGroup(parentN);
966                        }
967
968                }
969                return newCA;
970        }
971
972        /**
973         * Clone a set of representative Atoms, but returns the parent groups
974         *
975         * @param ca
976         *            Atom array
977         * @return Group array
978         */
979        public static Group[] cloneGroups(Atom[] ca) {
980                Group[] newGroup = new Group[ca.length];
981
982                List<Chain> model = new ArrayList<Chain>();
983                int apos = -1;
984                for (Atom a : ca) {
985                        apos++;
986                        Group parentG = a.getGroup();
987                        Chain parentC = parentG.getChain();
988
989                        Chain newChain = null;
990                        for (Chain c : model) {
991                                if (c.getName().equals(parentC.getName())) {
992                                        newChain = c;
993                                        break;
994                                }
995                        }
996                        if (newChain == null) {
997                                newChain = new ChainImpl();
998                                newChain.setName(parentC.getName());
999                                model.add(newChain);
1000                        }
1001
1002                        Group ng = (Group) parentG.clone();
1003                        newGroup[apos] = ng;
1004                        newChain.addGroup(ng);
1005                }
1006                return newGroup;
1007        }
1008
1009        /**
1010         * Utility method for working with circular permutations. Creates a
1011         * duplicated and cloned set of Calpha atoms from the input array.
1012         *
1013         * @param ca2
1014         *            atom array
1015         * @return cloned and duplicated set of input array
1016         */
1017        public static Atom[] duplicateCA2(Atom[] ca2) {
1018                // we don't want to rotate input atoms, do we?
1019                Atom[] ca2clone = new Atom[ca2.length * 2];
1020
1021                int pos = 0;
1022
1023                Chain c = null;
1024                String prevChainId = "";
1025                for (Atom a : ca2) {
1026                        Group g = (Group) a.getGroup().clone(); // works because each group
1027                        // has only a single atom
1028
1029                        if (c == null) {
1030                                c = new ChainImpl();
1031                                Chain orig = a.getGroup().getChain();
1032                                c.setId(orig.getId());
1033                                c.setName(orig.getName());
1034                        } else {
1035                                Chain orig = a.getGroup().getChain();
1036                                if (!orig.getId().equals(prevChainId)) {
1037                                        c = new ChainImpl();
1038                                        c.setId(orig.getId());
1039                                        c.setName(orig.getName());
1040                                }
1041                        }
1042
1043                        c.addGroup(g);
1044                        ca2clone[pos] = g.getAtom(a.getName());
1045
1046                        pos++;
1047                }
1048
1049                // Duplicate ca2!
1050                c = null;
1051                prevChainId = "";
1052                for (Atom a : ca2) {
1053                        Group g = (Group) a.getGroup().clone();
1054
1055                        if (c == null) {
1056                                c = new ChainImpl();
1057                                Chain orig = a.getGroup().getChain();
1058                                c.setId(orig.getId());
1059                                c.setName(orig.getName());
1060                        } else {
1061                                Chain orig = a.getGroup().getChain();
1062                                if (!orig.getId().equals(prevChainId)) {
1063                                        c = new ChainImpl();
1064                                        c.setId(orig.getId());
1065                                        c.setName(orig.getName());
1066                                }
1067                        }
1068
1069                        c.addGroup(g);
1070                        ca2clone[pos] = g.getAtom(a.getName());
1071
1072                        pos++;
1073                }
1074
1075                return ca2clone;
1076
1077        }
1078
1079        /**
1080         * Return an Atom array of the C-alpha atoms. Any atom that is a carbon and
1081         * has CA name will be returned.
1082         *
1083         * @param s
1084         *            the structure object
1085         * @return an Atom[] array
1086         * @see #getRepresentativeAtomArray(Structure)
1087         */
1088        public static Atom[] getAtomCAArray(Structure s) {
1089
1090                List<Atom> atoms = new ArrayList<Atom>();
1091
1092                for (Chain c : s.getChains()) {
1093                        for (Group g : c.getAtomGroups()) {
1094                                if (g.hasAtom(CA_ATOM_NAME)
1095                                                && g.getAtom(CA_ATOM_NAME).getElement() == Element.C) {
1096                                        atoms.add(g.getAtom(CA_ATOM_NAME));
1097                                }
1098                        }
1099                }
1100
1101                return atoms.toArray(new Atom[atoms.size()]);
1102        }
1103
1104        /**
1105         * Gets a representative atom for each group that is part of the chain
1106         * backbone. Note that modified aminoacids won't be returned as part of the
1107         * backbone if the {@link org.biojava.nbio.structure.io.mmcif.ReducedChemCompProvider} was used to load the
1108         * structure.
1109         *
1110         * For amino acids, the representative is a CA carbon. For nucleotides, the
1111         * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE}. Other group
1112         * types will be ignored.
1113         *
1114         * @param s
1115         *            Input structure
1116         * @return representative Atoms of the structure backbone
1117         * @since Biojava 4.1.0
1118         */
1119        public static Atom[] getRepresentativeAtomArray(Structure s) {
1120
1121                List<Atom> atoms = new ArrayList<Atom>();
1122
1123                for (Chain c : s.getChains()) {
1124                        Atom[] chainAtoms = getRepresentativeAtomArray(c);
1125                        for (Atom a : chainAtoms) {
1126                                atoms.add(a);
1127                        }
1128                }
1129
1130                return atoms.toArray(new Atom[atoms.size()]);
1131        }
1132
1133        /**
1134         * Return an Atom array of the main chain atoms: CA, C, N, O Any group that
1135         * contains those atoms will be included, be it a standard aminoacid or not
1136         *
1137         * @param s
1138         *            the structure object
1139         * @return an Atom[] array
1140         */
1141        public static Atom[] getBackboneAtomArray(Structure s) {
1142
1143                List<Atom> atoms = new ArrayList<Atom>();
1144
1145                for (Chain c : s.getChains()) {
1146                        for (Group g : c.getAtomGroups()) {
1147                                if (g.hasAminoAtoms()) {
1148                                        // this means we will only take atoms grom groups that have
1149                                        // complete backbones
1150                                        for (Atom a : g.getAtoms()) {
1151                                                switch (g.getType()) {
1152                                                case NUCLEOTIDE:
1153                                                        // Nucleotide backbone
1154                                                        if (a.getName().equals(C1_ATOM_NAME))
1155                                                                atoms.add(a);
1156                                                        if (a.getName().equals(C2_ATOM_NAME))
1157                                                                atoms.add(a);
1158                                                        if (a.getName().equals(C3_ATOM_NAME))
1159                                                                atoms.add(a);
1160                                                        if (a.getName().equals(C4_ATOM_NAME))
1161                                                                atoms.add(a);
1162                                                        if (a.getName().equals(O2_ATOM_NAME))
1163                                                                atoms.add(a);
1164                                                        if (a.getName().equals(O3_ATOM_NAME))
1165                                                                atoms.add(a);
1166                                                        if (a.getName().equals(O4_ATOM_NAME))
1167                                                                atoms.add(a);
1168                                                        if (a.getName().equals(O5_ATOM_NAME))
1169                                                                atoms.add(a);
1170                                                        if (a.getName().equals(OP1_ATOM_NAME))
1171                                                                atoms.add(a);
1172                                                        if (a.getName().equals(OP2_ATOM_NAME))
1173                                                                atoms.add(a);
1174                                                        if (a.getName().equals(P_ATOM_NAME))
1175                                                                atoms.add(a);
1176                                                        // TODO Allow C4* names as well as C4'? -SB 3/2015
1177                                                        break;
1178                                                case AMINOACID:
1179                                                default:
1180                                                        // we do it this way instead of with g.getAtom() to
1181                                                        // be sure we always use the same order as original
1182                                                        if (a.getName().equals(CA_ATOM_NAME))
1183                                                                atoms.add(a);
1184                                                        if (a.getName().equals(C_ATOM_NAME))
1185                                                                atoms.add(a);
1186                                                        if (a.getName().equals(N_ATOM_NAME))
1187                                                                atoms.add(a);
1188                                                        if (a.getName().equals(O_ATOM_NAME))
1189                                                                atoms.add(a);
1190                                                        break;
1191                                                }
1192                                        }
1193                                }
1194                        }
1195
1196                }
1197
1198                return atoms.toArray(new Atom[atoms.size()]);
1199        }
1200
1201        /**
1202         * Convert three character amino acid codes into single character e.g.
1203         * convert CYS to C. Valid 3-letter codes will be those of the standard 20
1204         * amino acids plus MSE, CSE, SEC, PYH, PYL (see the {@link #aminoAcids}
1205         * map)
1206         *
1207         * @return the 1 letter code, or null if the given 3 letter code does not
1208         *         correspond to an amino acid code
1209         * @param groupCode3
1210         *            a three character amino acid representation String
1211         * @see {@link #get1LetterCode(String)}
1212         */
1213        public static final Character get1LetterCodeAmino(String groupCode3) {
1214                return aminoAcids.get(groupCode3);
1215        }
1216
1217        /**
1218         * Convert a three letter amino acid or nucleotide code into a single
1219         * character code. If the code does not correspond to an amino acid or
1220         * nucleotide, returns {@link #UNKNOWN_GROUP_LABEL}.
1221         *
1222         * Returned null for nucleotides prior to version 4.0.1.
1223         *
1224         * @param groupCode3
1225         *            three letter representation
1226         * @return The 1-letter abbreviation
1227         */
1228        public static final Character get1LetterCode(String groupCode3) {
1229
1230                Character code1;
1231
1232                // is it a standard amino acid ?
1233                code1 = get1LetterCodeAmino(groupCode3);
1234
1235                if (code1 == null) {
1236                        // hm groupCode3 is not standard
1237                        // perhaps it is a nucleotide?
1238                        groupCode3 = groupCode3.trim();
1239                        if (isNucleotide(groupCode3)) {
1240                                code1 = nucleotides30.get(groupCode3);
1241                                if (code1 == null) {
1242                                        code1 = nucleotides23.get(groupCode3);
1243                                }
1244                                if (code1 == null) {
1245                                        code1 = UNKNOWN_GROUP_LABEL;
1246                                }
1247                        } else {
1248                                // does not seem to be so let's assume it is
1249                                // nonstandard aminoacid and label it "X"
1250                                // logger.warning("unknown group name "+groupCode3 );
1251                                code1 = UNKNOWN_GROUP_LABEL;
1252                        }
1253                }
1254
1255                return code1;
1256
1257        }
1258
1259        /**
1260         * Test if the three-letter code of an ATOM entry corresponds to a
1261         * nucleotide or to an aminoacid.
1262         *
1263         * @param groupCode3
1264         *            3-character code for a group.
1265         *
1266         */
1267        public static final boolean isNucleotide(String groupCode3) {
1268                String code = groupCode3.trim();
1269                return nucleotides30.containsKey(code)
1270                                || nucleotides23.containsKey(code);
1271        }
1272
1273        /**
1274         * Reduce a structure to provide a smaller representation . Only takes the
1275         * first model of the structure. If chainName is provided only return a
1276         * structure containing that Chain ID. Converts lower case chain IDs to
1277         * upper case if structure does not contain a chain with that ID.
1278         *
1279         * @param s
1280         * @param chainId
1281         * @return Structure
1282         * @since 3.0
1283         * @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0)
1284         */
1285        @Deprecated
1286        public static final Structure getReducedStructure(Structure s,
1287                        String chainId) throws StructureException {
1288                // since we deal here with structure alignments,
1289                // only use Model 1...
1290
1291                Structure newS = new StructureImpl();
1292                newS.setPDBCode(s.getPDBCode());
1293                newS.setPDBHeader(s.getPDBHeader());
1294                newS.setName(s.getName());
1295                newS.setSSBonds(s.getSSBonds());
1296                newS.setDBRefs(s.getDBRefs());
1297                newS.setSites(s.getSites());
1298                newS.setBiologicalAssembly(s.isBiologicalAssembly());
1299                newS.setEntityInfos(s.getEntityInfos());
1300                newS.setSSBonds(s.getSSBonds());
1301                newS.setSites(s.getSites());
1302
1303                if (chainId != null)
1304                        chainId = chainId.trim();
1305
1306                if (chainId == null || chainId.equals("")) {
1307                        // only get model 0
1308                        List<Chain> model0 = s.getModel(0);
1309                        for (Chain c : model0) {
1310                                newS.addChain(c);
1311                        }
1312                        return newS;
1313
1314                }
1315
1316                Chain c = null;
1317                try {
1318                        c = s.getChainByPDB(chainId);
1319                } catch (StructureException e) {
1320                        logger.warn(e.getMessage() + ". Chain id " + chainId
1321                                        + " did not match, trying upper case Chain id.");
1322                        c = s.getChainByPDB(chainId.toUpperCase());
1323
1324                }
1325                if (c != null) {
1326                        newS.addChain(c);
1327                        for (EntityInfo comp : s.getEntityInfos()) {
1328                                if (comp.getChainIds() != null
1329                                                && comp.getChainIds().contains(c.getChainID())) {
1330                                        // found matching entity info. set description...
1331                                        newS.getPDBHeader().setDescription(
1332                                                        "Chain " + c.getChainID() + " of " + s.getPDBCode()
1333                                                        + " " + comp.getDescription());
1334                                }
1335                        }
1336                }
1337
1338                return newS;
1339        }
1340
1341        public static final String convertAtomsToSeq(Atom[] atoms) {
1342
1343                StringBuilder buf = new StringBuilder();
1344                Group prevGroup = null;
1345                for (Atom a : atoms) {
1346                        Group g = a.getGroup();
1347                        if (prevGroup != null) {
1348                                if (prevGroup.equals(g)) {
1349                                        // we add each group only once.
1350                                        continue;
1351                                }
1352                        }
1353                        String code3 = g.getPDBName();
1354                        Character code1 = get1LetterCodeAmino(code3);
1355                        if (code1 == null)
1356                                code1 = UNKNOWN_GROUP_LABEL;
1357
1358                        buf.append(code1);
1359
1360                        prevGroup = g;
1361
1362                }
1363                return buf.toString();
1364        }
1365
1366        /**
1367         * Get a group represented by a ResidueNumber.
1368         *
1369         * @param struc
1370         *            a {@link Structure}
1371         * @param pdbResNum
1372         *            a {@link ResidueNumber}
1373         * @return a group in the structure that is represented by the pdbResNum.
1374         * @throws StructureException
1375         *             if the group cannot be found.
1376         */
1377        public static final Group getGroupByPDBResidueNumber(Structure struc,
1378                        ResidueNumber pdbResNum) throws StructureException {
1379                if (struc == null || pdbResNum == null) {
1380                        throw new IllegalArgumentException("Null argument(s).");
1381                }
1382
1383                Chain chain = struc.getPolyChainByPDB(pdbResNum.getChainName());
1384
1385                return chain.getGroupByPDB(pdbResNum);
1386        }
1387
1388        /**
1389         * Returns the set of intra-chain contacts for the given chain for given
1390         * atom names, i.e. the contact map. Uses a geometric hashing algorithm that
1391         * speeds up the calculation without need of full distance matrix. The
1392         * parsing mode {@link FileParsingParameters#setAlignSeqRes(boolean)} needs
1393         * to be set to true for this to work.
1394         *
1395         * @param chain
1396         * @param atomNames
1397         *            the array with atom names to be used. Beware: CA will do both
1398         *            C-alphas an Calciums if null all non-H atoms of non-hetatoms
1399         *            will be used
1400         * @param cutoff
1401         * @return
1402         */
1403        public static AtomContactSet getAtomsInContact(Chain chain,
1404                        String[] atomNames, double cutoff) {
1405                Grid grid = new Grid(cutoff);
1406
1407                Atom[] atoms = null;
1408                if (atomNames == null) {
1409                        atoms = getAllNonHAtomArray(chain, false);
1410                } else {
1411                        atoms = getAtomArray(chain, atomNames);
1412                }
1413                // If tha
1414                if(atoms.length==0){
1415                        logger.warn("No atoms found for buidling grid!");
1416                        return new AtomContactSet(cutoff);
1417                }
1418                grid.addAtoms(atoms);
1419
1420                return grid.getAtomContacts();
1421        }
1422
1423        /**
1424         * Returns the set of intra-chain contacts for the given chain for all non-H
1425         * atoms of non-hetatoms, i.e. the contact map. Uses a geometric hashing
1426         * algorithm that speeds up the calculation without need of full distance
1427         * matrix. The parsing mode
1428         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1429         * true for this to work.
1430         *
1431         * @param chain
1432         * @param cutoff
1433         * @return
1434         */
1435        public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
1436                return getAtomsInContact(chain, (String[]) null, cutoff);
1437        }
1438
1439        /**
1440         * Returns the set of intra-chain contacts for the given chain for C-alpha
1441         * atoms (including non-standard aminoacids appearing as HETATM groups),
1442         * i.e. the contact map. Uses a geometric hashing algorithm that speeds up
1443         * the calculation without need of full distance matrix. The parsing mode
1444         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1445         * true for this to work.
1446         *
1447         * @param chain
1448         * @param cutoff
1449         * @return
1450         * @see {@link #getRepresentativeAtomsInContact(Chain, double)}
1451         */
1452        public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
1453                Grid grid = new Grid(cutoff);
1454
1455                Atom[] atoms = getAtomCAArray(chain);
1456
1457                grid.addAtoms(atoms);
1458
1459                return grid.getAtomContacts();
1460        }
1461
1462        /**
1463         * Returns the set of intra-chain contacts for the given chain for C-alpha
1464         * or C3' atoms (including non-standard aminoacids appearing as HETATM
1465         * groups), i.e. the contact map. Uses a geometric hashing algorithm that
1466         * speeds up the calculation without need of full distance matrix.
1467         *
1468         * @param chain
1469         * @param cutoff
1470         * @return
1471         * @since Biojava 4.1.0
1472         */
1473        public static AtomContactSet getRepresentativeAtomsInContact(Chain chain,
1474                        double cutoff) {
1475                Grid grid = new Grid(cutoff);
1476
1477                Atom[] atoms = getRepresentativeAtomArray(chain);
1478
1479                grid.addAtoms(atoms);
1480
1481                return grid.getAtomContacts();
1482        }
1483
1484        /**
1485         * Returns the set of inter-chain contacts between the two given chains for
1486         * the given atom names. Uses a geometric hashing algorithm that speeds up
1487         * the calculation without need of full distance matrix. The parsing mode
1488         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1489         * true for this to work.
1490         *
1491         * @param chain1
1492         * @param chain2
1493         * @param atomNames
1494         *            the array with atom names to be used. For Calphas use {"CA"},
1495         *            if null all non-H atoms will be used. Note HET atoms are
1496         *            ignored unless this parameter is null.
1497         * @param cutoff
1498         * @param hetAtoms
1499         *            if true HET atoms are included, if false they are not
1500         * @return
1501         */
1502        public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2,
1503                        String[] atomNames, double cutoff, boolean hetAtoms) {
1504                Grid grid = new Grid(cutoff);
1505                Atom[] atoms1 = null;
1506                Atom[] atoms2 = null;
1507                if (atomNames == null) {
1508                        atoms1 = getAllNonHAtomArray(chain1, hetAtoms);
1509                        atoms2 = getAllNonHAtomArray(chain2, hetAtoms);
1510                } else {
1511                        atoms1 = getAtomArray(chain1, atomNames);
1512                        atoms2 = getAtomArray(chain2, atomNames);
1513                }
1514                grid.addAtoms(atoms1, atoms2);
1515
1516                return grid.getAtomContacts();
1517        }
1518
1519        /**
1520         * Returns the set of inter-chain contacts between the two given chains for
1521         * all non-H atoms. Uses a geometric hashing algorithm that speeds up the
1522         * calculation without need of full distance matrix. The parsing mode
1523         * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
1524         * true for this to work.
1525         *
1526         * @param chain1
1527         * @param chain2
1528         * @param cutoff
1529         * @param hetAtoms
1530         *            if true HET atoms are included, if false they are not
1531         * @return
1532         */
1533        public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2,
1534                        double cutoff, boolean hetAtoms) {
1535                return getAtomsInContact(chain1, chain2, null, cutoff, hetAtoms);
1536        }
1537
1538        /**
1539         * Finds Groups in {@code structure} that contain at least one Atom that is
1540         * within {@code radius} Angstroms of {@code centroid}.
1541         *
1542         * @param structure
1543         *            The structure from which to find Groups
1544         * @param centroid
1545         *            The centroid of the shell
1546         * @param excludeResidues
1547         *            A list of ResidueNumbers to exclude
1548         * @param radius
1549         *            The radius from {@code centroid}, in Angstroms
1550         * @param includeWater
1551         *            Whether to include Groups whose <em>only</em> atoms are water
1552         * @param useAverageDistance
1553         *            When set to true, distances are the arithmetic mean (1-norm)
1554         *            of the distances of atoms that belong to the group and that
1555         *            are within the shell; otherwise, distances are the minimum of
1556         *            these values
1557         * @return A map of Groups within (or partially within) the shell, to their
1558         *         distances in Angstroms
1559         */
1560        public static Map<Group, Double> getGroupDistancesWithinShell(
1561                        Structure structure, Atom centroid,
1562                        Set<ResidueNumber> excludeResidues, double radius,
1563                        boolean includeWater, boolean useAverageDistance) {
1564
1565                // for speed, we avoid calculating square roots
1566                radius = radius * radius;
1567
1568                Map<Group, Double> distances = new HashMap<Group, Double>();
1569
1570                // we only need this if we're averaging distances
1571                // note that we can't use group.getAtoms().size() because some the
1572                // group's atoms be outside the shell
1573                Map<Group, Integer> atomCounts = new HashMap<Group, Integer>();
1574
1575                for (Chain chain : structure.getChains()) {
1576                        groupLoop: for (Group chainGroup : chain.getAtomGroups()) {
1577
1578                                // exclude water
1579                                if (!includeWater && chainGroup.isWater())
1580                                        continue;
1581
1582                                // check blacklist of residue numbers
1583                                for (ResidueNumber rn : excludeResidues) {
1584                                        if (rn.equals(chainGroup.getResidueNumber()))
1585                                                continue groupLoop;
1586                                }
1587
1588                                for (Atom testAtom : chainGroup.getAtoms()) {
1589
1590                                        // use getDistanceFast as we are doing a lot of comparisons
1591                                        double dist = Calc.getDistanceFast(centroid, testAtom);
1592
1593                                        // if we're the shell
1594                                        if (dist <= radius) {
1595                                                if (!distances.containsKey(chainGroup))
1596                                                        distances.put(chainGroup, Double.POSITIVE_INFINITY);
1597                                                if (useAverageDistance) {
1598                                                        // sum the distance; we'll divide by the total
1599                                                        // number later
1600                                                        // here, we CANNOT use fastDistance (distance
1601                                                        // squared) because we want the arithmetic mean
1602                                                        distances.put(chainGroup, distances.get(chainGroup)
1603                                                                        + Math.sqrt(dist));
1604                                                        if (!atomCounts.containsKey(chainGroup))
1605                                                                atomCounts.put(chainGroup, 0);
1606                                                        atomCounts.put(chainGroup,
1607                                                                        atomCounts.get(chainGroup) + 1);
1608                                                } else {
1609                                                        // take the minimum distance among all atoms of
1610                                                        // chainGroup
1611                                                        // note that we can't break here because we might
1612                                                        // find a smaller distance
1613                                                        if (dist < distances.get(chainGroup)) {
1614                                                                distances.put(chainGroup, dist);
1615                                                        }
1616                                                }
1617                                        }
1618
1619                                }
1620                        }
1621                }
1622
1623                if (useAverageDistance) {
1624                        for (Map.Entry<Group, Double> entry : distances.entrySet()) {
1625                                int count = atomCounts.get(entry.getKey());
1626                                distances.put(entry.getKey(), entry.getValue() / count);
1627                        }
1628                } else {
1629                        // in this case we used getDistanceFast
1630                        for (Map.Entry<Group, Double> entry : distances.entrySet()) {
1631                                distances.put(entry.getKey(), Math.sqrt(entry.getValue()));
1632                        }
1633                }
1634
1635                return distances;
1636
1637        }
1638
1639        public static Set<Group> getGroupsWithinShell(Structure structure,
1640                        Atom atom, Set<ResidueNumber> excludeResidues, double distance,
1641                        boolean includeWater) {
1642
1643                // square the distance to use as a comparison against getDistanceFast
1644                // which returns the square of a distance.
1645                distance = distance * distance;
1646
1647                Set<Group> returnSet = new LinkedHashSet<Group>();
1648                for (Chain chain : structure.getChains()) {
1649                        groupLoop: for (Group chainGroup : chain.getAtomGroups()) {
1650                                if (!includeWater && chainGroup.isWater())
1651                                        continue;
1652                                for (ResidueNumber rn : excludeResidues) {
1653                                        if (rn.equals(chainGroup.getResidueNumber()))
1654                                                continue groupLoop;
1655                                }
1656                                for (Atom atomB : chainGroup.getAtoms()) {
1657
1658                                        // use getDistanceFast as we are doing a lot of comparisons
1659                                        double dist = Calc.getDistanceFast(atom, atomB);
1660                                        if (dist <= distance) {
1661                                                returnSet.add(chainGroup);
1662                                                break;
1663                                        }
1664
1665                                }
1666                        }
1667                }
1668                return returnSet;
1669        }
1670
1671        /**
1672         * <p>
1673         * Returns a Set of Groups in a structure within the distance specified of a
1674         * given group.
1675         * </p>
1676         * <p>
1677         * Updated 18-Sep-2015 sroughley to return a Set so only a unique set of
1678         * Groups returned
1679         *
1680         * @param structure
1681         *            The structure to work with
1682         * @param group
1683         *            The 'query' group
1684         * @param distance
1685         *            The cutoff distance
1686         * @param includeWater
1687         *            Should water residues be included in the output?
1688         * @return {@link LinkedHashSet} of {@link Group}s within at least one atom
1689         *         with {@code distance} of at least one atom in {@code group}
1690         */
1691        public static Set<Group> getGroupsWithinShell(Structure structure,
1692                        Group group, double distance, boolean includeWater) {
1693
1694                Set<Group> returnList = new LinkedHashSet<Group>();
1695
1696                Set<ResidueNumber> excludeGroups = new HashSet<ResidueNumber>();
1697                excludeGroups.add(group.getResidueNumber());
1698                for (Atom atom : group.getAtoms()) {
1699                        Set<Group> set = getGroupsWithinShell(structure, atom,
1700                                        excludeGroups, distance, includeWater);
1701                        returnList.addAll(set);
1702                }
1703
1704                return returnList;
1705        }
1706
1707        /**
1708         * Remove all models from a Structure and keep only the first
1709         *
1710         * @param s
1711         *            original Structure
1712         * @return a structure that contains only the first model
1713         * @since 3.0.5
1714         */
1715        public static Structure removeModels(Structure s) {
1716                if (s.nrModels() == 1)
1717                        return s;
1718
1719                Structure n = new StructureImpl();
1720                // go through whole substructure and clone ...
1721
1722                // copy structure data
1723
1724                n.setPDBCode(s.getPDBCode());
1725                n.setName(s.getName());
1726
1727                // TODO: do deep copying of data!
1728                n.setPDBHeader(s.getPDBHeader());
1729                n.setDBRefs(s.getDBRefs());
1730
1731                n.setSites(s.getSites());
1732
1733                n.setChains(s.getModel(0));
1734
1735                return n;
1736
1737        }
1738
1739        /**
1740         * Removes all polymeric and solvent groups from a list of groups
1741         *
1742         */
1743        public static List<Group> filterLigands(List<Group> allGroups) {
1744
1745                List<Group> groups = new ArrayList<Group>();
1746                for (Group g : allGroups) {
1747
1748                        if ( g.isPolymeric())
1749                                continue;
1750
1751                        if (!g.isWater()) {
1752                                groups.add(g);
1753                        }
1754                }
1755
1756                return groups;
1757        }
1758
1759        /**
1760         * Short version of {@link #getStructure(String, PDBFileParser, AtomCache)}
1761         * which creates new parsers when needed
1762         *
1763         * @param name
1764         * @return
1765         * @throws IOException
1766         * @throws StructureException
1767         */
1768        public static Structure getStructure(String name) throws IOException,
1769        StructureException {
1770                return StructureTools.getStructure(name, null, null);
1771        }
1772
1773        /**
1774         * Flexibly get a structure from an input String. The intent of this method
1775         * is to allow any reasonable string which could refer to a structure to be
1776         * correctly parsed. The following are currently supported:
1777         * <ol>
1778         * <li>Filename (if name refers to an existing file)
1779         * <li>PDB ID
1780         * <li>SCOP domains
1781         * <li>PDP domains
1782         * <li>Residue ranges
1783         * <li>Other formats supported by AtomCache
1784         * </ol>
1785         *
1786         * @param name
1787         *            Some reference to the protein structure
1788         * @param parser
1789         *            A clean PDBFileParser to use if it is a file. If null, a
1790         *            PDBFileParser will be instantiated if needed.
1791         * @param cache
1792         *            An AtomCache to use if the structure can be fetched from the
1793         *            PDB. If null, a AtomCache will be instantiated if needed.
1794         * @return A Structure object
1795         * @throws IOException
1796         *             if name is an existing file, but doesn't parse correctly
1797         * @throws StructureException
1798         *             if the format is unknown, or if AtomCache throws an
1799         *             exception.
1800         */
1801        public static Structure getStructure(String name, PDBFileParser parser,
1802                        AtomCache cache) throws IOException, StructureException {
1803                File f = new File(FileDownloadUtils.expandUserHome(name));
1804                if (f.exists()) {
1805                        if (parser == null) {
1806                                parser = new PDBFileParser();
1807                        }
1808                        InputStream inStream = new FileInputStream(f);
1809                        return parser.parsePDBFile(inStream);
1810                } else {
1811                        if (cache == null) {
1812                                cache = new AtomCache();
1813                        }
1814                        return cache.getStructure(name);
1815                }
1816        }
1817
1818        /**
1819         * @deprecated  use {@link Chain#isProtein()} instead.
1820         */
1821        @Deprecated
1822        public static boolean isProtein(Chain c) {
1823
1824                return c.isProtein();
1825        }
1826
1827        /**
1828         * @deprecated use {@link Chain#isNucleicAcid()} instead.
1829         */
1830        @Deprecated
1831        public static boolean isNucleicAcid(Chain c) {
1832                return c.isNucleicAcid();
1833        }
1834
1835        /**
1836         * @deprecated use {@link Chain#getPredominantGroupType()} instead.
1837         */
1838        @Deprecated
1839        public static GroupType getPredominantGroupType(Chain c) {
1840                return c.getPredominantGroupType();
1841        }
1842
1843        /**
1844         * @deprecated use {@link Chain#isWaterOnly()} instead.
1845         */
1846        @Deprecated
1847        public static boolean isChainWaterOnly(Chain c) {
1848                return c.isWaterOnly();
1849        }
1850
1851        /**
1852         * @deprecated  use {@link Chain#isPureNonPolymer()} instead.
1853         */
1854        @Deprecated
1855        public static boolean isChainPureNonPolymer(Chain c) {
1856
1857                return c.isPureNonPolymer();
1858        }
1859
1860        /**
1861         * Cleans up the structure's alternate location (altloc) groups. All alternate location groups should have all atoms (except
1862         * in the case of microheterogenity) or when a deuterium exists.
1863         * Ensure that all the alt loc groups have all the atoms in the main group.
1864         * @param structure The Structure to be cleaned up
1865         */
1866        public static void cleanUpAltLocs(Structure structure) {
1867                for (int i =0; i< structure.nrModels() ; i++){
1868                        for (Chain chain : structure.getModel(i)) {
1869                                for (Group group : chain.getAtomGroups()) {
1870                                        for (Group altLocGroup : group.getAltLocs()) {
1871                                                for ( Atom groupAtom : group.getAtoms()) {
1872                                                        // If this alt loc doesn't have this atom
1873                                                        if (! altLocGroup.hasAtom(groupAtom.getName())) {
1874                                                                // Fix for microheterogenity
1875                                                                if (altLocGroup.getPDBName().equals(group.getPDBName())) {
1876                                                                        // If it's a Hydrogen then we check for it's Deuterated brother
1877                                                                        if(!hasDeuteratedEquiv(groupAtom, altLocGroup)){
1878                                                                                altLocGroup.addAtom(groupAtom);
1879                                                                        }
1880                                                                }
1881                                                        }
1882                                                }
1883                                        }
1884                                }
1885                        }
1886                }
1887        }
1888
1889        /**
1890         * Check to see if an Deuterated atom has a non deuterated brother in the group.
1891         * @param atom the input atom that is putatively deuterium
1892         * @param currentGroup the group the atom is in
1893         * @return true if the atom is deuterated and it's hydrogen equive exists.
1894         */
1895        public static boolean hasNonDeuteratedEquiv(Atom atom, Group currentGroup) {
1896                if(atom.getElement()==Element.D && currentGroup.hasAtom(replaceFirstChar(atom.getName(),'D', 'H'))) {
1897                        // If it's deuterated and has a non-deuterated brother
1898                        return true;
1899                }
1900                return false;
1901        }
1902
1903        /**
1904         * Check to see if a Hydrogen has a  Deuterated brother in the group.
1905         * @param atom the input atom that is putatively hydorgen
1906         * @param currentGroup the group the atom is in
1907         * @return true if the atom is hydrogen and it's Deuterium equiv exists.
1908         */
1909        public static boolean hasDeuteratedEquiv(Atom atom, Group currentGroup) {
1910                if(atom.getElement()==Element.H && currentGroup.hasAtom(replaceFirstChar(atom.getName(),'H', 'D'))) {
1911                        // If it's hydrogen and has a deuterated brother
1912                        return true;
1913                }
1914                return false;
1915        }
1916
1917        private static String replaceFirstChar(String name, char c, char d) {
1918                if(name.charAt(0)==c){
1919                        return name.replaceFirst(String.valueOf(c), String.valueOf(d));
1920                }
1921                return name;
1922        }
1923
1924}