001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 12.03.2004
021 * @author Andreas Prlic
022 *
023 */
024package org.biojava.nbio.structure;
025
026
027import org.biojava.nbio.structure.io.FileConvert;
028import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
029import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
030import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
031import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
032import org.biojava.nbio.core.sequence.ProteinSequence;
033import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
034import org.biojava.nbio.core.sequence.template.Sequence;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import java.util.*;
039
040
041/**
042 * A Chain in a PDB file. It contains several groups which can be of
043 * one of the types defined in the {@link GroupType} constants.
044 *
045 * @author Andreas Prlic
046 * @author Jules Jacobsen
047 * @since 1.4
048 */
049public class ChainImpl implements Chain {
050
051        private final static Logger logger = LoggerFactory.getLogger(ChainImpl.class);
052
053        private static final long serialVersionUID = 1990171805277911840L;
054
055        /**
056         * The default chain identifier used to be an empty space
057         */
058        private static final String DEFAULT_CHAIN_ID = "A";
059
060        private String swissprot_id ;
061        private String authId; // the 'public' chain identifier as assigned by authors in PDB files
062
063        private List <Group> groups;
064        private List<Group> seqResGroups;
065
066        private EntityInfo entity;
067        private Structure parent;
068
069        private Map<String, Integer> pdbResnumMap;
070        private String asymId; // the 'internal' chain identifier as used in mmCIF files
071
072
073        private List<SeqMisMatch> seqMisMatches = null;
074        /**
075         *  Constructs a ChainImpl object.
076         */
077        public ChainImpl() {
078                super();
079
080                authId = DEFAULT_CHAIN_ID;
081                groups = new ArrayList<>() ;
082
083                seqResGroups = new ArrayList<>();
084                pdbResnumMap = new HashMap<>();
085                asymId = null;
086
087        }
088
089        /** {@inheritDoc}
090         *
091         */
092        @Override
093        public String getId() {
094                return asymId;
095        }
096
097        /** {@inheritDoc}
098         *
099         */
100        @Override
101        public void setId(String asymId) {
102                this.asymId = asymId;
103        }
104
105        /** {@inheritDoc}
106         *
107         */
108        @Override
109        public String getName() { return authId; }
110
111        /** {@inheritDoc}
112         *
113         */
114        @Override
115        public void setName(String authId) { this.authId = authId; }
116
117        /** {@inheritDoc}
118         *
119         */
120        @Override
121        @Deprecated
122        public void setParent(Structure parent) {
123                setStructure(parent);
124        }
125
126        /** {@inheritDoc}
127         *
128         */
129        @Override
130        public void setStructure(Structure parent){
131                this.parent = parent;
132        }
133
134        /** Returns the parent Structure of this chain.
135         *
136         * @return the parent Structure object
137         */
138        @Override
139        public Structure getStructure() {
140
141                return parent;
142        }
143
144
145        /** Returns the parent Structure of this chain.
146         *
147         * @return the parent Structure object
148         * @deprecated  use getStructure instead.
149         */
150        @Override
151        @Deprecated
152        public Structure getParent() {
153
154                return getStructure();
155        }
156
157        /** Returns an identical copy of this Chain .
158         * @return an identical copy of this Chain
159         */
160        @Override
161        public Object clone() {
162                // go through all groups and add to new Chain.
163                ChainImpl n = new ChainImpl();
164                // copy chain data:
165
166                n.setId(getId());
167                n.setName(getName());
168                n.setSwissprotId ( getSwissprotId());
169
170                // NOTE the EntityInfo will be reset at the parent level (Structure) if cloning is happening from parent level
171                // here we don't deep-copy it and just keep the same reference, in case the cloning is happening at the Chain level only
172                n.setEntityInfo(this.entity);
173
174
175                for (Group group : groups) {
176                        Group g = (Group) group.clone();
177                        n.addGroup(g);
178                        g.setChain(n);
179                }
180
181                if (seqResGroups!=null){
182
183                        List<Group> tmpSeqRes = new ArrayList<>();
184
185                        // cloning seqres and atom groups is ugly, due to their
186                        // nested relationship (some of the atoms can be in the seqres, but not all)
187
188                        for (Group seqResGroup : seqResGroups) {
189
190                                if (seqResGroup==null) {
191                                        tmpSeqRes.add(null);
192                                        continue;
193                                }
194
195                                int i = groups.indexOf(seqResGroup);
196
197                                Group g ;
198
199                                if (i!=-1) {
200                                        // group found in atom groups, we get the equivalent reference from the newly cloned atom groups
201                                        g = n.getAtomGroup(i);
202                                } else {
203                                        // group not found in atom groups, we clone the seqres group
204                                        g = (Group) seqResGroup.clone();
205                                }
206                                g.setChain(n);
207                                tmpSeqRes.add(g);
208                        }
209
210                        n.setSeqResGroups(tmpSeqRes);
211                }
212
213                return n ;
214        }
215
216        /** {@inheritDoc}
217         *
218         */
219        @Override
220        public void setEntityInfo(EntityInfo mol) {
221                this.entity = mol;
222        }
223
224        /** {@inheritDoc}
225         *
226         */
227        @Override
228        public EntityInfo getEntityInfo() {
229                return this.entity;
230        }
231
232        /** set the Swissprot id of this chains .
233         * @param sp_id  a String specifying the swissprot id value
234         * @see #getSwissprotId
235         */
236        @Override
237        public void setSwissprotId(String sp_id){
238                swissprot_id = sp_id ;
239        }
240
241        /** get the Swissprot id of this chains .
242         * @return a String representing the swissprot id value
243         * @see #setSwissprotId
244         */
245        @Override
246        public String getSwissprotId() {
247                return swissprot_id ;
248        }
249
250        /** {@inheritDoc}
251         *
252         */
253        @Override
254        public void addGroup(Group group) {
255
256                group.setChain(this);
257
258                // Set the altlocs chain as well
259                for(Group g : group.getAltLocs()) {
260                        g.setChain(this);
261                }
262
263                groups.add(group);
264
265                // store the position internally for quick access of this group
266
267                String pdbResnum = null ;
268                ResidueNumber resNum = group.getResidueNumber();
269                if ( resNum != null)
270                        pdbResnum = resNum.toString();
271                if ( pdbResnum != null) {
272                        Integer pos = groups.size() - 1;
273                        // ARGH sometimes numbering in PDB files is confusing.
274                        // e.g. PDB: 1sfe
275                        /*
276                         * ATOM    620  N   GLY    93     -24.320  -6.591   4.210  1.00 46.82           N
277                         * ATOM    621  CA  GLY    93     -24.960  -6.849   5.497  1.00 47.35           C
278                         * ATOM    622  C   GLY    93     -26.076  -5.873   5.804  1.00 47.24           C
279                         * ATOM    623  O   GLY    93     -26.382  -4.986   5.006  1.00 47.56           O
280                         *    and ...
281                         * HETATM 1348  O   HOH    92     -21.853 -16.886  19.138  1.00 66.92           O
282                         * HETATM 1349  O   HOH    93     -26.126   1.226  29.069  1.00 71.69           O
283                         * HETATM 1350  O   HOH    94     -22.250 -18.060  -6.401  1.00 61.97           O
284                         */
285
286                        // this check is to give in this case the entry priority that is an AminoAcid / comes first...
287                        // a good example of same residue number for 2 residues is 3th3, chain T, residue 201 (a LYS and a sugar BGC covalently attached to it) - JD 2016-03-09
288                        if (  pdbResnumMap.containsKey(pdbResnum)) {
289
290                                logger.warn("Adding residue {}({}) to chain {} but a residue with same residue number is already present: {}({}). Will add only the aminoacid residue (if any) to the lookup, lookups for that residue number won't work properly.",
291                                                pdbResnum, group.getPDBName(), getChainID(), groups.get(pdbResnumMap.get(pdbResnum)).getResidueNumber(), groups.get(pdbResnumMap.get(pdbResnum)).getPDBName());
292                                if ( group instanceof AminoAcid)
293                                        pdbResnumMap.put(pdbResnum,pos);
294                        } else
295                                pdbResnumMap.put(pdbResnum,pos);
296                }
297
298        }
299
300
301        /**
302         * {@inheritDoc}
303         */
304        @Override
305        public Group getAtomGroup(int position) {
306
307                return groups.get(position);
308        }
309
310        /**
311         * {@inheritDoc}
312         */
313        @Override
314        public List<Group> getAtomGroups(GroupType type){
315
316                List<Group> tmp = new ArrayList<>() ;
317                for (Group g : groups) {
318                        if (g.getType().equals(type)) {
319                                tmp.add(g);
320                        }
321                }
322
323                return tmp ;
324        }
325
326
327        /** {@inheritDoc}
328         *
329         */
330        @Override
331        public List<Group> getAtomGroups(){
332                return groups ;
333        }
334
335        /** {@inheritDoc}
336         *
337         */
338        @Override
339        public void setAtomGroups(List<Group> groups){
340                for (Group g:groups){
341                        g.setChain(this);
342                }
343                this.groups = groups;
344        }
345
346        @Override
347        public Group[] getGroupsByPDB(ResidueNumber start, ResidueNumber end, boolean ignoreMissing)
348                        throws StructureException {
349                // Short-circut for include all groups
350                if(start == null && end == null) {
351                        return groups.toArray(new Group[groups.size()]);
352                }
353
354
355                List<Group> retlst = new ArrayList<>();
356
357                boolean adding, foundStart;
358                if( start == null ) {
359                        // start with first group
360                        adding = true;
361                        foundStart = true;
362                } else {
363                        adding = false;
364                        foundStart = false;
365                }
366
367                
368                for (Group g: groups){
369
370                        // Check for start
371                        if (!adding && start.equalsPositional(g.getResidueNumber())) {
372                                adding = true;
373                                foundStart = true;
374                        }
375
376                        // Check if past start
377                        if ( ignoreMissing && ! (foundStart && adding) ) {
378                                ResidueNumber pos = g.getResidueNumber();
379
380                                if ( start != null && start.compareToPositional(pos) <= 0) {
381                                        foundStart = true;
382                                        adding = true;
383                                }
384                        }
385
386                        if ( adding)
387                                retlst.add(g);
388
389                        // check for end
390                        if ( end != null && end.equalsPositional(g.getResidueNumber())) {
391                                if ( ! adding)
392                                        throw new StructureException("did not find start PDB residue number " + start + " in chain " + authId);
393                                adding = false;
394                                break;
395                        }
396                        // check if past end
397                        if ( ignoreMissing && adding && end != null){
398
399                                ResidueNumber pos = g.getResidueNumber();
400                                if ( end.compareToPositional(pos) <= 0) {
401                                        adding = false;
402                                        break;
403                                }
404
405                        }
406                }
407
408                if ( ! foundStart){
409                        throw new StructureException("did not find start PDB residue number " + start + " in chain " + authId);
410                }
411                if ( end != null && adding && !ignoreMissing) {
412                        throw new StructureException("did not find end PDB residue number " + end + " in chain " + authId);
413                }
414
415
416                //not checking if the end has been found in this case...
417
418                return retlst.toArray(new Group[retlst.size()] );
419        }
420
421
422        /**
423         * {@inheritDoc}
424         *
425         */
426        @Override
427        public Group getGroupByPDB(ResidueNumber resNum) throws StructureException {
428                String pdbresnum = resNum.toString();
429                if ( pdbResnumMap.containsKey(pdbresnum)) {
430                        Integer pos = pdbResnumMap.get(pdbresnum);
431                        return groups.get(pos);
432                } else {
433                        throw new StructureException("unknown PDB residue number " + pdbresnum + " in chain " + authId);
434                }
435        }
436
437        /**
438         * {@inheritDoc}
439         *
440         */
441        @Override
442        public Group[] getGroupsByPDB(ResidueNumber start, ResidueNumber end)
443                        throws StructureException {
444                return getGroupsByPDB(start, end, false);
445        }
446
447
448
449        /**
450         * {@inheritDoc}
451         */
452        @Override
453        public int getSeqResLength() {
454                //new method returns the length of the sequence defined in the SEQRES records
455                return seqResGroups.size();
456        }
457
458        /**
459         * {@inheritDoc}
460         */
461        @Override
462        public void   setChainID(String asymId) { this.asymId = asymId;   }
463
464
465        /**
466         * {@inheritDoc}
467         */
468        @Override
469        public String getChainID()           {  return this.asymId;  }
470
471
472
473        /** String representation.
474         * @return String representation of the Chain
475         */
476        @Override
477        public String toString(){
478                String newline = System.getProperty("line.separator");
479                StringBuilder str = new StringBuilder();
480                str.append("Chain asymId:").append(getChainID()).append(" authId:").append(getName()).append(newline);
481                if ( entity != null ){
482                        if ( entity.getDescription() != null){
483                                str.append(entity.getDescription()).append(newline);
484                        }
485                }
486                str.append("total SEQRES length: ").append(getSeqResGroups().size()).append(" total ATOM length:")
487                .append(getAtomLength()).append(" residues ").append(newline);
488
489                return str.toString() ;
490
491        }
492
493        /**
494         * {@inheritDoc}
495         */
496        @Override
497        public Sequence<?> getBJSequence()  {
498
499                String seq = getSeqResSequence();
500
501                Sequence<AminoAcidCompound> s = null;
502
503                try {
504                        s = new ProteinSequence(seq);
505                } catch (CompoundNotFoundException e) {
506                        logger.error("Could not create sequence object from seqres sequence. Some unknown compound: {}",e.getMessage());
507                }
508
509                //TODO: return a DNA sequence if the content is DNA...
510                return s;
511
512        }
513
514        /**
515         * {@inheritDoc}
516         */
517        @Override
518        public String getAtomSequence(){
519
520
521                List<Group> groups = getAtomGroups();
522                StringBuilder sequence = new StringBuilder() ;
523
524                for ( Group g: groups){
525                        ChemComp cc = g.getChemComp();
526
527                        if ( PolymerType.PROTEIN_ONLY.contains(cc.getPolymerType()) ||
528                                        PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())){
529                                // an amino acid residue.. use for alignment
530                                String oneLetter= ChemCompGroupFactory.getOneLetterCode(cc);
531                                if ( oneLetter == null)
532                                        oneLetter = Character.toString(StructureTools.UNKNOWN_GROUP_LABEL);
533                                sequence.append(oneLetter);
534                        }
535
536                }
537                return sequence.toString();
538
539
540        }
541
542        /**
543         * {@inheritDoc}
544         */
545        @Override
546        public String getSeqResSequence(){
547
548                StringBuilder str = new StringBuilder();
549                for (Group g : seqResGroups) {
550                        ChemComp cc = g.getChemComp();
551                        if ( cc == null) {
552                                logger.warn("Could not load ChemComp for group: ", g);
553                                str.append(StructureTools.UNKNOWN_GROUP_LABEL);
554                        } else if ( PolymerType.PROTEIN_ONLY.contains(cc.getPolymerType()) ||
555                                        PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())){
556                                // an amino acid residue.. use for alignment
557                                String oneLetter= ChemCompGroupFactory.getOneLetterCode(cc);
558                                // AB oneLetter.length() should be one. e.g. in 1EMA it is 3 and this makes mapping residue to sequence impossible.
559                                if ( oneLetter == null || oneLetter.isEmpty() || oneLetter.equals("?")) {
560                                        oneLetter = Character.toString(StructureTools.UNKNOWN_GROUP_LABEL);
561                                }
562                                str.append(oneLetter);
563                        } else {
564                                str.append(StructureTools.UNKNOWN_GROUP_LABEL);
565                        }
566                }
567                return str.toString();
568        }
569        
570        /**
571         * Get the one letter sequence so that Sequence is guaranteed to
572         * be the same length as seqResGroups.
573         * Method related to https://github.com/biojava/biojava/issues/457
574         * @return a string of the sequence guaranteed to be the same length
575         * as seqResGroups.
576         */
577        public String getSeqResOneLetterSeq(){
578
579                StringBuilder str = new StringBuilder();
580                for (Group g : seqResGroups) {
581                        ChemComp cc = g.getChemComp();
582                        if ( cc == null) {
583                                logger.warn("Could not load ChemComp for group: ", g);
584                                str.append(StructureTools.UNKNOWN_GROUP_LABEL);
585                        } else if ( PolymerType.PROTEIN_ONLY.contains(cc.getPolymerType()) ||
586                                        PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())){
587                                // an amino acid residue.. use for alignment
588                                String oneLetter= ChemCompGroupFactory.getOneLetterCode(cc);
589                                // AB oneLetter.length() should be one. e.g. in 1EMA it is 3 and this makes mapping residue to sequence impossible.
590                                if ( oneLetter == null || oneLetter.isEmpty() || oneLetter.equals("?") || oneLetter.length()!=1) {
591                                        oneLetter = Character.toString(StructureTools.UNKNOWN_GROUP_LABEL);
592                                }
593                                str.append(oneLetter);
594                        } else {
595                                str.append(StructureTools.UNKNOWN_GROUP_LABEL);
596                        }
597                }
598                return str.toString();
599        }
600
601
602        /**
603         * {@inheritDoc}
604         */
605        @Override
606        public Group getSeqResGroup(int position) {
607
608                return seqResGroups.get(position);
609        }
610
611        /**
612         * {@inheritDoc}
613         */
614        @Override
615        public List<Group> getSeqResGroups(GroupType type) {
616                List<Group> tmp = new ArrayList<>() ;
617                for (Group g : seqResGroups) {
618                        if (g.getType().equals(type)) {
619                                tmp.add(g);
620                        }
621                }
622
623                return tmp ;
624        }
625
626        /** {@inheritDoc}
627         *
628         */
629        @Override
630        public List<Group> getSeqResGroups() {
631                return seqResGroups;
632        }
633
634        /** {@inheritDoc}
635         *
636         */
637        @Override
638        public void setSeqResGroups(List<Group> groups){
639                for (Group g: groups){
640                        g.setChain(this);
641                }
642                this.seqResGroups = groups;
643        }
644
645
646        /** {@inheritDoc}
647         *
648         */
649        @Override
650        public int getAtomLength() {
651
652                return groups.size();
653        }
654
655        /** {@inheritDoc}
656         *
657         */
658        @Override
659        public List<Group> getAtomLigands(){
660                List<Group> ligands = new ArrayList<>();
661
662                for (Group g : groups)
663                        if (!seqResGroups.contains(g) && !g.isWater())
664                                ligands.add(g);
665
666                return ligands;
667        }
668
669        @Override
670        public String getInternalChainID() {
671                return asymId;
672        }
673
674        @Override
675        public void setInternalChainID(String internalChainID) {
676                this.asymId = internalChainID;
677
678        }
679
680        @Override
681        public String toPDB() {
682                return FileConvert.toPDB(this);
683        }
684
685        @Override
686        public String toMMCIF() {
687                return FileConvert.toMMCIF(this, true);
688        }
689
690        @Override
691        public void setSeqMisMatches(List<SeqMisMatch> seqMisMatches) {
692                this.seqMisMatches = seqMisMatches;
693        }
694
695        @Override
696        public List<SeqMisMatch> getSeqMisMatches() {
697                return seqMisMatches;
698        }
699        
700        @Override
701        public EntityType getEntityType() {
702                if (getEntityInfo()==null) return null;
703                return getEntityInfo().getType();
704        }
705
706        @Override
707        public boolean isWaterOnly() {
708                for (Group g : getAtomGroups()) {
709                        if (!g.isWater())
710                                return false;
711                }
712                return true;
713        }
714
715        @Override
716        public boolean isPureNonPolymer() {
717                for (Group g : getAtomGroups()) {
718
719                        //ChemComp cc = g.getChemComp();
720
721                        if (    g.isPolymeric() &&
722                                        !g.isHetAtomInFile() ) {
723
724                                // important: the aminoacid or nucleotide residue can be in Atom records
725
726                                return false;
727                        }
728
729                }
730                return true;
731        }
732
733        @Override
734        public GroupType getPredominantGroupType(){
735
736                double ratioResiduesToTotal = StructureTools.RATIO_RESIDUES_TO_TOTAL;
737
738                int sizeAminos = getAtomGroups(GroupType.AMINOACID).size();
739                int sizeNucleotides = getAtomGroups(GroupType.NUCLEOTIDE).size();
740                List<Group> hetAtoms = getAtomGroups(GroupType.HETATM);
741                int sizeHetatoms = hetAtoms.size();
742                int sizeWaters = 0;
743                for (Group g : hetAtoms) {
744                        if (g.isWater())
745                                sizeWaters++;
746                }
747                int sizeHetatomsWithoutWater = sizeHetatoms - sizeWaters;
748
749                int fullSize = sizeAminos + sizeNucleotides + sizeHetatomsWithoutWater;
750
751                if ((double) sizeAminos / (double) fullSize > ratioResiduesToTotal)
752                        return GroupType.AMINOACID;
753
754                if ((double) sizeNucleotides / (double) fullSize > ratioResiduesToTotal)
755                        return GroupType.NUCLEOTIDE;
756
757                if ((double) (sizeHetatomsWithoutWater) / (double) fullSize > ratioResiduesToTotal)
758                        return GroupType.HETATM;
759
760                // finally if neither condition works, we try based on majority, but log
761                // it
762                GroupType max;
763                if (sizeNucleotides > sizeAminos) {
764                        if (sizeNucleotides > sizeHetatomsWithoutWater) {
765                                max = GroupType.NUCLEOTIDE;
766                        } else {
767                                max = GroupType.HETATM;
768                        }
769                } else {
770                        if (sizeAminos > sizeHetatomsWithoutWater) {
771                                max = GroupType.AMINOACID;
772                        } else {
773                                max = GroupType.HETATM;
774                        }
775                }
776                logger.debug(
777                                "Ratio of residues to total for chain with asym_id {} is below {}. Assuming it is a {} chain. "
778                                                + "Counts: # aa residues: {}, # nuc residues: {}, # non-water het residues: {}, # waters: {}, "
779                                                + "ratio aa/total: {}, ratio nuc/total: {}",
780                                getId(), ratioResiduesToTotal, max, sizeAminos,
781                                sizeNucleotides, sizeHetatomsWithoutWater, sizeWaters,
782                                (double) sizeAminos / (double) fullSize,
783                                (double) sizeNucleotides / (double) fullSize);
784
785                return max;
786        }
787
788        @Override
789        public  boolean isProtein() {
790                return getPredominantGroupType() == GroupType.AMINOACID;
791        }
792
793        @Override
794        public  boolean isNucleicAcid() {
795                return getPredominantGroupType() == GroupType.NUCLEOTIDE;
796        }
797
798
799}
800