001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 12.03.2004
021 * @author Andreas Prlic
022 *
023 */
024package org.biojava.nbio.structure;
025
026
027import org.biojava.nbio.structure.io.FileConvert;
028import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
029import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
030import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
031import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
032import org.biojava.nbio.core.sequence.ProteinSequence;
033import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
034import org.biojava.nbio.core.sequence.template.Sequence;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import java.io.Serializable;
039import java.util.*;
040
041
042/**
043 * A Chain in a PDB file. It contains several groups which can be of
044 * one of the types defined in the {@link GroupType} constants.
045 *
046 * @author Andreas Prlic
047 * @author Jules Jacobsen
048 * @since 1.4
049 */
050public class ChainImpl implements Chain, Serializable {
051
        /** Logger shared by all instances of this class. */
        private final static Logger logger = LoggerFactory.getLogger(ChainImpl.class);

        /** Version identifier used by Java serialization. */
        private static final long serialVersionUID = 1990171805277911840L;

        /**
         * The chain identifier assigned to a chain by the no-argument constructor.
         * The default chain identifier used to be an empty space.
         */
        public static String DEFAULT_CHAIN_ID = "A";

        private String swissprot_id ; // see setSwissprotId / getSwissprotId
        private String chainID ; // the chain identifier as in PDB files

        private List <Group> groups; // the groups returned by getAtomGroups()
        private List<Group> seqResGroups; // the groups returned by getSeqResGroups()

        private Long id; // see setId / getId
        private Compound mol; // see setCompound / getCompound
        private Structure parent; // see setStructure / getStructure

        // maps ResidueNumber.toString() to the index of a group in 'groups'; filled by addGroup
        private Map<String, Integer> pdbResnumMap;
        private String internalChainID; // the chain identifier used in mmCIF files


        // stays null until setSeqMisMatches is called
        private List<SeqMisMatch> seqMisMatches = null;
076        /**
077         *  Constructs a ChainImpl object.
078         */
079        public ChainImpl() {
080                super();
081
082                chainID = DEFAULT_CHAIN_ID;
083                groups = new ArrayList<Group>() ;
084
085                seqResGroups = new ArrayList<Group>();
086                pdbResnumMap = new HashMap<String,Integer>();
087                internalChainID = null;
088
089        }
090
091        /** {@inheritDoc}
092         *
093         */
094        @Override
095        public Long getId() {
096                return id;
097        }
098
099        /** {@inheritDoc}
100         *
101         */
102        @Override
103        public void setId(Long id) {
104                this.id = id;
105        }
106
107        /** {@inheritDoc}
108         *
109         */
110        @Override
111        @Deprecated
112        public void setParent(Structure parent) {
113                setStructure(parent);
114        }
115
116        /** {@inheritDoc}
117         *
118         */
119        @Override
120        public void setStructure(Structure parent){
121                this.parent = parent;
122        }
123
124        /** Returns the parent Structure of this chain.
125         *
126         * @return the parent Structure object
127         */
128        @Override
129        public Structure getStructure() {
130
131                return parent;
132        }
133
134
135        /** Returns the parent Structure of this chain.
136         *
137         * @return the parent Structure object
138         * @deprecated  use getStructure instead.
139         */
140        @Override
141        @Deprecated
142        public Structure getParent() {
143
144
145                return getStructure();
146        }
147
148        /** Returns an identical copy of this Chain .
149         * @return an identical copy of this Chain
150         */
151        @Override
152        public Object clone() {
153                // go through all groups and add to new Chain.
154                ChainImpl n = new ChainImpl();
155                // copy chain data:
156
157                n.setChainID( getChainID());
158                n.setSwissprotId ( getSwissprotId());
159
160                // NOTE the Compound will be reset at the parent level (Structure) if cloning is happening from parent level
161                // here we don't deep-copy it and just keep the same reference, in case the cloning is happening at the Chain level only
162                n.setCompound(this.mol);
163
164                n.setInternalChainID(internalChainID);
165
166                for (Group group : groups) {
167                        Group g = (Group) group.clone();
168                        n.addGroup(g);
169                        g.setChain(n);
170                }
171
172
173
174                if (seqResGroups!=null){
175
176                        List<Group> tmpSeqRes = new ArrayList<Group>();
177
178                        // cloning seqres and atom groups is ugly, due to their
179                        // nested relationship (some of the atoms can be in the seqres, but not all)
180
181                        for (Group seqResGroup : seqResGroups) {
182
183                                int i = findMathingGroupIndex(groups, seqResGroup);
184
185                                Group g = null;
186
187                                if (i!=-1) {
188                                        // group found in atom groups, we get the equivalent reference from the newly cloned atom groups
189                                        g = n.getAtomGroup(i);
190                                } else {
191                                        // group not found in atom groups, we clone the seqres group
192                                        g = (Group) seqResGroup.clone();
193                                }
194                                g.setChain(n);
195                                tmpSeqRes.add(g);
196                        }
197
198                        n.setSeqResGroups(tmpSeqRes);
199                }
200
201
202                return n ;
203        }
204
205        private static int findMathingGroupIndex(List<Group> atomGroups, Group g) {
206                int i = 0;
207                for (Group atomGroup: atomGroups) {
208                        if (g==atomGroup) return i;
209                        i++;
210                }
211                return -1;
212        }
213
214
215
216        /** {@inheritDoc}
217         *
218         */
219        @Override
220        public void setCompound(Compound mol) {
221                this.mol = mol;
222        }
223
224        /** {@inheritDoc}
225         *
226         */
227        @Override
228        public Compound getCompound() {
229                return this.mol;
230        }
231
232        /** set the Swissprot id of this chains .
233         * @param sp_id  a String specifying the swissprot id value
234         * @see #getSwissprotId
235         */
236        @Override
237        public void setSwissprotId(String sp_id){
238                swissprot_id = sp_id ;
239        }
240
241        /** get the Swissprot id of this chains .
242         * @return a String representing the swissprot id value
243         * @see #setSwissprotId
244         */
245        @Override
246        public String getSwissprotId() {
247                return swissprot_id ;
248        }
249
250        /** {@inheritDoc}
251         *
252         */
253        @Override
254        public void addGroup(Group group) {
255
256                group.setChain(this);
257
258                // Set the altlocs chain as well
259                for(Group g : group.getAltLocs()) {
260                        g.setChain(this);
261                }
262
263                groups.add(group);
264
265                // store the position internally for quick access of this group
266
267                String pdbResnum = null ;
268                ResidueNumber resNum = group.getResidueNumber();
269                if ( resNum != null)
270                        pdbResnum = resNum.toString();
271                if ( pdbResnum != null) {
272                        Integer pos = groups.size() - 1;
273                        // ARGH sometimes numbering in PDB files is confusing.
274                        // e.g. PDB: 1sfe
275                /*
276                 * ATOM    620  N   GLY    93     -24.320  -6.591   4.210  1.00 46.82           N
277                 * ATOM    621  CA  GLY    93     -24.960  -6.849   5.497  1.00 47.35           C
278                 * ATOM    622  C   GLY    93     -26.076  -5.873   5.804  1.00 47.24           C
279                 * ATOM    623  O   GLY    93     -26.382  -4.986   5.006  1.00 47.56           O
280                 *    and ...
281                 * HETATM 1348  O   HOH    92     -21.853 -16.886  19.138  1.00 66.92           O
282                 * HETATM 1349  O   HOH    93     -26.126   1.226  29.069  1.00 71.69           O
283                 * HETATM 1350  O   HOH    94     -22.250 -18.060  -6.401  1.00 61.97           O
284                 */
285
286                        // this check is to give in this case the entry priority that is an AminoAcid / comes first...
287                        // a good example of same residue number for 2 residues is 3th3, chain T, residue 201 (a LYS and a sugar BGC covalently attached to it) - JD 2016-03-09
288                        if (  pdbResnumMap.containsKey(pdbResnum)) {
289
290                                logger.warn("Adding residue {}({}) to chain {} but a residue with same residue number is already present: {}({}). Will add only the aminoacid residue (if any) to the lookup, lookups for that residue number won't work properly.",
291                                                pdbResnum, group.getPDBName(), getChainID(), groups.get(pdbResnumMap.get(pdbResnum)).getResidueNumber(), groups.get(pdbResnumMap.get(pdbResnum)).getPDBName());
292                                if ( group instanceof AminoAcid)
293                                        pdbResnumMap.put(pdbResnum,pos);
294                        } else
295                                pdbResnumMap.put(pdbResnum,pos);
296                }
297
298        }
299
300
301        /**
302         * {@inheritDoc}
303         */
304        @Override
305        public Group getAtomGroup(int position) {
306
307                return groups.get(position);
308        }
309
310        /**
311         * {@inheritDoc}
312         */
313        @Override
314        public List<Group> getAtomGroups(GroupType type){
315
316                List<Group> tmp = new ArrayList<Group>() ;
317                for (Group g : groups) {
318                        if (g.getType().equals(type)) {
319                                tmp.add(g);
320                        }
321                }
322
323                return tmp ;
324        }
325
326
327        /** {@inheritDoc}
328         *
329         */
330        @Override
331        public List<Group> getAtomGroups(){
332                return groups ;
333        }
334
335        /** {@inheritDoc}
336         *
337         */
338        @Override
339        public void setAtomGroups(List<Group> groups){
340                for (Group g:groups){
341                        g.setChain(this);
342                }
343                this.groups = groups;
344        }
345
346        @Override
347        @Deprecated // TODO dmyersturnbull: why is this deprecated if it's declared in Chain?
348        public Group[] getGroupsByPDB(ResidueNumber start, ResidueNumber end, boolean ignoreMissing)
349                        throws StructureException {
350
351                if (! ignoreMissing )
352                        return getGroupsByPDB(start, end);
353
354
355                List<Group> retlst = new ArrayList<Group>();
356
357                String pdbresnumStart = start.toString();
358                String pdbresnumEnd   = end.toString();
359
360
361                int startPos = Integer.MIN_VALUE;
362                int endPos   = Integer.MAX_VALUE;
363
364
365                startPos = start.getSeqNum();
366                endPos   = end.getSeqNum();
367
368
369
370                boolean adding = false;
371                boolean foundStart = false;
372
373                for (Group g: groups){
374
375                        if ( g.getResidueNumber().toString().equals(pdbresnumStart)) {
376                                adding = true;
377                                foundStart = true;
378                        }
379
380                        if ( ! (foundStart && adding) ) {
381
382
383                                int pos = g.getResidueNumber().getSeqNum();
384
385                                if ( pos >= startPos) {
386                                        foundStart = true;
387                                        adding = true;
388                                }
389
390
391                        }
392
393                        if ( adding)
394                                retlst.add(g);
395
396                        if ( g.getResidueNumber().toString().equals(pdbresnumEnd)) {
397                                if ( ! adding)
398                                        throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + chainID);
399                                adding = false;
400                                break;
401                        }
402                        if (adding){
403
404                                int pos = g.getResidueNumber().getSeqNum();
405                                if (pos >= endPos) {
406                                        adding = false;
407                                        break;
408                                }
409
410                        }
411                }
412
413                if ( ! foundStart){
414                        throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + chainID);
415                }
416
417
418                //not checking if the end has been found in this case...
419
420                return retlst.toArray(new Group[retlst.size()] );
421        }
422
423
424        /**
425         * {@inheritDoc}
426         *
427         */
428        @Override
429        public Group getGroupByPDB(ResidueNumber resNum) throws StructureException {
430                String pdbresnum = resNum.toString();
431                if ( pdbResnumMap.containsKey(pdbresnum)) {
432                        Integer pos = pdbResnumMap.get(pdbresnum);
433                        return groups.get(pos);
434                } else {
435                        throw new StructureException("unknown PDB residue number " + pdbresnum + " in chain " + chainID);
436                }
437        }
438
439        /**
440         * {@inheritDoc}
441         *
442         */
443        @Override
444        public Group[] getGroupsByPDB(ResidueNumber start, ResidueNumber end)
445                        throws StructureException {
446
447                String pdbresnumStart = start.toString();
448                String pdbresnumEnd   = end.toString();
449
450                List<Group> retlst = new ArrayList<Group>();
451
452                Iterator<Group> iter = groups.iterator();
453                boolean adding = false;
454                boolean foundStart = false;
455
456                while ( iter.hasNext()){
457                        Group g = iter.next();
458                        if ( g.getResidueNumber().toString().equals(pdbresnumStart)) {
459                                adding = true;
460                                foundStart = true;
461                        }
462
463                        if ( adding)
464                                retlst.add(g);
465
466                        if ( g.getResidueNumber().toString().equals(pdbresnumEnd)) {
467                                if ( ! adding)
468                                        throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + chainID);
469                                adding = false;
470                                break;
471                        }
472                }
473
474                if ( ! foundStart){
475                        throw new StructureException("did not find start PDB residue number " + pdbresnumStart + " in chain " + chainID);
476                }
477                if ( adding) {
478                        throw new StructureException("did not find end PDB residue number " + pdbresnumEnd + " in chain " + chainID);
479                }
480
481                return retlst.toArray(new Group[retlst.size()] );
482        }
483
484
485
486        /**
487         * {@inheritDoc}
488         */
489        @Override
490        public int getSeqResLength() {
491                //new method returns the length of the sequence defined in the SEQRES records
492                return seqResGroups.size();
493        }
494
495        /**
496         * {@inheritDoc}
497         */
498        @Override
499        public void   setChainID(String nam) { chainID = nam;   }
500
501
502        /**
503         * {@inheritDoc}
504         */
505        @Override
506        public String getChainID()           {  return chainID;  }
507
508
509
510        /** String representation.
511         * @return String representation of the Chain
512         */
513        @Override
514        public String toString(){
515                String newline = System.getProperty("line.separator");
516                StringBuilder str = new StringBuilder();
517                str.append("Chain >").append(getChainID()).append("<").append(newline);
518                if ( mol != null ){
519                        if ( mol.getMolName() != null){
520                                str.append(mol.getMolName()).append(newline);
521                        }
522                }
523                str.append("total SEQRES length: ").append(getSeqResGroups().size()).append(" total ATOM length:")
524                                .append(getAtomLength()).append(" residues ").append(newline);
525
526                return str.toString() ;
527
528        }
529
530        /**
531         * {@inheritDoc}
532         */
533        @Override
534        public Sequence<?> getBJSequence()  {
535
536                String seq = getSeqResSequence();
537
538                Sequence<AminoAcidCompound> s = null;
539
540                try {
541                        s = new ProteinSequence(seq);
542                } catch (CompoundNotFoundException e) {
543                        logger.error("Could not create sequence object from seqres sequence. Some unknown compound: {}",e.getMessage());
544                }
545
546                //TODO: return a DNA sequence if the content is DNA...
547                return s;
548
549        }
550
551        /**
552         * {@inheritDoc}
553         */
554        @Override
555        public String getAtomSequence(){
556
557
558                List<Group> groups = getAtomGroups();
559                StringBuilder sequence = new StringBuilder() ;
560
561                for ( Group g: groups){
562                        ChemComp cc = g.getChemComp();
563
564                        if ( PolymerType.PROTEIN_ONLY.contains(cc.getPolymerType()) ||
565                                        PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())){
566                                // an amino acid residue.. use for alignment
567                                String oneLetter= ChemCompGroupFactory.getOneLetterCode(cc);
568                                if ( oneLetter == null)
569                                        oneLetter = Character.toString(StructureTools.UNKNOWN_GROUP_LABEL);
570                                sequence.append(oneLetter);
571                        }
572
573                }
574                return sequence.toString();
575
576
577        }
578
579        /**
580         * {@inheritDoc}
581         */
582        @Override
583        public String getSeqResSequence(){
584
585                StringBuilder str = new StringBuilder();
586                for (Group g : seqResGroups) {
587                        ChemComp cc = g.getChemComp();
588                        if ( cc == null) {
589                                logger.warn("Could not load ChemComp for group: ", g);
590                                str.append(StructureTools.UNKNOWN_GROUP_LABEL);
591                        } else if ( PolymerType.PROTEIN_ONLY.contains(cc.getPolymerType()) ||
592                                        PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())){
593                                // an amino acid residue.. use for alignment
594                                String oneLetter= ChemCompGroupFactory.getOneLetterCode(cc);
595                                if ( oneLetter == null || oneLetter.isEmpty() || oneLetter.equals("?"))
596                                        oneLetter = Character.toString(StructureTools.UNKNOWN_GROUP_LABEL);
597                                str.append(oneLetter);
598                        } else {
599                                str.append(StructureTools.UNKNOWN_GROUP_LABEL);
600                        }
601                }
602                return str.toString();
603        }
604
605
606        /**
607         * {@inheritDoc}
608         */
609        @Override
610        public Group getSeqResGroup(int position) {
611
612                return seqResGroups.get(position);
613        }
614
615        /**
616         * {@inheritDoc}
617         */
618        @Override
619        public List<Group> getSeqResGroups(GroupType type) {
620                List<Group> tmp = new ArrayList<Group>() ;
621                for (Group g : seqResGroups) {
622                        if (g.getType().equals(type)) {
623                                tmp.add(g);
624                        }
625                }
626
627                return tmp ;
628        }
629
630        /** {@inheritDoc}
631         *
632         */
633        @Override
634        public List<Group> getSeqResGroups() {
635                return seqResGroups;
636        }
637
638        /** {@inheritDoc}
639         *
640         */
641        @Override
642        public void setSeqResGroups(List<Group> groups){
643                for (Group g: groups){
644                        g.setChain(this);
645                }
646                this.seqResGroups = groups;
647        }
648
649        protected void addSeqResGroup(Group g){
650                seqResGroups.add(g);
651        }
652
653
654        /** {@inheritDoc}
655         *
656         */
657        @Override
658        public int getAtomLength() {
659
660                return groups.size();
661        }
662
663        /** {@inheritDoc}
664         *
665         */
666        @Override
667        public List<Group> getAtomLigands(){
668                List<Group> ligands = new ArrayList<Group>();
669
670                for (Group g : groups)
671                        if (!seqResGroups.contains(g) && !g.isWater())
672                                ligands.add(g);
673
674                return ligands;
675        }
676
677        @Override
678        public String getInternalChainID() {
679                return internalChainID;
680        }
681
682        @Override
683        public void setInternalChainID(String internalChainID) {
684                this.internalChainID = internalChainID;
685
686        }
687
688        @Override
689        public String toPDB() {
690                return FileConvert.toPDB(this);
691        }
692
693        @Override
694        public String toMMCIF() {
695                return FileConvert.toMMCIF(this, true);
696        }
697
698        @Override
699        public void setSeqMisMatches(List<SeqMisMatch> seqMisMatches) {
700                this.seqMisMatches = seqMisMatches;
701        }
702
703        @Override
704        public List<SeqMisMatch> getSeqMisMatches() {
705                return seqMisMatches;
706        }
707}
708