001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Aug 23, 2007
021 *
022 */
023
024package org.biojava.nbio.structure.io;
025
026
027import java.util.ArrayList;
028import java.util.HashMap;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032
033import org.biojava.nbio.alignment.Alignments;
034import org.biojava.nbio.alignment.Alignments.PairwiseSequenceAlignerType;
035import org.biojava.nbio.alignment.SimpleGapPenalty;
036import org.biojava.nbio.alignment.template.GapPenalty;
037import org.biojava.nbio.alignment.template.PairwiseSequenceAligner;
038import org.biojava.nbio.core.alignment.matrices.SubstitutionMatrixHelper;
039import org.biojava.nbio.core.alignment.template.SequencePair;
040import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
041import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
042import org.biojava.nbio.core.sequence.DNASequence;
043import org.biojava.nbio.core.sequence.ProteinSequence;
044import org.biojava.nbio.core.sequence.RNASequence;
045import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
046import org.biojava.nbio.core.sequence.compound.AmbiguityDNARNAHybridCompoundSet;
047import org.biojava.nbio.core.sequence.compound.AmbiguityRNACompoundSet;
048import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
049import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
050import org.biojava.nbio.core.sequence.compound.DNACompoundSet;
051import org.biojava.nbio.core.sequence.compound.NucleotideCompound;
052import org.biojava.nbio.core.sequence.template.Compound;
053import org.biojava.nbio.core.sequence.template.Sequence;
054import org.biojava.nbio.structure.AminoAcid;
055import org.biojava.nbio.structure.Atom;
056import org.biojava.nbio.structure.Chain;
057import org.biojava.nbio.structure.Group;
058import org.biojava.nbio.structure.GroupType;
059import org.biojava.nbio.structure.HetatomImpl;
060import org.biojava.nbio.structure.NucleotideImpl;
061import org.biojava.nbio.structure.ResidueNumber;
062import org.biojava.nbio.structure.Structure;
063import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
064import org.biojava.nbio.structure.io.mmcif.chem.ResidueType;
065import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
066import org.slf4j.Logger;
067import org.slf4j.LoggerFactory;
068
069
070/**
071 * Aligns the SEQRES residues to the ATOM residues.
072 * The AminoAcids that can be matched between the two of them will be set in the SEQRES
073 * chains
074 *
075 *
076 * @author Andreas Prlic
077 * @author Jose Duarte
078 */
079public class SeqRes2AtomAligner {
080
081        private static final Logger logger = LoggerFactory.getLogger(SeqRes2AtomAligner.class);
082
083
084
085        private String alignmentString;
086
087        public SeqRes2AtomAligner(){
088                logger.debug("initialising SeqRes2AtomAligner");
089                alignmentString = "";
090        }
091
092        public String getAlignmentString() {
093                return alignmentString;
094        }
095
096        public static Chain getMatchingAtomRes(Chain seqRes, List<Chain> atomList)
097        {
098                Iterator<Chain> iter = atomList.iterator();
099                while(iter.hasNext()){
100                        Chain atomChain = iter.next();
101                        if ( atomChain.getChainID().equals(seqRes.getChainID())){
102                                return atomChain;
103                        }
104                }
105
106                logger.info("Could not match SEQRES chainID >" + seqRes.getChainID() + "< to ATOM chains!, size of atom chain: " + atomList.size());
107                return null;
108        }
109
110
111
112
113        public void align(Structure s, List<Chain> seqResList){
114
115                List<Chain> atomList   = s.getModel(0);
116
117
118                for (Chain seqRes: seqResList){
119
120                                Chain atomRes = getMatchingAtomRes(seqRes,atomList);
121                                if ( atomRes == null)
122                                        continue;
123
124                                mapSeqresRecords(atomRes,seqRes);
125
126
127                }
128
129        }
130
131        /**
132         * Map the seqRes groups to the atomRes chain.
133         * Updates the atomRes chain object with the mapped data
134         * The seqRes chain should not be needed after this and atomRes should be further used.
135         *
136         * @param atomRes the chain containing ATOM groups (in atomGroups slot). This chain
137         * is modified to contain in its seqresGroups slot the mapped atom groups
138         * @param seqRes the chain containing SEQRES groups (in atomGroups slot). This chain
139         * is not modified
140         */
141        public void mapSeqresRecords(Chain atomRes, Chain seqRes) {
142                List<Group> seqResGroups = seqRes.getAtomGroups();
143                List<Group> atmResGroups = atomRes.getAtomGroups();
144
145
146
147                logger.debug("Comparing ATOM {} ({} groups) to SEQRES {} ({} groups) ",
148                                atomRes.getChainID(), atmResGroups.size(), seqRes.getChainID(), seqResGroups.size());
149
150
151                List<Group> matchedGroups = trySimpleMatch(seqResGroups, atmResGroups);
152
153                if ( matchedGroups != null) {
154                        // update the new SEQRES list
155                        atomRes.setSeqResGroups(matchedGroups);
156                        return;
157                }
158
159                logger.debug("Could not map SEQRES to ATOM records easily, need to align...");
160
161                int numAminosSeqres = seqRes.getAtomGroups(GroupType.AMINOACID).size();
162                int numNucleotidesSeqres = seqRes.getAtomGroups(GroupType.NUCLEOTIDE).size();
163
164                if ( numAminosSeqres < 1) {
165
166                        if ( numNucleotidesSeqres > 1) {
167
168                                logger.debug("SEQRES chain {} is a nucleotide chain ({} nucleotides), aligning nucleotides...", seqRes.getChainID(), numNucleotidesSeqres);
169
170                                alignNucleotideChains(seqRes,atomRes);
171                                return;
172                        } else {
173
174                                logger.debug("SEQRES chain {} contains {} amino acids and {} nucleotides, ignoring...", seqRes.getChainID(),numAminosSeqres,numNucleotidesSeqres);
175
176                                return;
177                        }
178                }
179
180                if ( atomRes.getAtomGroups(GroupType.AMINOACID).size() < 1) {
181                        logger.debug("ATOM chain {} does not contain amino acids, ignoring...", atomRes.getChainID());
182                        return;
183                }
184
185                logger.debug("Proceeding to do protein alignment for chain {}", atomRes.getChainID() );
186
187
188                boolean noMatchFound = alignProteinChains(seqResGroups,atomRes.getAtomGroups());
189                if ( ! noMatchFound){
190                        atomRes.setSeqResGroups(seqResGroups);
191                }
192
193        }
194
195        private void alignNucleotideChains(Chain seqRes, Chain atomRes) {
196
197                if ( atomRes.getAtomGroups(GroupType.NUCLEOTIDE).size() < 1) {
198                        logger.debug("ATOM chain {} does not contain nucleotides, ignoring...", atomRes.getChainID());
199
200                        return;
201                }
202                logger.debug("Alignment for chain {}", atomRes.getChainID() );
203
204                List<Group> seqResGroups = seqRes.getAtomGroups();
205                boolean noMatchFound = alignNucleotideGroups(seqResGroups,atomRes.getAtomGroups());
206                if ( ! noMatchFound){
207                        atomRes.setSeqResGroups(seqResGroups);
208                }
209
210        }
211
212        /**
213         * A simple matching approach that tries to do a 1:1 mapping between SEQRES and ATOM records
214         *
215         * @param seqRes
216         * @param atomList
217         * @return the matching or null if the matching didn't work
218         */
219        private List<Group> trySimpleMatch(List<Group> seqResGroups,List<Group> atmResGroups) {
220                // by default first ATOM position is 1
221                //
222
223                @SuppressWarnings("unchecked")
224                List<Group> newSeqResGroups = (ArrayList<Group>)((ArrayList<Group>)seqResGroups).clone();
225
226                boolean startAt1 = true;
227
228                for ( int atomResPos = 0 ; atomResPos < atmResGroups.size() ; atomResPos++){
229
230                        // let's try to match this case
231                        Group atomResGroup = atmResGroups.get(atomResPos);
232
233                        // let's ignore waters
234                        if ( atomResGroup.isWater()){
235                                continue;
236                        }
237
238                        ResidueNumber atomResNum = atomResGroup.getResidueNumber();
239
240                        int seqResPos = atomResNum.getSeqNum();
241
242
243
244                        if ( seqResPos < 0) {
245                                logger.debug("ATOM residue number < 0 : {}", seqResPos);
246                                return null;
247                        }
248
249                        if ( seqResPos == 0){
250                                // make sure the first SEQRES is matching.
251                                Group seqResGroup = seqResGroups.get(0);
252                                if (  seqResGroup.getPDBName().equals(atomResGroup.getPDBName())){
253                                        // they match! seems in this case the numbering starts with 0...
254                                        startAt1 = false;
255                                } else {
256
257                                        logger.debug("SEQRES position 1 ({}) does not match ATOM PDB res num 0 ({})",
258                                                        seqResGroup.getPDBName(), atomResGroup.getPDBName());
259
260
261                                        return null;
262
263                                }
264                        }
265
266
267                        if ( startAt1 )
268                                seqResPos--;
269
270                        // another check that would require the alignment approach
271                        if ( startAt1 && seqResPos >=  seqResGroups.size()  )
272                        {
273
274                                // could be a HETATOM...
275                                if ( atomResGroup instanceof AminoAcid) {
276                                        logger.debug(" ATOM residue nr: " + seqResPos + " > seqres! " + seqResGroups.size() + " " + atomResGroup);
277                                        return null;
278                                } else if ( atomResGroup instanceof NucleotideImpl) {
279                                        logger.debug(" NUCLEOTIDE residue nr: " + seqResPos + " > seqres! " + seqResGroups.size() + " " + atomResGroup);
280                                        return null;
281                                } else {
282                                        // we won't map HETATOM groups...
283                                        continue;
284                                }
285                        }
286
287
288                        //                      if ( seqResPos < 0){
289                        //
290                        //                              System.err.println("What is going on??? " + atomRes.getChainID() + " " + atomResGroup);
291                        //                      }
292
293                        if ( seqResPos >= seqResGroups.size()){
294                                logger.debug("seqres groups don't match atom indices " + seqResPos);
295                                if ( atomResGroup instanceof AminoAcid )
296                                        return null;
297                                else
298                                        continue;
299                        }
300
301                        Group seqResGroup = seqResGroups.get(seqResPos );
302
303                        if ( ! seqResGroup.getPDBName().trim().equals(atomResGroup.getPDBName().trim())){
304                                // a mismatch! something is wrong in the mapping and we need to do an alignment
305                                logger.debug("Mismatch of SEQRES pos " + seqResPos + " and ATOM record: " + atomResGroup + " | " + seqResGroup);
306                                return null;
307                        }
308
309                        // the two groups are identical and we can merge them
310                        // replace the SEQRES group with the ATOM group...
311
312                        Group replacedGroup = newSeqResGroups.set(seqResPos, atomResGroup);
313                        logger.debug("Merging index {}: replaced seqres group {} ({}) with atom group {} ({})",
314                                        seqResPos,
315                                        replacedGroup.getResidueNumber(), replacedGroup.getPDBName(),
316                                        atomResGroup.getResidueNumber(), atomResGroup.getPDBName());
317
318                }
319
320                // all went ok. copy over the updated list to the original one.
321                // note: if something went wrong, we did not modifiy the original list.
322                //seqResGroups = newSeqResGroups;
323
324
325                //                      int pos = -1;
326                //                      for (Group g: seqResGroups){
327                //                              pos++;
328                //                              logger.debug(pos + " " + g);
329                //                      }
330
331                //System.out.println("I:" + seqResGroups);
332                // all atom records could get matched correctly!
333                return newSeqResGroups;
334
335        }
336
337        /**
338         * Returns the full sequence of the Atom records of a parent
339         * with X instead of HETATMSs. The advantage of this is
340         * that it allows us to also align HETATM groups back to the SEQRES.
341         * @param groups the list of groups in a parent
342         * @param positionIndex a Map to keep track of which group is at which sequence position
343         * @param isNucleotideChain whether the atom groups are predominantly nucleotides (the groups represent a nucleotide chain), if true
344         * non-standard nucleotides will be represented with ambiguous letter 'N' instead of 'X', if false all non-standard residues will be 'X'
345         * @return string representations
346         */
347        public static String getFullAtomSequence(List<Group> groups, Map<Integer, Integer> positionIndex, boolean isNucleotideChain){
348
349                StringBuffer sequence = new StringBuffer() ;
350                int seqIndex = 0; // track sequence.length()
351                for ( int i=0 ; i< groups.size(); i++){
352                        Group g = groups.get(i);
353
354                        if ( g instanceof AminoAcid ){
355                                AminoAcid a = (AminoAcid)g;
356                                char oneLetter =a.getAminoType();
357                                if ( oneLetter == '?')
358                                        oneLetter = 'X';
359
360                                positionIndex.put(seqIndex,i);
361                                sequence.append(oneLetter );
362                                seqIndex++;
363                        } else {
364
365                                // exclude solvents
366                                if (  g.isWater())
367                                        continue;
368
369                                // exclude metals
370                                if ( g.size() == 1 ) {
371
372                                        Atom a = g.getAtom(0);
373                                        if ( a == null)
374                                                continue;
375                                        if (a.getElement().isMetal())
376                                                continue;
377
378                                }
379
380                                ChemComp cc = g.getChemComp();
381                                if ( cc == null) {
382                                        logger.debug("No chem comp available for group {}",g.toString());
383                                        // not sure what to do in that case!
384                                        continue;
385                                }
386                                if (
387                                                ResidueType.lPeptideLinking.equals(cc.getResidueType()) ||
388                                                PolymerType.PROTEIN_ONLY.contains(cc.getPolymerType())  ||
389                                                PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())
390                                                ) {
391                                        //System.out.println(cc.getOne_letter_code());
392                                        String c = cc.getOne_letter_code();
393                                        if ( c.equals("?")) {
394                                                if (isNucleotideChain && PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())) {
395                                                        // the way to represent unknown nucleotides is with 'N', see https://en.wikipedia.org/wiki/Nucleic_acid_notation
396                                                        c = "N";
397                                                } else {
398                                                        c = "X";
399                                                }
400                                        }
401
402                                        // For some unusual cases the het residue can map to 2 or more standard aas and thus give an
403                                        // insertion of length >1.
404                                        //      e.g. 1: SUI maps to DG  (in 1oew,A)
405                                        //              e.g. 2: NRQ maps to MYG (in 3cfh,A)
406                                        if (c.length()>1) {
407                                                logger.info("Group '{}' maps to more than 1 standard aminoacid: {}.",
408                                                                g.toString(), c);
409                                        }
410                                        // because of the mapping to more than 1 aminoacid, we have
411                                        // to loop through it (99% of cases c will have length 1 anyway)
412                                        for (int cIdx=0;cIdx<c.length();cIdx++) {
413                                                positionIndex.put(seqIndex,i);
414                                                sequence.append(c.charAt(cIdx));
415                                                seqIndex++;
416                                        }
417                                } else {
418                                        logger.debug("Group {} is not lPeptideLinked, nor PROTEIN_ONLY, nor POLYNUCLEOTIDE_ONLY",g.toString());
419                                        continue;
420                                }
421
422
423                                //sequence.append("X");
424                        }
425
426                }
427
428                return sequence.toString();
429
430        }
431
432
433        private boolean alignNucleotideGroups(List<Group> seqRes, List<Group> atomRes) {
434
435                Map<Integer,Integer> seqresIndexPosition = new HashMap<Integer, Integer>();
436                Map<Integer,Integer> atomIndexPosition   = new HashMap<Integer, Integer>();
437
438                String seq1 = getFullAtomSequence(seqRes, seqresIndexPosition, true);
439                //
440                String seq2 = getFullAtomSequence(atomRes, atomIndexPosition, true);
441
442                if (seq1.isEmpty() || seq2.isEmpty()) {
443                        logger.warn("Could not align nucleotide sequences, at least one of them is empty");
444                        return true;
445                }
446
447                logger.debug("align seq1 ("+ seq1.length()+") " + seq1);
448                logger.debug("align seq2 ("+ seq2.length()+") " + seq2);
449
450                Sequence<NucleotideCompound> s1 = getNucleotideSequence(seq1);
451                Sequence<NucleotideCompound> s2 = getNucleotideSequence(seq2);
452
453                if (s1==null || s2==null) return true;
454
455                if ( ! s1.getCompoundSet().equals(s2.getCompoundSet()) ) {
456                        // e.g. trying to align a DNA and an RNA sequence...
457                        // test PDB ID: 1A34
458                        // try to make both RNA sequence...
459                        if ( ! s1.getCompoundSet().equals(AmbiguityRNACompoundSet.getRNACompoundSet())) {
460                                try {
461                                        s1 = new RNASequence(seq1,AmbiguityRNACompoundSet.getRNACompoundSet());
462                                } catch (CompoundNotFoundException ex) {
463                                        logger.warn("Could not align DNA and RNA compound sets: " + seq1);
464                                        return true;
465                                }
466                        }
467
468                        if( ! s2.getCompoundSet().equals(AmbiguityRNACompoundSet.getRNACompoundSet())) {
469                                try {
470                                        s2 = new RNASequence(seq2,AmbiguityRNACompoundSet.getRNACompoundSet());
471                                } catch (CompoundNotFoundException ex) {
472                                        logger.warn("Could not align DNA and RNA compound sets: " + seq2);
473                                        return true;
474                                }
475                        }
476                }
477
478
479                SubstitutionMatrix<NucleotideCompound> matrix = SubstitutionMatrixHelper.getNuc4_4();
480
481                GapPenalty penalty = new SimpleGapPenalty(8,1);
482
483                PairwiseSequenceAligner<Sequence<NucleotideCompound>, NucleotideCompound> smithWaterman =
484                                Alignments.getPairwiseAligner(s1, s2, PairwiseSequenceAlignerType.LOCAL, penalty, matrix);
485
486
487
488                SequencePair<Sequence<NucleotideCompound>, NucleotideCompound> pair = smithWaterman.getPair();
489
490
491
492                if ( pair == null) {
493                        logger.warn("Could not align nucleotide sequences. ATOM and SEQRES groups will not be aligned.");
494                        logger.warn("Sequences: ");
495                        logger.warn(seq1);
496                        logger.warn(seq2);
497                        return true;
498
499                }
500
501
502
503                logger.debug("Alignment:\n"+pair.toString(100));
504
505
506                boolean noMatchFound = mapDNAChains(seqRes,atomRes,pair,seqresIndexPosition, atomIndexPosition );
507
508                return noMatchFound;
509
510        }
511
512        private Sequence<NucleotideCompound> getNucleotideSequence(String seq) {
513                Sequence<NucleotideCompound> s = null;
514
515                // first we try DNA, then RNA, them hybrid
516
517                try {
518
519                        s = new DNASequence(seq, AmbiguityDNACompoundSet.getDNACompoundSet());
520                } catch (CompoundNotFoundException e){
521
522                        try {
523                                s= new RNASequence(seq, AmbiguityRNACompoundSet.getRNACompoundSet());
524                        } catch (CompoundNotFoundException ex) {
525
526                                try {
527                                        // it could still be a hybrid, e.g. 3hoz, chain T, what to do in that case?
528                                        s = new DNASequence(seq, AmbiguityDNARNAHybridCompoundSet.getDNARNAHybridCompoundSet());
529                                        logger.warn("Hybrid RNA/DNA sequence detected for sequence {}", seq);
530                                } catch (CompoundNotFoundException exc) {
531                                        // not DNA, nor RNA, nor hybrid
532                                        logger.warn("Could not determine compound set (neither DNA, RNA nor hybrid) for nucleotide sequence " + seq);
533                                        return null;
534                                }
535
536                        }
537                }
538                return s;
539        }
540
541
542        /**
543         * Aligns two chains of groups, where the first parent is representing the
544         * list of amino acids as obtained from the SEQRES records, and the second parent
545         * represents the groups obtained from the ATOM records (and containing the actual ATOM information).
546         * This does the actual alignment and if a group can be mapped to a position in the SEQRES then the corresponding
547         * position is replaced with the group that contains the atoms.
548         *
549         * @param seqRes
550         * @param atomRes
551         * @return true if no match has been found
552         */
553        private boolean alignProteinChains(List<Group> seqRes, List<Group> atomRes) {
554
555                Map<Integer,Integer> seqresIndexPosition = new HashMap<Integer, Integer>();
556                Map<Integer,Integer> atomIndexPosition   = new HashMap<Integer, Integer>();
557
558                String seq1 = getFullAtomSequence(seqRes, seqresIndexPosition, false);
559                //
560                String seq2 = getFullAtomSequence(atomRes, atomIndexPosition, false);
561
562
563                logger.debug("Protein seq1 to align (length "+ seq1.length()+"): " + seq1);
564                logger.debug("Protein seq2 to align (length "+ seq2.length()+"): " + seq2);
565
566                ProteinSequence s1;
567                ProteinSequence s2;
568                try {
569                        s1 = new ProteinSequence(seq1);
570                        s2 = new ProteinSequence(seq2);
571                } catch (CompoundNotFoundException e) {
572                        logger.warn("Could not create protein sequences ({}) to align ATOM and SEQRES groups, they will remain unaligned.", e.getMessage());
573                        return true;
574                }
575
576
577                SubstitutionMatrix<AminoAcidCompound> matrix = SubstitutionMatrixHelper.getBlosum65();
578
579                GapPenalty penalty = new SimpleGapPenalty(8, 1);
580
581
582                PairwiseSequenceAligner<ProteinSequence, AminoAcidCompound> smithWaterman =
583                                Alignments.getPairwiseAligner(s1, s2, PairwiseSequenceAlignerType.LOCAL, penalty, matrix);
584
585                SequencePair<ProteinSequence, AminoAcidCompound> pair = smithWaterman.getPair();
586
587
588                // sequences that are only X (e.g. 1jnv chain A) produced empty alignments, because nothing aligns to nothing and thus the local alignment is empty
589                // to avoid those empty alignments we catch them here with pair.getLength()==0
590                if ( pair == null || pair.getLength()==0) {
591                        logger.warn("Could not align protein sequences. ATOM and SEQRES groups will not be aligned.");
592                        logger.warn("Sequences: ");
593                        logger.warn(seq1);
594                        logger.warn(seq2);
595                        return true;
596                }
597
598
599                logger.debug("Alignment:\n"+pair.toString(100));
600
601
602                boolean noMatchFound = mapChains(seqRes,atomRes,pair,seqresIndexPosition, atomIndexPosition );
603
604                return noMatchFound;
605
606
607        }
608
609
610        private boolean mapChains(List<Group> seqResGroups, List<Group> atomRes,
611                        SequencePair<ProteinSequence, AminoAcidCompound> pair,
612                        Map<Integer,Integer> seqresIndexPosition,
613                        Map<Integer,Integer> atomIndexPosition )   {
614
615
616
617                // at the present stage the future seqRes are still stored as Atom groups in the seqRes parent...
618
619
620                int aligLength = pair.getLength();
621
622                // make sure we actually find an alignment
623                boolean noMatchFound = true;
624
625                Compound gapSymbol =  AminoAcidCompoundSet.getAminoAcidCompoundSet().getCompoundForString("-");
626
627                mainLoop:
628                        for (int i = 1; i <= aligLength ; i++) {
629
630                                Compound s =  pair.getCompoundAt(1, i);
631                                Compound a =  pair.getCompoundAt(2, i);
632
633                                // alignment is using internal index start at 1...
634                                int posSeq  = pair.getIndexInQueryAt(i)  - 1;
635                                int posAtom = pair.getIndexInTargetAt(i) - 1;
636
637                                if (  s.equals(gapSymbol) || a.equals(gapSymbol)){
638                                        continue;
639                                }
640
641                                if ( s.equals(a)){
642
643                                        // the atom record can be aligned to the SeqRes record!
644                                        // replace the SeqRes group with the Atom group!
645
646                                        Group s1 = seqResGroups.get(seqresIndexPosition.get(posSeq));
647                                        Group a1 = atomRes.get(atomIndexPosition.get(posAtom));
648
649                                        if ( s1 == null || a1 == null){
650                                                /// can't map this position...
651                                                logger.warn("can't map " + i + ":" + s + " " + posSeq +" " + s1 + " atom: " + posAtom + " " + a1 );
652                                                continue mainLoop;
653                                        }
654
655                                        // need to trim the names to allow matching e.g in
656                                        // pdb1b2m
657                                        String pdbNameS = s1.getPDBName();
658                                        String pdbNameA = a1.getPDBName();
659
660                                        if ( pdbNameS == null || pdbNameA == null ){
661                                                logger.warn("null value for group.getPDBName found at {} when trying to align {} and {} {}",posSeq, s1, a1, posAtom);
662                                                logger.warn("ATOM and SEQRES sequences will not be aligned.");
663                                                return true;
664                                        }
665
666                                        if ( ! pdbNameA.trim().equals(pdbNameS.trim())) {
667
668                                                String msg = "'"+ s1 + "' (position " + posSeq + ") does not align with '" + a1+ "' (position " + posAtom + "), should be: " + s + " : " + a;
669
670                                                if ( s1.getType().equals(HetatomImpl.type) && a1.getType().equals(HetatomImpl.type)){
671                                                        logger.info(msg + ". They seem to be hetatoms, so ignoring mismatch.");
672                                                }
673                                                else {
674                                                        logger.warn(msg + ". This could be a problem because they aren't both hetatoms");
675                                                }
676
677                                        }
678
679                                        // do the actual replacing of the SEQRES group with the ATOM group
680                                        //                                      if ( s1.getChain().getChainID().equals("A")) {
681                                        //                                              System.out.println(" setting " + posSeq +" " + a1);
682                                        //                                      }
683                                        seqResGroups.set(seqresIndexPosition.get(posSeq),a1);
684                                        noMatchFound = false;
685                                }
686                        }
687
688
689                // now we merge the two chains into one
690                // the Groups that can be aligned are now pointing to the
691                // groups in the Atom records.
692                if (  noMatchFound) {
693
694                        logger.debug("no alignment found!");
695                }
696                return noMatchFound;
697
698        }
699
700        private boolean mapDNAChains(List<Group> seqResGroups, List<Group> atomRes,
701                        SequencePair<Sequence<NucleotideCompound>, NucleotideCompound> pair,
702                        Map<Integer,Integer> seqresIndexPosition,
703                        Map<Integer,Integer> atomIndexPosition)   {
704
705
706                // at the present stage the future seqREs are still stored as Atom groups in the seqRes parent...
707
708
709                int aligLength = pair.getLength();
710
711                //              System.out.println("sequence length seqres:");
712                //              System.out.println(seqresIndexPosition.keySet().size());
713                //              System.out.println("alignment length: " + aligLength);
714                // System.out.println(gapSymbol.getName());
715
716                // make sure we actually find an alignment
717                boolean noMatchFound = true;
718
719                Compound gapSymbol =  DNACompoundSet.getDNACompoundSet().getCompoundForString("-");
720
721                mainLoop:
722                        for (int i = 1; i <= aligLength ; i++) {
723
724                                Compound s =  pair.getCompoundAt(1, i);
725                                Compound a =  pair.getCompoundAt(2,i);
726
727                                // alignment is using internal index start at 1...
728                                int posSeq = pair.getIndexInQueryAt(i)  -1;
729                                int posAtom = pair.getIndexInTargetAt(i) -1;
730
731                                if (  s.equals(gapSymbol) || a.equals(gapSymbol)){
732                                        continue;
733                                }
734
735                                if ( s.equals(a)){
736
737                                        // the atom record can be aligned to the SeqRes record!
738                                        // replace the SeqRes group with the Atom group!
739
740                                        Group s1 = seqResGroups.get(seqresIndexPosition.get(posSeq));
741                                        Group a1 = atomRes.get(atomIndexPosition.get(posAtom));
742
743                                        if ( s1 == null || a1 == null){
744                                                /// can't map this position...
745                                                logger.warn("can't map " + i + ":" + s + " " + posSeq +" " + s1 + " atom: " + posAtom + " " + a1 );
746                                                continue mainLoop;
747                                        }
748
749                                        // need to trim the names to allow matching e.g in
750                                        // pdb1b2m
751                                        String pdbNameS = s1.getPDBName();
752                                        String pdbNameA = a1.getPDBName();
753                                        if ( pdbNameS == null || pdbNameA == null ){
754                                                logger.warn("null value found for group.getPDBName() at " + posSeq + " when trying to align " + s1 + " and " + a1 + " " + posAtom);
755                                                logger.warn("ATOM and SEQRES sequences will not be aligned.");
756                                        }
757                                        if (! pdbNameA.equals(pdbNameS)){
758                                                if ( ! pdbNameA.trim().equals(pdbNameS.trim())) {
759                                                        logger.info(s1 + " " + posSeq + " does not align with " + a1+ " " + posAtom + " should be: " + s + " : " + a);
760                                                        if ( s1.getType().equals(HetatomImpl.type) && a1.getType().equals(HetatomImpl.type)){
761                                                                logger.info("they seem to be hetatoms, so ignoring mismatch.");
762                                                        }
763                                                        else {
764                                                                //  System.exit(0);// for debug only
765                                                                //System.out.println(lst1.seqString());
766                                                                //System.out.println(lst2.seqString());
767                                                                logger.warn("Could not match residues " + s1 + " " + a1);
768                                                        }
769
770                                                }
771                                        }
772
773                                        // do the actual replacing of the SEQRES group with the ATOM group
774                                        //                                      if ( s1.getChain().getChainID().equals("A")) {
775                                        //                                              System.out.println(" setting " + posSeq +" " + a1);
776                                        //                                      }
777                                        seqResGroups.set(seqresIndexPosition.get(posSeq),a1);
778                                        noMatchFound = false;
779                                }
780                        }
781
782
783                // now we merge the two chains into one
784                // the Groups that can be aligned are now pointing to the
785                // groups in the Atom records.
786                if (  noMatchFound) {
787
788                        logger.debug("no alignment found!");
789                }
790                return noMatchFound;
791
792        }
793
794        /**
795         * Storing unaligned SEQRES groups in a Structure.
796         * @param structure
797         * @param seqResChains
798         */
799        public static void storeUnAlignedSeqRes(Structure structure, List<Chain> seqResChains, boolean headerOnly) {
800
801                for (int i = 0; i < structure.nrModels(); i++) {
802                        List<Chain> atomChains   = structure.getModel(i);
803
804                        for (Chain seqRes: seqResChains){
805                                Chain atomRes;
806
807                                if (headerOnly) {
808                                        // In header-only mode skip ATOM records.
809                                        // Here we store chains with SEQRES instead of AtomGroups.
810                                        seqRes.setSeqResGroups(seqRes.getAtomGroups());
811                                        seqRes.setAtomGroups(new ArrayList<Group>()); // clear out the atom groups.
812                                        atomChains.add(seqRes);
813                                } else {
814                                        // Otherwise, we find a chain with AtomGroups
815                                        // and set this as SEQRES groups.
816                                        atomRes = SeqRes2AtomAligner.getMatchingAtomRes(seqRes,atomChains);
817                                        if ( atomRes != null)
818                                                atomRes.setSeqResGroups(seqRes.getAtomGroups());
819                                        else
820                                                logger.warn("Could not find atom records for chain " + seqRes.getChainID());
821                                }
822                        }
823                        if (headerOnly) {
824                                structure.setChains(i, atomChains);
825                        }
826                }
827        }
828}