001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 25.04.2004
021 * @author Andreas Prlic
022 *
023 */
024package org.biojava.nbio.structure;
025
026import org.biojava.nbio.core.sequence.template.Sequence;
027import org.biojava.nbio.structure.io.FileParsingParameters;
028import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
029
030import java.io.Serializable;
031import java.util.List;
032
033/**
034 * <p>
035 * Defines the interface for a Chain. A Chain corresponds to a Chain in a PDB file.
036 * A chain consists out of a list of {@link Group} objects. A Group can either be
037 * an {@link AminoAcid}, {@link HetatomImpl Hetatom} or {@link NucleotideImpl Nucleotide}.
038 * </p>
039 *
040 * <p>
041 * The BioJava API provides access to both the ATOM and SEQRES records in a PDB file.
042 * During parsing of a PDB file it aligns the ATOM and SEQRES groups and joins them.
043 * The SEQRES sequence can be accessed via  {@link #getSeqResGroups()} and the
044 * ATOM groups via {@link #getAtomGroups()}. Groups that have been observed
045 * (i.e. they are in the ATOM records) can be detected by {@link Group}.has3D()
046 *  </p>
047 *
048 * @author Andreas Prlic
049 * @version %I% %G%
050 * @since 1.4
051 */
052public interface Chain extends Serializable {
053
054        /** returns an identical copy of this Chain.
055         * @return  an identical copy of this Chain
056         */
057        Object clone();
058
059        /** add a group to the list of ATOM record group of this chain.
060         * To add SEQRES records a more complex alignment between ATOM and SEQRES residues
061         * is required, please see SeqRes2AtomAligner for more details on that.
062         * @param group  a Group object
063         */
064        void addGroup(Group group);
065
066        /** Get the 'private' asymId (internal chain IDs in mmCif) for this chain.
067         *
068         * @return the asymId
069         * @see #setId(String)
070         * @see #getName()
071         */
072        String getId() ;
073
074
075        /**
076         * Set the 'private' asymId (internal chain IDs in mmCif) for this chain.
077         *
078         * @param asymId the internal chain Id
079         */
080        void setId(String asymId) ;
081
082
083        /**
084         * Set the 'public' authId (chain ID in PDB file)
085         *
086         * @param authId the 'public' authId (chain ID in PDB file)
087         * @see #getId()
088         */
089        void setName(String authId);
090
091        /**
092         * Get the 'public' authId (chain ID in PDB file)
093         *
094         * @return the authId for this chain.
095         * @see #getId()
096         */
097        String getName();
098
099
100        /**
101         * Return the Group at given position,
102         * from within Groups with observed density in the chain, i.e.
103         * those with coordinates in ATOM and HETATMS (including waters) records.
104         * @param position  an int
105         * @return a Group object
106         * @see #getAtomLength()
107         * @see #getAtomGroups()
108         * @see #getSeqResGroup(int)
109         */
110        Group getAtomGroup (int position);
111
112        /**
113         * Return the Group at given position,
114         * from within groups in the SEQRES records of the chain, i.e.
115         * the aminoacids/nucleotides in the construct.
116         * @param position  an int
117         * @return a Group object
118         * @see #getSeqResLength()
119         * @see #getSeqResGroups()
120         * @see #getAtomGroup(int)
121         */
122        Group getSeqResGroup (int position);
123
124
125        /**
126         * Return all Groups with observed density in the chain, i.e.
127         * those with coordinates in ATOM and HETATMS (including waters) records.
128         *
129         * @return a List object representing the Groups of this Chain.
130         * @see #setAtomGroups(List)
131         * @see #getAtomLength()
132         * @see #getSeqResGroups()
133         */
134        List<Group> getAtomGroups();
135
136        /**
137         * Set all Groups with observed density in the chain, i.e.
138         * those with coordinates in ATOM and HETATMs (including waters) records.
139         * @param groups a List object representing the Groups of this Chain.
140         * @see #getAtomGroups()
141         */
142        void setAtomGroups(List<Group> groups);
143
144        /**
145         * Return a List of all (observed) Groups of a special type, one of: {@link GroupType#AMINOACID},
146         * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}.
147         * Note that if a standard aminoacid appears as a HETATM (because it is part of a ligand) then
148         * it is still considered as {@link GroupType#AMINOACID} and not as {@link GroupType#HETATM}.
149         * @param type  GroupType
150         * @return a List object
151         * @see #setAtomGroups(List)
152         */
153        List<Group> getAtomGroups (GroupType type);
154
155
156        /**
157         * Get a group by its PDB residue numbering. If the PDB residue number is not known,
158         * throws a StructureException.
159         *
160         * @param resNum the PDB residue number of the group
161         * @return the matching group
162         * @throws StructureException
163         */
164        Group getGroupByPDB(ResidueNumber resNum) throws StructureException;
165
166        /**
167         * Get all groups that are located between two PDB residue numbers.
168         *
169         * @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start.
170         * @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end.
171         * @return Groups in between. or throws a StructureException if either start or end can not be found,
172         * @throws StructureException
173         */
174        Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd) throws StructureException;
175
176
177        /**
178         * Get all groups that are located between two PDB residue numbers. In contrast to getGroupsByPDB
179         * this method call ignores if the exact outer groups are not found. This is useful e.g. when requesting the range
180         * of groups as specified by the DBREF records - these frequently are rather inaccurate.
181         *
182         *
183         * @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start.
184         * @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end.
185         * @param ignoreMissing ignore missing groups in this range.
186         * @return Groups in between. or throws a StructureException if either start or end can not be found,
187         * @throws StructureException
188         *
189         */
190        Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd,boolean ignoreMissing) throws StructureException;
191
192
193        /**
194         * Returns the number of Groups with observed density in the chain, i.e.
195         * those with coordinates in ATOM and HETATMs (including waters) records
196         *
197         * @return the length
198         * @see #getAtomGroup(int)
199         * @see #getAtomGroups()
200         * @see #getSeqResLength())
201         */
202        int getAtomLength();
203
204        /**
205         * Returns the number of groups in the SEQRES records of the chain, i.e.
206         * the number of aminoacids/nucleotides in the construct
207         *
208         * @return the length
209         * @see #getSeqResGroup(int)
210         * @see #getSeqResGroups()
211         * @see #getAtomLength()
212         */
213        int getSeqResLength();
214
215        /**
216         * Sets the Entity information
217         * @param entityInfo the EntityInfo
218         * @see #getEntityInfo()
219         */
220        void setEntityInfo(EntityInfo entityInfo);
221
222        /**
223         * Returns the EntityInfo for this chain.
224         *
225         * @return the EntityInfo object
226         * @see #setEntityInfo(EntityInfo)
227         */
228        EntityInfo getEntityInfo();
229
230        /**
231         * Sets the 'private' asymId of this chain (Chain id in PDB file ).
232         * @param asymId  a String specifying the name value
233         * @see #getChainID()
234         * @deprecated  use {@link #setId(String asymId)} instead
235         */
236        @Deprecated
237        void setChainID(String asymId);
238
239
240
241        /**
242         * Gets the 'private' asymId of this chain.
243         * @return a String representing the name value
244         * @see #setChainID(String)
245         * @deprecated  use getId() instead
246         */
247        @Deprecated
248        String getChainID();
249
250
251        /**
252         * If available, returns the internal chain ID that is used in mmCIF files (asym_id), otherwise null
253         *
254         * @return String or null
255         * @since 3.0.5
256         * @deprecated  use {@link #getId()} instead
257         */
258        String getInternalChainID();
259
260        /**
261         * Sets the internal chain ID that is used in mmCif files
262         *
263         * @param internalChainID
264         * @since 3.0.5
265         * @deprecated use {@link #setId()} instead
266         */
267        void setInternalChainID(String internalChainID);
268
269
270        @Override
271        String toString();
272
273
274        /**
275         * Converts the SEQRES groups of a Chain to a Biojava Sequence object.
276         *
277         * @return the SEQRES groups of the Chain as a Sequence object.
278         */
279        Sequence<?> getBJSequence()  ;
280
281        /**
282         * Returns the sequence of amino acids as it has been provided in the ATOM records.
283         * Non-standard residues will be present in the string only if the property
284         * {@value org.biojava.nbio.structure.io.PDBFileReader#LOAD_CHEM_COMP_PROPERTY} has been set.
285         * @return amino acid sequence as string
286         * @see #getSeqResSequence()
287         */
288        String getAtomSequence();
289
290        /**
291         * Returns the PDB SEQRES sequence as a one-letter sequence string.
292         * Non-standard residues are represented by an "X".
293         * @return one-letter PDB SEQRES sequence as string
294         * @see #getAtomSequence()
295         */
296        String getSeqResSequence();
297
298        /**
299         * Sets the Swissprot id of this chain.
300         * @param sp_id  a String specifying the swissprot id value
301         * @see #getSwissprotId()
302         */
303        void setSwissprotId(String sp_id);
304
305        /**
306         * Gets the Swissprot id of this chain.
307         * @return a String representing the swissprot id value
308         * @see #setSwissprotId(String sp_id)
309         */
310        String getSwissprotId() ;
311
312
313        /**
314         * Returns a List of all SEQRES groups of a special type, one of: {@link GroupType#AMINOACID},
315         * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}.
316         * @param type  a GroupType
317         * @return an List object
318         * @see #setSeqResGroups(List)
319         */
320        List<Group> getSeqResGroups (GroupType type);
321
322        /**
323         * Returns a list of all groups in SEQRES records of the chain, i.e.
324         * the aminoacids/nucleotides in the construct.
325         * @return a List of all Group objects of this chain
326         * @see #setSeqResGroups(List)
327         * @see #getSeqResLength()
328         * @see #getAtomGroups()
329         */
330        List<Group> getSeqResGroups ();
331
332        /**
333         * Sets the list of SeqResGroups for this chain.
334         *
335         * @param seqResGroups a List of Group objects that from the SEQRES groups of this chain.
336         * @see #getSeqResGroups()
337         */
338        void setSeqResGroups(List<Group> seqResGroups);
339
340        /**
341         * Sets the back-reference to its parent Structure.
342         * @param parent the parent Structure object for this Chain
343         * @see #getStructure()
344         * @deprecated  use setStructure instead
345         *
346         */
347        @Deprecated
348         void setParent(Structure parent) ;
349
350        /**
351         * Sets the back-reference to its parent Structure.
352         *
353         * @param parent
354         */
355        void setStructure(Structure parent) ;
356
357        /**
358         * Returns the parent Structure of this chain.
359         *
360         * @return the parent Structure object
361         * @see #setStructure(Structure)
362         * @deprecated use getStructure(Structure) instead.
363         */
364        @Deprecated
365        Structure getParent() ;
366
367
368        /**
369         * Returns the parent Structure of this chain.
370         *
371         * @return the parent Structure object
372         * @see #setStructure(Structure)
373         */
374        Structure getStructure() ;
375
376        /**
377         * Gets all groups that are not polymer groups and that are not solvent groups.
378         * Will automatically fetch Chemical Component files from the PDB web site, even if
379         * {@link FileParsingParameters#setLoadChemCompInfo(boolean)} has not been set to true.
380         * Otherwise the Ligands could not correctly be identified.
381         * @return list of Groups that are ligands
382         * @deprecated since biojava 5.0 this does not apply anymore. Chains contain either
383         * polymeric groups or non-polymeric groups
384         */
385        @Deprecated
386        List<Group> getAtomLigands();
387
388        /**
389         * Convert this Chain to a String in PDB format
390         * @return
391         */
392        String toPDB();
393
394        /**
395         * Convert this Chain to a String in mmCIF format
396         * @return
397         */
398        String toMMCIF();
399
400
401        /**
402         * Sets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category
403         *
404         * @param seqMisMatches
405         */
406        void setSeqMisMatches(List<SeqMisMatch> seqMisMatches);
407
408        /**
409         * Gets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category
410         *
411         * @returns a list of sequence mismatches (or null if none found)
412         */
413        List<SeqMisMatch> getSeqMisMatches();
414
415        /**
416         * Returns the EntityType of this chain. Equivalent to getEntityInfo().getType()
417         * @return
418         * @see EntityType
419         */
420        EntityType getEntityType();
421
422        /** Tests if a chain is consisting of water molecules only
423         *
424         * @return true if there are only solvent molecules in this chain.
425         */
426        public boolean isWaterOnly();
427
428        /**  Returns true if the given chain is composed of non-polymeric (including water) groups only.
429         *
430         * @return true if only non-polymeric groups in this chain.
431         */
432        public boolean isPureNonPolymer();
433
434        /**
435         * Get the predominant {@link GroupType} for a given Chain, following these
436         * rules: <li>if the ratio of number of residues of a certain
437         * {@link GroupType} to total non-water residues is above the threshold
438         * {@value #org.biojava.nbio.structure.StructureTools.RATIO_RESIDUES_TO_TOTAL}, then that {@link GroupType} is
439         * returned</li> <li>if there is no {@link GroupType} that is above the
440         * threshold then the {@link GroupType} with most members is chosen, logging
441         * it</li>
442         * <p>
443         * See also {@link ChemComp#getPolymerType()} and
444         * {@link ChemComp#getResidueType()} which follow the PDB chemical component
445         * dictionary and provide a much more accurate description of groups and
446         * their linking.
447         * </p>
448         *
449         * @return
450         */
451        public GroupType getPredominantGroupType();
452
453        /**
454         * Tell whether given chain is a protein chain
455         *
456         * @return true if protein, false if nucleotide or ligand
457         * @see #getPredominantGroupType()
458         */
459        public  boolean isProtein();
460
461        /**
462         * Tell whether given chain is DNA or RNA
463         *
464         * @return true if nucleic acid, false if protein or ligand
465         * @see #getPredominantGroupType()
466         */
467        public  boolean isNucleicAcid();
468}