/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on 25.04.2004
 * @author Andreas Prlic
 *
 */
package org.biojava.nbio.structure;

import org.biojava.nbio.core.sequence.template.Sequence;
import org.biojava.nbio.structure.chem.ChemComp;

import java.io.Serializable;
import java.util.List;

032/**
033 * <p>
034 * Defines the interface for a Chain. A Chain corresponds to a Chain in a PDB file.
035 * A chain consists of a list of {@link Group} objects. A Group can either be
036 * an {@link AminoAcid}, {@link HetatomImpl Hetatom} or {@link NucleotideImpl Nucleotide}.
037 * </p>
038 *
039 * <p>
040 * The BioJava API provides access to both the ATOM and SEQRES records in a PDB file.
041 * During parsing of a PDB file it aligns the ATOM and SEQRES groups and joins them.
042 * The SEQRES sequence can be accessed via  {@link #getSeqResGroups()} and the
043 * ATOM groups via {@link #getAtomGroups()}. Groups that have been observed
044 * (i.e. they are in the ATOM records) can be detected by {@link Group}.has3D()
045 *  </p>
046 *
047 * @author Andreas Prlic
048 * @version %I% %G%
049 * @since 1.4
050 */
051public interface Chain extends Serializable {
052
053        /**
054         * Returns an identical copy of this Chain.
055         * @return  an identical copy of this Chain
056         */
057        Object clone();
058
059        /**
060         * Add a group to the list of ATOM record group of this chain.
061         * To add SEQRES records a more complex alignment between ATOM and SEQRES residues
062         * is required, please see SeqRes2AtomAligner for more details on that.
063         * @param group  a Group object
064         */
065        void addGroup(Group group);
066
067        /**
068         * Get the 'private' asymId (internal chain IDs in mmCif) for this chain.
069         *
070         * @return the asymId
071         * @see #setId(String)
072         * @see #getName()
073         */
074        String getId() ;
075
076
077        /**
078         * Set the 'private' asymId (internal chain IDs in mmCif) for this chain.
079         *
080         * @param asymId the internal chain Id
081         */
082        void setId(String asymId) ;
083
084
085        /**
086         * Set the 'public' authId (chain ID in PDB file)
087         *
088         * @param authId the 'public' authId (chain ID in PDB file)
089         * @see #getId()
090         */
091        void setName(String authId);
092
093        /**
094         * Get the 'public' authId (chain ID in PDB file)
095         *
096         * @return the authId for this chain.
097         * @see #getId()
098         */
099        String getName();
100
101
102        /**
103         * Return the Group at given position,
104         * from within Groups with observed density in the chain, i.e.
105         * those with coordinates in ATOM and HETATMS (including waters) records.
106         * @param position  an int
107         * @return a Group object
108         * @see #getAtomLength()
109         * @see #getAtomGroups()
110         * @see #getSeqResGroup(int)
111         */
112        Group getAtomGroup (int position);
113
114        /**
115         * Return the Group at given position,
116         * from within groups in the SEQRES records of the chain, i.e.
117         * the aminoacids/nucleotides in the construct.
118         * @param position  an int
119         * @return a Group object
120         * @see #getSeqResLength()
121         * @see #getSeqResGroups()
122         * @see #getAtomGroup(int)
123         */
124        Group getSeqResGroup (int position);
125
126
127        /**
128         * Return all Groups with observed density in the chain, i.e.
129         * those with coordinates in ATOM and HETATMS (including waters) records.
130         *
131         * @return a List object representing the Groups of this Chain.
132         * @see #setAtomGroups(List)
133         * @see #getAtomLength()
134         * @see #getSeqResGroups()
135         */
136        List<Group> getAtomGroups();
137
138        /**
139         * Set all Groups with observed density in the chain, i.e.
140         * those with coordinates in ATOM and HETATMs (including waters) records.
141         * @param groups a List object representing the Groups of this Chain.
142         * @see #getAtomGroups()
143         */
144        void setAtomGroups(List<Group> groups);
145
146        /**
147         * Return a List of all (observed) Groups of a special type, one of: {@link GroupType#AMINOACID},
148         * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}.
149         * Note that if a standard aminoacid appears as a HETATM (because it is part of a ligand) then
150         * it is still considered as {@link GroupType#AMINOACID} and not as {@link GroupType#HETATM}.
151         * @param type  GroupType
152         * @return a List object
153         * @see #setAtomGroups(List)
154         */
155        List<Group> getAtomGroups (GroupType type);
156
157
158        /**
159         * Get a group by its PDB residue numbering. If the PDB residue number is not known,
160         * throws a StructureException.
161         *
162         * @param resNum the PDB residue number of the group
163         * @return the matching group
164         * @throws StructureException
165         */
166        Group getGroupByPDB(ResidueNumber resNum) throws StructureException;
167
168        /**
169         * Get all groups that are located between two PDB residue numbers.
170         *
171         * @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start.
172         * @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end.
173         * @return Groups in between. or throws a StructureException if either start or end can not be found,
174         * @throws StructureException
175         */
176        Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd) throws StructureException;
177
178
179        /**
180         * Get all groups that are located between two PDB residue numbers. In contrast to getGroupsByPDB
181         * this method call ignores if the exact outer groups are not found. This is useful e.g. when requesting the range
182         * of groups as specified by the DBREF records - these frequently are rather inaccurate.
183         *
184         *
185         * @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start.
186         * @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end.
187         * @param ignoreMissing ignore missing groups in this range.
188         * @return Groups in between. or throws a StructureException if either start or end can not be found,
189         * @throws StructureException
190         *
191         */
192        Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd,boolean ignoreMissing) throws StructureException;
193
194
195        /**
196         * Returns the number of Groups with observed density in the chain, i.e.
197         * those with coordinates in ATOM and HETATMs (including waters) records
198         *
199         * @return the length
200         * @see #getAtomGroup(int)
201         * @see #getAtomGroups()
202         * @see #getSeqResLength()
203         */
204        int getAtomLength();
205
206        /**
207         * Returns the number of groups in the SEQRES records of the chain, i.e.
208         * the number of aminoacids/nucleotides in the construct
209         *
210         * @return the length
211         * @see #getSeqResGroup(int)
212         * @see #getSeqResGroups()
213         * @see #getAtomLength()
214         */
215        int getSeqResLength();
216
217        /**
218         * Sets the Entity information
219         * @param entityInfo the EntityInfo
220         * @see #getEntityInfo()
221         */
222        void setEntityInfo(EntityInfo entityInfo);
223
224        /**
225         * Returns the EntityInfo for this chain.
226         *
227         * @return the EntityInfo object
228         * @see #setEntityInfo(EntityInfo)
229         */
230        EntityInfo getEntityInfo();
231
232        @Override
233        String toString();
234
235        /**
236         * Converts the SEQRES groups of a Chain to a Biojava Sequence object.
237         *
238         * @return the SEQRES groups of the Chain as a Sequence object.
239         */
240        Sequence<?> getBJSequence()  ;
241
242        /**
243         * Returns the sequence of amino acids as it has been provided in the ATOM records.
244         * @return amino acid sequence as string
245         * @see #getSeqResSequence()
246         */
247        String getAtomSequence();
248
249        /**
250         * Returns the PDB SEQRES sequence as a one-letter sequence string.
251         * Non-standard residues are represented by an "X".
252         * @return one-letter PDB SEQRES sequence as string
253         * @see #getAtomSequence()
254         */
255        String getSeqResSequence();
256
257        /**
258         * Returns a List of all SEQRES groups of a special type, one of: {@link GroupType#AMINOACID},
259         * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}.
260         * @param type  a GroupType
261         * @return an List object
262         * @see #setSeqResGroups(List)
263         */
264        List<Group> getSeqResGroups (GroupType type);
265
266        /**
267         * Returns a list of all groups in SEQRES records of the chain, i.e.
268         * the aminoacids/nucleotides in the construct.
269         * @return a List of all Group objects of this chain
270         * @see #setSeqResGroups(List)
271         * @see #getSeqResLength()
272         * @see #getAtomGroups()
273         */
274        List<Group> getSeqResGroups ();
275
276        /**
277         * Sets the list of SeqResGroups for this chain.
278         *
279         * @param seqResGroups a List of Group objects that from the SEQRES groups of this chain.
280         * @see #getSeqResGroups()
281         */
282        void setSeqResGroups(List<Group> seqResGroups);
283
284        /**
285         * Sets the back-reference to its parent Structure.
286         *
287         * @param parent
288         */
289        void setStructure(Structure parent) ;
290
291        /**
292         * Returns the parent Structure of this chain.
293         *
294         * @return the parent Structure object
295         * @see #setStructure(Structure)
296         */
297        Structure getStructure() ;
298
299        /**
300         * Convert this Chain to a String in PDB format
301         * @return
302         */
303        String toPDB();
304
305        /**
306         * Convert this Chain to a String in mmCIF format
307         * @return
308         */
309        String toMMCIF();
310
311
312        /**
313         * Sets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category
314         *
315         * @param seqMisMatches
316         */
317        void setSeqMisMatches(List<SeqMisMatch> seqMisMatches);
318
319        /**
320         * Gets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category
321         *
322         * @return a list of sequence mismatches (or null if none found)
323         */
324        List<SeqMisMatch> getSeqMisMatches();
325
326        /**
327         * Returns the EntityType of this chain. Equivalent to getEntityInfo().getType()
328         * @return
329         * @see EntityType
330         */
331        EntityType getEntityType();
332
333        /**
334         * Tests if a chain is consisting of water molecules only
335         *
336         * @return true if there are only solvent molecules in this chain.
337         */
338        public boolean isWaterOnly();
339
340        /**
341         * Returns true if the given chain is composed of non-polymeric (including water) groups only.
342         *
343         * @return true if only non-polymeric groups in this chain.
344         */
345        public boolean isPureNonPolymer();
346
347        /**
348         * Get the predominant {@link GroupType} for a given Chain, following these
349         * rules:
350         * <ul>
351         * <li>if the ratio of number of residues of a certain
352         * {@link GroupType} to total non-water residues is above the threshold
353         * {@value org.biojava.nbio.structure.StructureTools#RATIO_RESIDUES_TO_TOTAL}, then that {@link GroupType} is
354         * returned</li> <li>if there is no {@link GroupType} that is above the
355         * threshold then the {@link GroupType} with most members is chosen, logging
356         * it</li>
357         * </ul>
358         * <p>
359         * See also {@link ChemComp#getPolymerType()} and
360         * {@link ChemComp#getResidueType()} which follow the PDB chemical component
361         * dictionary and provide a much more accurate description of groups and
362         * their linking.
363         *
364         * @return the predominant group type
365         */
366        GroupType getPredominantGroupType();
367
368        /**
369         * Tell whether given chain is a protein chain
370         *
371         * @return true if protein, false if nucleotide or ligand
372         * @see #getPredominantGroupType()
373         */
374        boolean isProtein();
375
376        /**
377         * Tell whether given chain is DNA or RNA
378         *
379         * @return true if nucleic acid, false if protein or ligand
380         * @see #getPredominantGroupType()
381         */
382        boolean isNucleicAcid();
383}