001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 25.04.2004
021 * @author Andreas Prlic
022 *
023 */
024package org.biojava.nbio.structure;
025
026import org.biojava.nbio.core.sequence.template.Sequence;
027import org.biojava.nbio.structure.io.FileParsingParameters;
028import org.biojava.nbio.structure.io.PDBFileReader;
029
030import java.util.List;
031
032/**
033 * <p>
034 * Defines the interface for a Chain. A Chain corresponds to a Chain in a PDB file.
035 * A chain consists of a list of {@link Group} objects. A Group can either be
036 * an {@link AminoAcid}, {@link HetatomImpl Hetatom} or {@link NucleotideImpl Nucleotide}.
037 * </p>
038 *
039 * <p>
040 * The BioJava API provides access to both the ATOM and SEQRES records in a PDB file.
041 * During parsing of a PDB file it aligns the ATOM and SEQRES groups and joins them.
042 * The SEQRES sequence can be accessed via {@link #getSeqResGroups()} and the
043 * ATOM groups via {@link #getAtomGroups()}. Groups that have been observed
044 * (i.e. they are in the ATOM records) can be detected by {@link Group#has3D()}.
045 * </p>
046 *
047 * @author Andreas Prlic
048 * @version %I% %G%
049 * @since 1.4
050 */
051public interface Chain {
052
053        /** Returns an identical copy of this Chain.
054         * @return  an identical copy of this Chain
055         */
056        public Object clone();
057
058        /** Adds a group to the list of ATOM record groups of this chain.
059         * To add SEQRES records a more complex alignment between ATOM and SEQRES residues
060         * is required, please see {@code SeqRes2AtomAligner} for more details on that.
061         * @param group  a Group object
062         */
063        public void addGroup(Group group);
064
065        /** Get the ID used by Hibernate.
066         *
067         * @return the ID used by Hibernate
068         * @see #setId(Long)
069         */
070        public Long getId() ;
071
072        /** Set the ID used by Hibernate.
073         *
074         * @param id assigned by Hibernate
075         * @see #getId()
076         */
077        public void setId(Long id) ;
078
079
080        /**
081         * Return the Group at given position,
082         * from within Groups with observed density in the chain, i.e.
083         * those with coordinates in ATOM and HETATM (including waters) records.
084         * @param position  an int
085         * @return a Group object
086         * @see #getAtomLength()
087         * @see #getAtomGroups()
088         * @see #getSeqResGroup(int)
089         */
090        public Group getAtomGroup (int position);
091
092        /**
093         * Return the Group at given position,
094         * from within groups in the SEQRES records of the chain, i.e.
095         * the aminoacids/nucleotides in the construct.
096         * @param position  an int
097         * @return a Group object
098         * @see #getSeqResLength()
099         * @see #getSeqResGroups()
100         * @see #getAtomGroup(int)
101         */
102        public Group getSeqResGroup (int position);
103
104
105        /**
106         * Return all Groups with observed density in the chain, i.e.
107         * those with coordinates in ATOM and HETATM (including waters) records.
108         *
109         * @return a List object representing the Groups of this Chain.
110         * @see #setAtomGroups(List)
111         * @see #getAtomLength()
112         * @see #getSeqResGroups()
113         */
114        public List<Group> getAtomGroups();
115
116        /**
117         * Set all Groups with observed density in the chain, i.e.
118         * those with coordinates in ATOM and HETATM (including waters) records.
119         * @param groups a List object representing the Groups of this Chain.
120         * @see #getAtomGroups()
121         */
122        public void setAtomGroups(List<Group> groups);
123
124        /**
125         * Return a List of all (observed) Groups of a special type, one of: {@link GroupType#AMINOACID},
126         * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}.
127         * Note that if a standard aminoacid appears as a HETATM (because it is part of a ligand) then
128         * it is still considered as {@link GroupType#AMINOACID} and not as {@link GroupType#HETATM}.
129         * @param type  GroupType
130         * @return a List object
131         * @see #setAtomGroups(List)
132         */
133        public List<Group> getAtomGroups (GroupType type);
134
135
136        /**
137         * Get a group by its PDB residue numbering. If the PDB residue number is not known,
138         * throws a StructureException.
139         *
140         * @param resNum the PDB residue number of the group
141         * @return the matching group
142         * @throws StructureException if the PDB residue number is not known
143         */
144        public Group getGroupByPDB(ResidueNumber resNum) throws StructureException;
145
146        /** Get all groups that are located between two PDB residue numbers.
147         *
148         * @param pdbresnumStart PDB residue number of start
149         * @param pdbresnumEnd PDB residue number of end
150         * @return the Groups located between the two residue numbers
151         * @throws StructureException if either start or end can not be found
152         */
153        public Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd) throws StructureException;
154
155
156        /** Get all groups that are located between two PDB residue numbers. In contrast to
157         * {@link #getGroupsByPDB(ResidueNumber, ResidueNumber)}, this method call can ignore that the
158         * exact outer groups are not found. This is useful e.g. when requesting the range of groups
159         * as specified by the DBREF records - these frequently are rather inaccurate.
160         *
161         * @param pdbresnumStart PDB residue number of start
162         * @param pdbresnumEnd PDB residue number of end
163         * @param ignoreMissing ignore missing groups in this range.
164         * @return the Groups located between the two residue numbers
165         * @throws StructureException presumably when start or end can not be found and ignoreMissing is false - TODO confirm
166         *
167         */
168        public Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd,boolean ignoreMissing) throws StructureException;
169
170
171        /**
172         * Returns the number of Groups with observed density in the chain, i.e.
173         * those with coordinates in ATOM and HETATM (including waters) records.
174         *
175         * @return the length
176         * @see #getAtomGroup(int)
177         * @see #getAtomGroups()
178         * @see #getSeqResLength()
179         */
180        public int getAtomLength();
181
182        /**
183         * Returns the number of groups in the SEQRES records of the chain, i.e.
184         * the number of aminoacids/nucleotides in the construct.
185         *
186         * @return the length
187         * @see #getSeqResGroup(int)
188         * @see #getSeqResGroups()
189         * @see #getAtomLength()
190         */
191        public int getSeqResLength();
192
193        /**
194         * Sets the Compound for this chain.
195         * @param compound the Compound
196         * @see #getCompound()
197         */
198        public void setCompound(Compound compound);
199
200        /**
201         * Returns the Compound for this chain.
202         *
203         * @return the Compound object
204         * @see #setCompound(Compound)
205         */
206        public Compound getCompound();
207
208        /**
209         * Sets the name of this chain (the chain id in the PDB file).
210         * @param name  a String specifying the name value
211         * @see #getChainID()
212         */
213        public void setChainID(String name);
214
215
216
217        /**
218         * Gets the name of this chain (the chain id in the PDB file).
219         * @return a String representing the name value
220         * @see #setChainID(String)
221         */
222        public String getChainID();
223
224
225        /**
226         * If available, returns the internal chain ID that is used in mmCIF files (asym_id), otherwise null.
227         *
228         * @return String or null
229         * @since 3.0.5
230         */
231        public String getInternalChainID();
232
233        /**
234         * Sets the internal chain ID that is used in mmCIF files (asym_id).
235         *
236         * @param internalChainID the internal chain ID as used in mmCIF files
237         * @since 3.0.5
238         */
239        public void setInternalChainID(String internalChainID);
240
241
242        @Override
243        public String toString();
244
245
246        /**
247         * Converts the SEQRES groups of a Chain to a Biojava Sequence object.
248         *
249         * @return the SEQRES groups of the Chain as a Sequence object.
250         */
251        public Sequence<?> getBJSequence()  ;
252
253        /**
254         * Returns the sequence of amino acids as it has been provided in the ATOM records.
255         * Non-standard residues will be present in the string only if the property
256         * {@value PDBFileReader#LOAD_CHEM_COMP_PROPERTY} has been set.
257         * @return amino acid sequence as string
258         * @see #getSeqResSequence()
259         */
260        public String getAtomSequence();
261
262        /**
263         * Returns the PDB SEQRES sequence as a one-letter sequence string.
264         * Non-standard residues are represented by an "X".
265         * @return one-letter PDB SEQRES sequence as string
266         * @see #getAtomSequence()
267         */
268        public String getSeqResSequence();
269
270        /**
271         * Sets the Swissprot id of this chain.
272         * @param sp_id  a String specifying the Swissprot id value
273         * @see #getSwissprotId()
274         */
275        public void setSwissprotId(String sp_id);
276
277        /**
278         * Gets the Swissprot id of this chain.
279         * @return a String representing the Swissprot id value
280         * @see #setSwissprotId(String)
281         */
282        public String getSwissprotId() ;
283
284
285        /**
286         * Returns a List of all SEQRES groups of a special type, one of: {@link GroupType#AMINOACID},
287         * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}.
288         * @param type  a GroupType
289         * @return a List object
290         * @see #setSeqResGroups(List)
291         */
292        public List<Group> getSeqResGroups (GroupType type);
293
294        /**
295         * Returns a list of all groups in SEQRES records of the chain, i.e.
296         * the aminoacids/nucleotides in the construct.
297         * @return a List of all Group objects of this chain
298         * @see #setSeqResGroups(List)
299         * @see #getSeqResLength()
300         * @see #getAtomGroups()
301         */
302        public List<Group> getSeqResGroups ();
303
304        /**
305         * Sets the list of SeqResGroups for this chain.
306         *
307         * @param seqResGroups a List of Group objects that form the SEQRES groups of this chain.
308         * @see #getSeqResGroups()
309         */
310        public void setSeqResGroups(List<Group> seqResGroups);
311
312        /**
313         * Sets the back-reference to its parent Structure.
314         * @param parent the parent Structure object for this Chain
315         * @see #getStructure()
316         * @deprecated use {@link #setStructure(Structure)} instead
317         *
318         */
319        @Deprecated
320        public void setParent(Structure parent) ;
321
322        /** Sets the back-reference to its parent Structure.
323         *
324         * @param parent the parent Structure object for this Chain
325         */
326
327        public void setStructure(Structure parent) ;
328
329        /**
330         * Returns the parent Structure of this chain.
331         *
332         * @return the parent Structure object
333         * @see #setStructure(Structure)
334         * @deprecated use {@link #getStructure()} instead.
335         */
336        @Deprecated
337        public Structure getParent() ;
338
339
340        /**
341         * Returns the parent Structure of this chain.
342         *
343         * @return the parent Structure object
344         * @see #setStructure(Structure)
345         */
346        public Structure getStructure() ;
347
348        /**
349         * Gets all groups that are not polymer groups and that are not solvent groups.
350         * Will automatically fetch Chemical Component files from the PDB web site, even if
351         * {@link FileParsingParameters#setLoadChemCompInfo(boolean)} has not been set to true.
352         * Otherwise the Ligands could not correctly be identified.
353         * @return list of Groups that are ligands
354         */
355        public List<Group> getAtomLigands();
356
357        /**
358         * Convert this Chain to a String in PDB format.
359         * @return this Chain as a String in PDB format
360         */
361        public String toPDB();
362
363        /**
364         * Convert this Chain to a String in mmCIF format.
365         * @return this Chain as a String in mmCIF format
366         */
367        public String toMMCIF();
368
369
370        /** Set annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCIF category.
371         *
372         * @param seqMisMatches the list of annotated sequence mismatches for this chain
373         */
374        public void setSeqMisMatches(List<SeqMisMatch> seqMisMatches);
375
376        /** Get annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCIF category.
377         *
378         * @return a list of sequence mismatches (or null if none found)
379         */
380        public List<SeqMisMatch> getSeqMisMatches();
381}