001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 25.04.2004 021 * @author Andreas Prlic 022 * 023 */ 024package org.biojava.nbio.structure; 025 026import org.biojava.nbio.core.sequence.template.Sequence; 027import org.biojava.nbio.structure.io.FileParsingParameters; 028import org.biojava.nbio.structure.io.PDBFileReader; 029 030import java.util.List; 031 032/** 033 * <p> 034 * Defines the interface for a Chain. A Chain corresponds to a Chain in a PDB file. 035 * A chain consists out of a list of {@link Group} objects. A Group can either be 036 * an {@link AminoAcid}, {@link HetatomImpl Hetatom} or {@link NucleotideImpl Nucleotide}. 037 * </p> 038 * 039 * <p> 040 * The BioJava API provides access to both the ATOM and SEQRES records in a PDB file. 041 * During parsing of a PDB file it aligns the ATOM and SEQRES groups and joins them. 042 * The SEQRES sequence can be accessed via {@link #getSeqResGroups()} and the 043 * ATOM groups via {@link #getAtomGroups()}. Groups that have been observed 044 * (i.e. they are in the ATOM records) can be detected by {@link Group}.has3D() 045 * </p> 046 * 047 * @author Andreas Prlic 048 * @version %I% %G% 049 * @since 1.4 050 */ 051public interface Chain { 052 053 /** returns an identical copy of this Chain. 054 * @return an identical copy of this Chain 055 */ 056 public Object clone(); 057 058 /** add a group to the list of ATOM record group of this chain. 059 * To add SEQRES records a more complex alignment between ATOM and SEQRES residues 060 * is required, please see SeqRes2AtomAligner for more details on that. 061 * @param group a Group object 062 */ 063 public void addGroup(Group group); 064 065 /** Get the ID used by Hibernate. 066 * 067 * @return the ID used by Hibernate 068 * @see #setId(Long) 069 */ 070 public Long getId() ; 071 072 /** Set the ID used by Hibernate. 073 * 074 * @param id assigned by Hibernate 075 * @see #getId() 076 */ 077 public void setId(Long id) ; 078 079 080 /** 081 * Return the Group at given position, 082 * from within Groups with observed density in the chain, i.e. 083 * those with coordinates in ATOM and HETATMS (including waters) records. 084 * @param position an int 085 * @return a Group object 086 * @see #getAtomLength() 087 * @see #getAtomGroups() 088 * @see #getSeqResGroup(int) 089 */ 090 public Group getAtomGroup (int position); 091 092 /** 093 * Return the Group at given position, 094 * from within groups in the SEQRES records of the chain, i.e. 095 * the aminoacids/nucleotides in the construct. 096 * @param position an int 097 * @return a Group object 098 * @see #getSeqResLength() 099 * @see #getSeqResGroups() 100 * @see #getAtomGroup(int) 101 */ 102 public Group getSeqResGroup (int position); 103 104 105 /** 106 * Return all Groups with observed density in the chain, i.e. 107 * those with coordinates in ATOM and HETATMS (including waters) records. 108 * 109 * @return a List object representing the Groups of this Chain. 110 * @see #setAtomGroups(List) 111 * @see #getAtomLength() 112 * @see #getSeqResGroups() 113 */ 114 public List<Group> getAtomGroups(); 115 116 /** 117 * Set all Groups with observed density in the chain, i.e. 118 * those with coordinates in ATOM and HETATMs (including waters) records. 119 * @param groups a List object representing the Groups of this Chain. 120 * @see #getAtomGroups() 121 */ 122 public void setAtomGroups(List<Group> groups); 123 124 /** 125 * Return a List of all (observed) Groups of a special type, one of: {@link GroupType#AMINOACID}, 126 * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}. 127 * Note that if a standard aminoacid appears as a HETATM (because it is part of a ligand) then 128 * it is still considered as {@link GroupType#AMINOACID} and not as {@link GroupType#HETATM}. 129 * @param type GroupType 130 * @return a List object 131 * @see #setAtomGroups(List) 132 */ 133 public List<Group> getAtomGroups (GroupType type); 134 135 136 /** 137 * Get a group by its PDB residue numbering. If the PDB residue number is not known, 138 * throws a StructureException. 139 * 140 * @param resNum the PDB residue number of the group 141 * @return the matching group 142 * @throws StructureException 143 */ 144 public Group getGroupByPDB(ResidueNumber resNum) throws StructureException; 145 146 /** Get all groups that are located between two PDB residue numbers. 147 * 148 * @param pdbresnumStart PDB residue number of start 149 * @param pdbresnumEnd PDB residue number of end 150 * @return Groups in between. or throws a StructureException if either start or end can not be found, 151 * @throws StructureException 152 */ 153 public Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd) throws StructureException; 154 155 156 /** Get all groups that are located between two PDB residue numbers. In contrast to getGroupsByPDB 157 * this method call ignores if the exact outer groups are not found. This is useful e.g. when requesting the range 158 * of groups as specified by the DBREF records - these frequently are rather inaccurate. 159 * 160 * 161 * @param pdbresnumStart PDB residue number of start 162 * @param pdbresnumEnd PDB residue number of end 163 * @param ignoreMissing ignore missing groups in this range. 164 * @return Groups in between. or throws a StructureException if either start or end can not be found, 165 * @throws StructureException 166 * 167 */ 168 public Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd,boolean ignoreMissing) throws StructureException; 169 170 171 /** 172 * Returns the number of Groups with observed density in the chain, i.e. 173 * those with coordinates in ATOM and HETATMs (including waters) records 174 * 175 * @return the length 176 * @see #getAtomGroup(int) 177 * @see #getAtomGroups() 178 * @see #getSeqResLength()) 179 */ 180 public int getAtomLength(); 181 182 /** 183 * Returns the number of groups in the SEQRES records of the chain, i.e. 184 * the number of aminoacids/nucleotides in the construct 185 * 186 * @return the length 187 * @see #getSeqResGroup(int) 188 * @see #getSeqResGroups() 189 * @see #getAtomLength() 190 */ 191 public int getSeqResLength(); 192 193 /** 194 * Sets the Compound 195 * @param compound the Compound 196 * @see #getCompound() 197 */ 198 public void setCompound(Compound compound); 199 200 /** 201 * Returns the Compound for this chain. 202 * 203 * @return the Compound object 204 * @see #setCompound(Compound) 205 */ 206 public Compound getCompound(); 207 208 /** 209 * Sets the name of this chain (Chain id in PDB file ). 210 * @param name a String specifying the name value 211 * @see #getChainID() 212 */ 213 public void setChainID(String name); 214 215 216 217 /** 218 * Gets the name of this chain (Chain id in PDB file ). 219 * @return a String representing the name value 220 * @see #setChainID(String) 221 */ 222 public String getChainID(); 223 224 225 /** 226 * If available, returns the internal chain ID that is used in mmCIF files (asym_id), otherwise null 227 * 228 * @return String or null 229 * @since 3.0.5 230 */ 231 public String getInternalChainID(); 232 233 /** 234 * Sets the internal chain ID that is used in mmCif files 235 * 236 * @param internalChainID 237 * @since 3.0.5 238 */ 239 public void setInternalChainID(String internalChainID); 240 241 242 @Override 243 public String toString(); 244 245 246 /** 247 * Converts the SEQRES groups of a Chain to a Biojava Sequence object. 248 * 249 * @return the SEQRES groups of the Chain as a Sequence object. 250 */ 251 public Sequence<?> getBJSequence() ; 252 253 /** 254 * Returns the sequence of amino acids as it has been provided in the ATOM records. 255 * Non-standard residues will be present in the string only if the property 256 * {@value PDBFileReader#LOAD_CHEM_COMP_PROPERTY} has been set. 257 * @return amino acid sequence as string 258 * @see #getSeqResSequence() 259 */ 260 public String getAtomSequence(); 261 262 /** 263 * Returns the PDB SEQRES sequence as a one-letter sequence string. 264 * Non-standard residues are represented by an "X". 265 * @return one-letter PDB SEQRES sequence as string 266 * @see #getAtomSequence() 267 */ 268 public String getSeqResSequence(); 269 270 /** 271 * Sets the Swissprot id of this chain. 272 * @param sp_id a String specifying the swissprot id value 273 * @see #getSwissprotId() 274 */ 275 public void setSwissprotId(String sp_id); 276 277 /** 278 * Gets the Swissprot id of this chain. 279 * @return a String representing the swissprot id value 280 * @see #setSwissprotId(String sp_id) 281 */ 282 public String getSwissprotId() ; 283 284 285 /** 286 * Returns a List of all SEQRES groups of a special type, one of: {@link GroupType#AMINOACID}, 287 * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}. 288 * @param type a GroupType 289 * @return an List object 290 * @see #setSeqResGroups(List) 291 */ 292 public List<Group> getSeqResGroups (GroupType type); 293 294 /** 295 * Returns a list of all groups in SEQRES records of the chain, i.e. 296 * the aminoacids/nucleotides in the construct. 297 * @return a List of all Group objects of this chain 298 * @see #setSeqResGroups(List) 299 * @see #getSeqResLength() 300 * @see #getAtomGroups() 301 */ 302 public List<Group> getSeqResGroups (); 303 304 /** 305 * Sets the list of SeqResGroups for this chain. 306 * 307 * @param seqResGroups a List of Group objects that from the SEQRES groups of this chain. 308 * @see #getSeqResGroups() 309 */ 310 public void setSeqResGroups(List<Group> seqResGroups); 311 312 /** 313 * Sets the back-reference to its parent Structure. 314 * @param parent the parent Structure object for this Chain 315 * @see #getStructure() 316 * @deprecated use setStructure instead 317 * 318 */ 319 @Deprecated 320 public void setParent(Structure parent) ; 321 322 /** Sets the back-reference to its parent Structure. 323 * 324 * @param parent 325 */ 326 327 public void setStructure(Structure parent) ; 328 329 /** 330 * Returns the parent Structure of this chain. 331 * 332 * @return the parent Structure object 333 * @see #setStructure(Structure) 334 * @deprecated use getStructure(Structure) instead. 335 */ 336 @Deprecated 337 public Structure getParent() ; 338 339 340 /** 341 * Returns the parent Structure of this chain. 342 * 343 * @return the parent Structure object 344 * @see #setStructure(Structure) 345 */ 346 public Structure getStructure() ; 347 348 /** 349 * Gets all groups that are not polymer groups and that are not solvent groups. 350 * Will automatically fetch Chemical Component files from the PDB web site, even if 351 * {@link FileParsingParameters#setLoadChemCompInfo(boolean)} has not been set to true. 352 * Otherwise the Ligands could not correctly be identified. 353 * @return list of Groups that are ligands 354 */ 355 public List<Group> getAtomLigands(); 356 357 /** 358 * Convert this Chain to a String in PDB format 359 * @return 360 */ 361 public String toPDB(); 362 363 /** 364 * Convert this Chain to a String in mmCIF format 365 * @return 366 */ 367 public String toMMCIF(); 368 369 370 /** Set annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category 371 * 372 * @param seqMisMatches 373 */ 374 public void setSeqMisMatches(List<SeqMisMatch> seqMisMatches); 375 376 /** Get annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category 377 * 378 * @returns a list of sequence mismatches (or null if none found) 379 */ 380 public List<SeqMisMatch> getSeqMisMatches(); 381}