001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 25.04.2004 021 * @author Andreas Prlic 022 * 023 */ 024package org.biojava.nbio.structure; 025 026import org.biojava.nbio.core.sequence.template.Sequence; 027import org.biojava.nbio.structure.io.FileParsingParameters; 028import org.biojava.nbio.structure.io.mmcif.model.ChemComp; 029 030import java.io.Serializable; 031import java.util.List; 032 033/** 034 * <p> 035 * Defines the interface for a Chain. A Chain corresponds to a Chain in a PDB file. 036 * A chain consists out of a list of {@link Group} objects. A Group can either be 037 * an {@link AminoAcid}, {@link HetatomImpl Hetatom} or {@link NucleotideImpl Nucleotide}. 038 * </p> 039 * 040 * <p> 041 * The BioJava API provides access to both the ATOM and SEQRES records in a PDB file. 042 * During parsing of a PDB file it aligns the ATOM and SEQRES groups and joins them. 043 * The SEQRES sequence can be accessed via {@link #getSeqResGroups()} and the 044 * ATOM groups via {@link #getAtomGroups()}. Groups that have been observed 045 * (i.e. they are in the ATOM records) can be detected by {@link Group}.has3D() 046 * </p> 047 * 048 * @author Andreas Prlic 049 * @version %I% %G% 050 * @since 1.4 051 */ 052public interface Chain extends Serializable { 053 054 /** returns an identical copy of this Chain. 055 * @return an identical copy of this Chain 056 */ 057 Object clone(); 058 059 /** add a group to the list of ATOM record group of this chain. 060 * To add SEQRES records a more complex alignment between ATOM and SEQRES residues 061 * is required, please see SeqRes2AtomAligner for more details on that. 062 * @param group a Group object 063 */ 064 void addGroup(Group group); 065 066 /** Get the 'private' asymId (internal chain IDs in mmCif) for this chain. 067 * 068 * @return the asymId 069 * @see #setId(String) 070 * @see #getName() 071 */ 072 String getId() ; 073 074 075 /** 076 * Set the 'private' asymId (internal chain IDs in mmCif) for this chain. 077 * 078 * @param asymId the internal chain Id 079 */ 080 void setId(String asymId) ; 081 082 083 /** 084 * Set the 'public' authId (chain ID in PDB file) 085 * 086 * @param authId the 'public' authId (chain ID in PDB file) 087 * @see #getId() 088 */ 089 void setName(String authId); 090 091 /** 092 * Get the 'public' authId (chain ID in PDB file) 093 * 094 * @return the authId for this chain. 095 * @see #getId() 096 */ 097 String getName(); 098 099 100 /** 101 * Return the Group at given position, 102 * from within Groups with observed density in the chain, i.e. 103 * those with coordinates in ATOM and HETATMS (including waters) records. 104 * @param position an int 105 * @return a Group object 106 * @see #getAtomLength() 107 * @see #getAtomGroups() 108 * @see #getSeqResGroup(int) 109 */ 110 Group getAtomGroup (int position); 111 112 /** 113 * Return the Group at given position, 114 * from within groups in the SEQRES records of the chain, i.e. 115 * the aminoacids/nucleotides in the construct. 116 * @param position an int 117 * @return a Group object 118 * @see #getSeqResLength() 119 * @see #getSeqResGroups() 120 * @see #getAtomGroup(int) 121 */ 122 Group getSeqResGroup (int position); 123 124 125 /** 126 * Return all Groups with observed density in the chain, i.e. 127 * those with coordinates in ATOM and HETATMS (including waters) records. 128 * 129 * @return a List object representing the Groups of this Chain. 130 * @see #setAtomGroups(List) 131 * @see #getAtomLength() 132 * @see #getSeqResGroups() 133 */ 134 List<Group> getAtomGroups(); 135 136 /** 137 * Set all Groups with observed density in the chain, i.e. 138 * those with coordinates in ATOM and HETATMs (including waters) records. 139 * @param groups a List object representing the Groups of this Chain. 140 * @see #getAtomGroups() 141 */ 142 void setAtomGroups(List<Group> groups); 143 144 /** 145 * Return a List of all (observed) Groups of a special type, one of: {@link GroupType#AMINOACID}, 146 * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}. 147 * Note that if a standard aminoacid appears as a HETATM (because it is part of a ligand) then 148 * it is still considered as {@link GroupType#AMINOACID} and not as {@link GroupType#HETATM}. 149 * @param type GroupType 150 * @return a List object 151 * @see #setAtomGroups(List) 152 */ 153 List<Group> getAtomGroups (GroupType type); 154 155 156 /** 157 * Get a group by its PDB residue numbering. If the PDB residue number is not known, 158 * throws a StructureException. 159 * 160 * @param resNum the PDB residue number of the group 161 * @return the matching group 162 * @throws StructureException 163 */ 164 Group getGroupByPDB(ResidueNumber resNum) throws StructureException; 165 166 /** 167 * Get all groups that are located between two PDB residue numbers. 168 * 169 * @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start. 170 * @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end. 171 * @return Groups in between. or throws a StructureException if either start or end can not be found, 172 * @throws StructureException 173 */ 174 Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd) throws StructureException; 175 176 177 /** 178 * Get all groups that are located between two PDB residue numbers. In contrast to getGroupsByPDB 179 * this method call ignores if the exact outer groups are not found. This is useful e.g. when requesting the range 180 * of groups as specified by the DBREF records - these frequently are rather inaccurate. 181 * 182 * 183 * @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start. 184 * @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end. 185 * @param ignoreMissing ignore missing groups in this range. 186 * @return Groups in between. or throws a StructureException if either start or end can not be found, 187 * @throws StructureException 188 * 189 */ 190 Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd,boolean ignoreMissing) throws StructureException; 191 192 193 /** 194 * Returns the number of Groups with observed density in the chain, i.e. 195 * those with coordinates in ATOM and HETATMs (including waters) records 196 * 197 * @return the length 198 * @see #getAtomGroup(int) 199 * @see #getAtomGroups() 200 * @see #getSeqResLength()) 201 */ 202 int getAtomLength(); 203 204 /** 205 * Returns the number of groups in the SEQRES records of the chain, i.e. 206 * the number of aminoacids/nucleotides in the construct 207 * 208 * @return the length 209 * @see #getSeqResGroup(int) 210 * @see #getSeqResGroups() 211 * @see #getAtomLength() 212 */ 213 int getSeqResLength(); 214 215 /** 216 * Sets the Entity information 217 * @param entityInfo the EntityInfo 218 * @see #getEntityInfo() 219 */ 220 void setEntityInfo(EntityInfo entityInfo); 221 222 /** 223 * Returns the EntityInfo for this chain. 224 * 225 * @return the EntityInfo object 226 * @see #setEntityInfo(EntityInfo) 227 */ 228 EntityInfo getEntityInfo(); 229 230 /** 231 * Sets the 'private' asymId of this chain (Chain id in PDB file ). 232 * @param asymId a String specifying the name value 233 * @see #getChainID() 234 * @deprecated use {@link #setId(String asymId)} instead 235 */ 236 @Deprecated 237 void setChainID(String asymId); 238 239 240 241 /** 242 * Gets the 'private' asymId of this chain. 243 * @return a String representing the name value 244 * @see #setChainID(String) 245 * @deprecated use getId() instead 246 */ 247 @Deprecated 248 String getChainID(); 249 250 251 /** 252 * If available, returns the internal chain ID that is used in mmCIF files (asym_id), otherwise null 253 * 254 * @return String or null 255 * @since 3.0.5 256 * @deprecated use {@link #getId()} instead 257 */ 258 String getInternalChainID(); 259 260 /** 261 * Sets the internal chain ID that is used in mmCif files 262 * 263 * @param internalChainID 264 * @since 3.0.5 265 * @deprecated use {@link #setId()} instead 266 */ 267 void setInternalChainID(String internalChainID); 268 269 270 @Override 271 String toString(); 272 273 274 /** 275 * Converts the SEQRES groups of a Chain to a Biojava Sequence object. 276 * 277 * @return the SEQRES groups of the Chain as a Sequence object. 278 */ 279 Sequence<?> getBJSequence() ; 280 281 /** 282 * Returns the sequence of amino acids as it has been provided in the ATOM records. 283 * Non-standard residues will be present in the string only if the property 284 * {@value org.biojava.nbio.structure.io.PDBFileReader.LOAD_CHEM_COMP_PROPERTY} has been set. 285 * @return amino acid sequence as string 286 * @see #getSeqResSequence() 287 */ 288 String getAtomSequence(); 289 290 /** 291 * Returns the PDB SEQRES sequence as a one-letter sequence string. 292 * Non-standard residues are represented by an "X". 293 * @return one-letter PDB SEQRES sequence as string 294 * @see #getAtomSequence() 295 */ 296 String getSeqResSequence(); 297 298 /** 299 * Sets the Swissprot id of this chain. 300 * @param sp_id a String specifying the swissprot id value 301 * @see #getSwissprotId() 302 */ 303 void setSwissprotId(String sp_id); 304 305 /** 306 * Gets the Swissprot id of this chain. 307 * @return a String representing the swissprot id value 308 * @see #setSwissprotId(String sp_id) 309 */ 310 String getSwissprotId() ; 311 312 313 /** 314 * Returns a List of all SEQRES groups of a special type, one of: {@link GroupType#AMINOACID}, 315 * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}. 316 * @param type a GroupType 317 * @return an List object 318 * @see #setSeqResGroups(List) 319 */ 320 List<Group> getSeqResGroups (GroupType type); 321 322 /** 323 * Returns a list of all groups in SEQRES records of the chain, i.e. 324 * the aminoacids/nucleotides in the construct. 325 * @return a List of all Group objects of this chain 326 * @see #setSeqResGroups(List) 327 * @see #getSeqResLength() 328 * @see #getAtomGroups() 329 */ 330 List<Group> getSeqResGroups (); 331 332 /** 333 * Sets the list of SeqResGroups for this chain. 334 * 335 * @param seqResGroups a List of Group objects that from the SEQRES groups of this chain. 336 * @see #getSeqResGroups() 337 */ 338 void setSeqResGroups(List<Group> seqResGroups); 339 340 /** 341 * Sets the back-reference to its parent Structure. 342 * @param parent the parent Structure object for this Chain 343 * @see #getStructure() 344 * @deprecated use setStructure instead 345 * 346 */ 347 @Deprecated 348 void setParent(Structure parent) ; 349 350 /** 351 * Sets the back-reference to its parent Structure. 352 * 353 * @param parent 354 */ 355 void setStructure(Structure parent) ; 356 357 /** 358 * Returns the parent Structure of this chain. 359 * 360 * @return the parent Structure object 361 * @see #setStructure(Structure) 362 * @deprecated use getStructure(Structure) instead. 363 */ 364 @Deprecated 365 Structure getParent() ; 366 367 368 /** 369 * Returns the parent Structure of this chain. 370 * 371 * @return the parent Structure object 372 * @see #setStructure(Structure) 373 */ 374 Structure getStructure() ; 375 376 /** 377 * Gets all groups that are not polymer groups and that are not solvent groups. 378 * Will automatically fetch Chemical Component files from the PDB web site, even if 379 * {@link FileParsingParameters#setLoadChemCompInfo(boolean)} has not been set to true. 380 * Otherwise the Ligands could not correctly be identified. 381 * @return list of Groups that are ligands 382 * @deprecated since biojava 5.0 this does not apply anymore. Chains contain either 383 * polymeric groups or non-polymeric groups 384 */ 385 @Deprecated 386 List<Group> getAtomLigands(); 387 388 /** 389 * Convert this Chain to a String in PDB format 390 * @return 391 */ 392 String toPDB(); 393 394 /** 395 * Convert this Chain to a String in mmCIF format 396 * @return 397 */ 398 String toMMCIF(); 399 400 401 /** 402 * Sets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category 403 * 404 * @param seqMisMatches 405 */ 406 void setSeqMisMatches(List<SeqMisMatch> seqMisMatches); 407 408 /** 409 * Gets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category 410 * 411 * @returns a list of sequence mismatches (or null if none found) 412 */ 413 List<SeqMisMatch> getSeqMisMatches(); 414 415 /** 416 * Returns the EntityType of this chain. Equivalent to getEntityInfo().getType() 417 * @return 418 * @see EntityType 419 */ 420 EntityType getEntityType(); 421 422 /** Tests if a chain is consisting of water molecules only 423 * 424 * @return true if there are only solvent molecules in this chain. 425 */ 426 public boolean isWaterOnly(); 427 428 /** Returns true if the given chain is composed of non-polymeric (including water) groups only. 429 * 430 * @return true if only non-polymeric groups in this chain. 431 */ 432 public boolean isPureNonPolymer(); 433 434 /** 435 * Get the predominant {@link GroupType} for a given Chain, following these 436 * rules: <li>if the ratio of number of residues of a certain 437 * {@link GroupType} to total non-water residues is above the threshold 438 * {@value #org.biojava.nbio.structure.StructureTools.RATIO_RESIDUES_TO_TOTAL}, then that {@link GroupType} is 439 * returned</li> <li>if there is no {@link GroupType} that is above the 440 * threshold then the {@link GroupType} with most members is chosen, logging 441 * it</li> 442 * <p> 443 * See also {@link ChemComp#getPolymerType()} and 444 * {@link ChemComp#getResidueType()} which follow the PDB chemical component 445 * dictionary and provide a much more accurate description of groups and 446 * their linking. 447 * </p> 448 * 449 * @return 450 */ 451 public GroupType getPredominantGroupType(); 452 453 /** 454 * Tell whether given chain is a protein chain 455 * 456 457 * @return true if protein, false if nucleotide or ligand 458 * @see #getPredominantGroupType() 459 */ 460 public boolean isProtein(); 461 462 /** 463 * Tell whether given chain is DNA or RNA 464 * 465 * @return true if nucleic acid, false if protein or ligand 466 * @see #getPredominantGroupType() 467 */ 468 public boolean isNucleicAcid(); 469}