001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on 25.04.2004 021 * @author Andreas Prlic 022 * 023 */ 024package org.biojava.nbio.structure; 025 026import org.biojava.nbio.core.sequence.template.Sequence; 027import org.biojava.nbio.structure.chem.ChemComp; 028 029import java.io.Serializable; 030import java.util.List; 031 032/** 033 * <p> 034 * Defines the interface for a Chain. A Chain corresponds to a Chain in a PDB file. 035 * A chain consists of a list of {@link Group} objects. A Group can either be 036 * an {@link AminoAcid}, {@link HetatomImpl Hetatom} or {@link NucleotideImpl Nucleotide}. 037 * </p> 038 * 039 * <p> 040 * The BioJava API provides access to both the ATOM and SEQRES records in a PDB file. 041 * During parsing of a PDB file it aligns the ATOM and SEQRES groups and joins them. 042 * The SEQRES sequence can be accessed via {@link #getSeqResGroups()} and the 043 * ATOM groups via {@link #getAtomGroups()}. Groups that have been observed 044 * (i.e. they are in the ATOM records) can be detected by {@link Group}.has3D() 045 * </p> 046 * 047 * @author Andreas Prlic 048 * @version %I% %G% 049 * @since 1.4 050 */ 051public interface Chain extends Serializable { 052 053 /** 054 * Returns an identical copy of this Chain. 055 * @return an identical copy of this Chain 056 */ 057 Object clone(); 058 059 /** 060 * Add a group to the list of ATOM record group of this chain. 061 * To add SEQRES records a more complex alignment between ATOM and SEQRES residues 062 * is required, please see SeqRes2AtomAligner for more details on that. 063 * @param group a Group object 064 */ 065 void addGroup(Group group); 066 067 /** 068 * Get the 'private' asymId (internal chain IDs in mmCif) for this chain. 069 * 070 * @return the asymId 071 * @see #setId(String) 072 * @see #getName() 073 */ 074 String getId() ; 075 076 077 /** 078 * Set the 'private' asymId (internal chain IDs in mmCif) for this chain. 079 * 080 * @param asymId the internal chain Id 081 */ 082 void setId(String asymId) ; 083 084 085 /** 086 * Set the 'public' authId (chain ID in PDB file) 087 * 088 * @param authId the 'public' authId (chain ID in PDB file) 089 * @see #getId() 090 */ 091 void setName(String authId); 092 093 /** 094 * Get the 'public' authId (chain ID in PDB file) 095 * 096 * @return the authId for this chain. 097 * @see #getId() 098 */ 099 String getName(); 100 101 102 /** 103 * Return the Group at given position, 104 * from within Groups with observed density in the chain, i.e. 105 * those with coordinates in ATOM and HETATMS (including waters) records. 106 * @param position an int 107 * @return a Group object 108 * @see #getAtomLength() 109 * @see #getAtomGroups() 110 * @see #getSeqResGroup(int) 111 */ 112 Group getAtomGroup (int position); 113 114 /** 115 * Return the Group at given position, 116 * from within groups in the SEQRES records of the chain, i.e. 117 * the aminoacids/nucleotides in the construct. 118 * @param position an int 119 * @return a Group object 120 * @see #getSeqResLength() 121 * @see #getSeqResGroups() 122 * @see #getAtomGroup(int) 123 */ 124 Group getSeqResGroup (int position); 125 126 127 /** 128 * Return all Groups with observed density in the chain, i.e. 129 * those with coordinates in ATOM and HETATMS (including waters) records. 130 * 131 * @return a List object representing the Groups of this Chain. 132 * @see #setAtomGroups(List) 133 * @see #getAtomLength() 134 * @see #getSeqResGroups() 135 */ 136 List<Group> getAtomGroups(); 137 138 /** 139 * Set all Groups with observed density in the chain, i.e. 140 * those with coordinates in ATOM and HETATMs (including waters) records. 141 * @param groups a List object representing the Groups of this Chain. 142 * @see #getAtomGroups() 143 */ 144 void setAtomGroups(List<Group> groups); 145 146 /** 147 * Return a List of all (observed) Groups of a special type, one of: {@link GroupType#AMINOACID}, 148 * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}. 149 * Note that if a standard aminoacid appears as a HETATM (because it is part of a ligand) then 150 * it is still considered as {@link GroupType#AMINOACID} and not as {@link GroupType#HETATM}. 151 * @param type GroupType 152 * @return a List object 153 * @see #setAtomGroups(List) 154 */ 155 List<Group> getAtomGroups (GroupType type); 156 157 158 /** 159 * Get a group by its PDB residue numbering. If the PDB residue number is not known, 160 * throws a StructureException. 161 * 162 * @param resNum the PDB residue number of the group 163 * @return the matching group 164 * @throws StructureException 165 */ 166 Group getGroupByPDB(ResidueNumber resNum) throws StructureException; 167 168 /** 169 * Get all groups that are located between two PDB residue numbers. 170 * 171 * @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start. 172 * @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end. 173 * @return Groups in between. or throws a StructureException if either start or end can not be found, 174 * @throws StructureException 175 */ 176 Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd) throws StructureException; 177 178 179 /** 180 * Get all groups that are located between two PDB residue numbers. In contrast to getGroupsByPDB 181 * this method call ignores if the exact outer groups are not found. This is useful e.g. when requesting the range 182 * of groups as specified by the DBREF records - these frequently are rather inaccurate. 183 * 184 * 185 * @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start. 186 * @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end. 187 * @param ignoreMissing ignore missing groups in this range. 188 * @return Groups in between. or throws a StructureException if either start or end can not be found, 189 * @throws StructureException 190 * 191 */ 192 Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd,boolean ignoreMissing) throws StructureException; 193 194 195 /** 196 * Returns the number of Groups with observed density in the chain, i.e. 197 * those with coordinates in ATOM and HETATMs (including waters) records 198 * 199 * @return the length 200 * @see #getAtomGroup(int) 201 * @see #getAtomGroups() 202 * @see #getSeqResLength() 203 */ 204 int getAtomLength(); 205 206 /** 207 * Returns the number of groups in the SEQRES records of the chain, i.e. 208 * the number of aminoacids/nucleotides in the construct 209 * 210 * @return the length 211 * @see #getSeqResGroup(int) 212 * @see #getSeqResGroups() 213 * @see #getAtomLength() 214 */ 215 int getSeqResLength(); 216 217 /** 218 * Sets the Entity information 219 * @param entityInfo the EntityInfo 220 * @see #getEntityInfo() 221 */ 222 void setEntityInfo(EntityInfo entityInfo); 223 224 /** 225 * Returns the EntityInfo for this chain. 226 * 227 * @return the EntityInfo object 228 * @see #setEntityInfo(EntityInfo) 229 */ 230 EntityInfo getEntityInfo(); 231 232 @Override 233 String toString(); 234 235 /** 236 * Converts the SEQRES groups of a Chain to a Biojava Sequence object. 237 * 238 * @return the SEQRES groups of the Chain as a Sequence object. 239 */ 240 Sequence<?> getBJSequence() ; 241 242 /** 243 * Returns the sequence of amino acids as it has been provided in the ATOM records. 244 * @return amino acid sequence as string 245 * @see #getSeqResSequence() 246 */ 247 String getAtomSequence(); 248 249 /** 250 * Returns the PDB SEQRES sequence as a one-letter sequence string. 251 * Non-standard residues are represented by an "X". 252 * @return one-letter PDB SEQRES sequence as string 253 * @see #getAtomSequence() 254 */ 255 String getSeqResSequence(); 256 257 /** 258 * Returns a List of all SEQRES groups of a special type, one of: {@link GroupType#AMINOACID}, 259 * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}. 260 * @param type a GroupType 261 * @return an List object 262 * @see #setSeqResGroups(List) 263 */ 264 List<Group> getSeqResGroups (GroupType type); 265 266 /** 267 * Returns a list of all groups in SEQRES records of the chain, i.e. 268 * the aminoacids/nucleotides in the construct. 269 * @return a List of all Group objects of this chain 270 * @see #setSeqResGroups(List) 271 * @see #getSeqResLength() 272 * @see #getAtomGroups() 273 */ 274 List<Group> getSeqResGroups (); 275 276 /** 277 * Sets the list of SeqResGroups for this chain. 278 * 279 * @param seqResGroups a List of Group objects that from the SEQRES groups of this chain. 280 * @see #getSeqResGroups() 281 */ 282 void setSeqResGroups(List<Group> seqResGroups); 283 284 /** 285 * Sets the back-reference to its parent Structure. 286 * 287 * @param parent 288 */ 289 void setStructure(Structure parent) ; 290 291 /** 292 * Returns the parent Structure of this chain. 293 * 294 * @return the parent Structure object 295 * @see #setStructure(Structure) 296 */ 297 Structure getStructure() ; 298 299 /** 300 * Convert this Chain to a String in PDB format 301 * @return 302 */ 303 String toPDB(); 304 305 /** 306 * Convert this Chain to a String in mmCIF format 307 * @return 308 */ 309 String toMMCIF(); 310 311 312 /** 313 * Sets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category 314 * 315 * @param seqMisMatches 316 */ 317 void setSeqMisMatches(List<SeqMisMatch> seqMisMatches); 318 319 /** 320 * Gets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category 321 * 322 * @return a list of sequence mismatches (or null if none found) 323 */ 324 List<SeqMisMatch> getSeqMisMatches(); 325 326 /** 327 * Returns the EntityType of this chain. Equivalent to getEntityInfo().getType() 328 * @return 329 * @see EntityType 330 */ 331 EntityType getEntityType(); 332 333 /** 334 * Tests if a chain is consisting of water molecules only 335 * 336 * @return true if there are only solvent molecules in this chain. 337 */ 338 public boolean isWaterOnly(); 339 340 /** 341 * Returns true if the given chain is composed of non-polymeric (including water) groups only. 342 * 343 * @return true if only non-polymeric groups in this chain. 344 */ 345 public boolean isPureNonPolymer(); 346 347 /** 348 * Get the predominant {@link GroupType} for a given Chain, following these 349 * rules: 350 * <ul> 351 * <li>if the ratio of number of residues of a certain 352 * {@link GroupType} to total non-water residues is above the threshold 353 * {@value org.biojava.nbio.structure.StructureTools#RATIO_RESIDUES_TO_TOTAL}, then that {@link GroupType} is 354 * returned</li> <li>if there is no {@link GroupType} that is above the 355 * threshold then the {@link GroupType} with most members is chosen, logging 356 * it</li> 357 * </ul> 358 * <p> 359 * See also {@link ChemComp#getPolymerType()} and 360 * {@link ChemComp#getResidueType()} which follow the PDB chemical component 361 * dictionary and provide a much more accurate description of groups and 362 * their linking. 363 * 364 * @return the predominant group type 365 */ 366 GroupType getPredominantGroupType(); 367 368 /** 369 * Tell whether given chain is a protein chain 370 * 371 * @return true if protein, false if nucleotide or ligand 372 * @see #getPredominantGroupType() 373 */ 374 boolean isProtein(); 375 376 /** 377 * Tell whether given chain is DNA or RNA 378 * 379 * @return true if nucleic acid, false if protein or ligand 380 * @see #getPredominantGroupType() 381 */ 382 boolean isNucleicAcid(); 383}