001/*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Author: Daniel Asarnow
021 * Date:   2012-7-23
022 */
023
024package org.biojava.nbio.structure.cath;
025
026/**
027 * @author Daniel Asarnow
028 */
029
030import java.io.IOException;
031import java.io.Serializable;
032import java.util.ArrayList;
033import java.util.Date;
034import java.util.HashSet;
035import java.util.List;
036import java.util.Set;
037
038import org.biojava.nbio.structure.ResidueRange;
039import org.biojava.nbio.structure.Structure;
040import org.biojava.nbio.structure.StructureException;
041import org.biojava.nbio.structure.StructureIdentifier;
042import org.biojava.nbio.structure.SubstructureIdentifier;
043import org.biojava.nbio.structure.align.util.AtomCache;
044
045/**
046 * A class which represents a single CATH domain.
047 */
048public class CathDomain implements Serializable, StructureIdentifier {
049
050        public static final long serialVersionUID = 1L;
051
052        /**
053         * The CATH domain code. Always 7 characters in length, combining the PDB and chain letter with the number of the domain within CATH.
054         * Example: 1aoiA00
055         * If the chain letter '0', domain refers to an entire PDB entry.
056         */
057        String domainName; // 7 characters 1oaiA00
058
059        /**
060         * The class number of this domain.
061         */
062        Integer classId; // C
063
064        /**
065         * The architecture number of this domain.
066         */
067        Integer architectureId; // A
068
069        /**
070         * The topology number of this domain.
071         */
072        Integer topologyId; // T
073
074        /**
075         * The homologous superfamily number of this domain.
076         */
077        Integer homologyId; // H
078
079        /**
080         * The sequence family (35% identity) number of this domain.
081         */
082        Integer sequenceFamilyId; // S
083
084        /**
085         * The "orthologous" sequence family (60% identity) number of this domain.
086         */
087        Integer orthologousSequenceFamilyId; // O
088
089        /**
090         * The "Like" sequence family (95% identity) number of this domain.
091         */
092        Integer likeSequenceFamilyId; // L
093
094        /**
095         * The identical sequence family (100% identity) number of this domain.
096         */
097
098        Integer identicalSequenceFamilyId; // I
099
100        /**
101         * The count of this domain among the identical sequence family members.
102         */
103        Integer domainCounter; // D
104
105        /**
106         * The domain length..
107         */
108        Integer length;
109
110        /**
111         * The resolution of the domain structure. Nominally in Angstroms,
112         * the values 999.000 and 1000.000 signify NMR structures and obsolete structures, respectively.
113         */
114        Double resolution;
115
116        /**
117         * The format and version of the CathDomainDescriptionFile.
118         */
119        String format;
120
121        /**
122         * The CATH version.
123         */
124        String version;
125
126        Date date;
127
128        /**
129         * The so-called name field holds a potentially long description of the domain.
130         */
131        String name;
132
133        /**
134         * Complete source organism listing.
135         */
136        String source;
137
138        /**
139         * FASTA header.
140         */
141        String sequenceHeader;
142
143        /**
144         * FASTA sequence.
145         */
146        String sequence;
147
148        /**
149         * List of all sub-domain segments.
150         */
151        List<CathSegment> segments;
152
153        /**
154         * A (potentially long) comment. Usually empty.
155         */
156        String comment;
157
158        public String getDomainName() {
159                return domainName;
160        }
161
162        public void setDomainName(String domainName) {
163                this.domainName = domainName;
164        }
165
166        /**
167         * Returns the PDB ID.
168         */
169        public String getThePdbId() {
170                return domainName.substring(0, 4);
171        }
172
173        /**
174         * Returns a string of the form {@code PDBID.CHAIN}.
175         * For example: {@code 1hiv.A}.
176         */
177        public String getPdbIdAndChain() {
178                return domainName.substring(0, 4) +
179                                (!"0".equals(domainName.substring(4, 5)) ? "." + domainName.substring(4, 5) : "");
180        }
181
182        public Integer getDomainId() {
183                return Integer.parseInt(domainName.substring(5));
184        }
185
186        public Integer getClassId() {
187                return classId;
188        }
189
190        public void setClassId(Integer classId) {
191                this.classId = classId;
192        }
193
194        public Integer getArchitectureId() {
195                return architectureId;
196        }
197
198        public void setArchitectureId(Integer architectureId) {
199                this.architectureId = architectureId;
200        }
201
202        public Integer getTopologyId() {
203                return topologyId;
204        }
205
206        public void setTopologyId(Integer topologyId) {
207                this.topologyId = topologyId;
208        }
209
210        public Integer getHomologyId() {
211                return homologyId;
212        }
213
214        public void setHomologyId(Integer homologyId) {
215                this.homologyId = homologyId;
216        }
217
218        public Integer getSequenceFamilyId() {
219                return sequenceFamilyId;
220        }
221
222        public void setSequenceFamilyId(Integer sequenceFamilyId) {
223                this.sequenceFamilyId = sequenceFamilyId;
224        }
225
226        public Integer getOrthologousSequenceFamilyId() {
227                return orthologousSequenceFamilyId;
228        }
229
230        public void setOrthologousSequenceFamilyId(Integer orthologousSequenceFamilyId) {
231                this.orthologousSequenceFamilyId = orthologousSequenceFamilyId;
232        }
233
234        public Integer getLikeSequenceFamilyId() {
235                return likeSequenceFamilyId;
236        }
237
238        public void setLikeSequenceFamilyId(Integer likeSequenceFamilyId) {
239                this.likeSequenceFamilyId = likeSequenceFamilyId;
240        }
241
242        public Integer getIdenticalSequenceFamilyId() {
243                return identicalSequenceFamilyId;
244        }
245
246        public void setIdenticalSequenceFamilyId(Integer identicalSequenceFamilyId) {
247                this.identicalSequenceFamilyId = identicalSequenceFamilyId;
248        }
249
250        public Integer getDomainCounter() {
251                return domainCounter;
252        }
253
254        public void setDomainCounter(Integer domainCounter) {
255                this.domainCounter = domainCounter;
256        }
257
258        public Integer getLength() {
259                return length;
260        }
261
262        public void setLength(Integer length) {
263                this.length = length;
264        }
265
266        public Double getResolution() {
267                return resolution;
268        }
269
270        public void setResolution(Double resolution) {
271                this.resolution = resolution;
272        }
273
274        public void setCATH(String cathCode) {
275                String[] token = cathCode.split("[.]");
276                setClassId(Integer.parseInt(token[0]));
277                setArchitectureId(Integer.parseInt(token[1]));
278                setTopologyId(Integer.parseInt(token[2]));
279                setHomologyId(Integer.parseInt(token[3]));
280        }
281
282        public String getCATH() {
283                return Integer.toString(getClassId()) + "." +
284                                Integer.toString(getArchitectureId()) + "." +
285                                Integer.toString(getTopologyId()) + "." +
286                                Integer.toString(getHomologyId());
287        }
288
289        public void setSOLID(String cathCode) {
290                String[] token = cathCode.split("[.]");
291                setSequenceFamilyId(Integer.parseInt(token[0]));
292                setOrthologousSequenceFamilyId(Integer.parseInt(token[1]));
293                setLikeSequenceFamilyId(Integer.parseInt(token[2]));
294                setIdenticalSequenceFamilyId(Integer.parseInt(token[3]));
295                setDomainCounter(Integer.parseInt(token[4]));
296        }
297
298        public String getSOILD() {
299                return Integer.toString(getSequenceFamilyId()) + "." +
300                                Integer.toString(getOrthologousSequenceFamilyId()) + "." +
301                                Integer.toString(getLikeSequenceFamilyId()) + "." +
302                                Integer.toString(getIdenticalSequenceFamilyId()) + "." +
303                                Integer.toString(getDomainCounter());
304        }
305
306        public Integer getClassificationId(CathCategory cathCategory) {
307                switch (cathCategory) {
308                        case Class:
309                                return getClassId();
310                        case Architecture:
311                                return getArchitectureId();
312                        case Topolgy:
313                                return getTopologyId();
314                        case Homology:
315                                return getHomologyId();
316                        case SequenceFamily:
317                                return getSequenceFamilyId();
318                        case OrthologousSequenceFamily:
319                                return getOrthologousSequenceFamilyId();
320                        case LikeSequenceFamily:
321                                return getLikeSequenceFamilyId();
322                        case IdenticalSequenceFamily:
323                                return getIdenticalSequenceFamilyId();
324                        case DomainCounter:
325                                return getDomainCounter();
326                        default:
327                                return null;
328                }
329        }
330
331        public String getFormat() {
332                return format;
333        }
334
335        public void setFormat(String format) {
336                this.format = format;
337        }
338
339        public String getVersion() {
340                return version;
341        }
342
343        public void setVersion(String version) {
344                this.version = version;
345        }
346
347        public Date getDate() {
348                return date;
349        }
350
351        public void setDate(Date date) {
352                this.date = date;
353        }
354
355        public String getName() {
356                return name;
357        }
358
359        public void setName(String name) {
360                this.name = name;
361        }
362
363        public String getSource() {
364                return source;
365        }
366
367        public void setSource(String source) {
368                this.source = source;
369        }
370
371        public String getSequenceHeader() {
372                return sequenceHeader;
373        }
374
375        public void setSequenceHeader(String sequenceHeader) {
376                this.sequenceHeader = sequenceHeader;
377        }
378
379        public String getSequence() {
380                return sequence;
381        }
382
383        public void setSequence(String sequence) {
384                this.sequence = sequence;
385        }
386
387        public List<CathSegment> getSegments() {
388                return segments;
389        }
390
391        public void setSegments(List<CathSegment> segments) {
392                this.segments = segments;
393        }
394
395        public String getComment() {
396                return comment;
397        }
398
399        public void setComment(String comment) {
400                this.comment = comment;
401        }
402
403        @Override
404        public String toString() {
405                return "CathDomain [domainName=" + domainName + ", classId=" + classId
406                                + ", architectureId=" + architectureId + ", topologyId="
407                                + topologyId + ", homologyId=" + homologyId
408                                + ", sequenceFamilyId=" + sequenceFamilyId
409                                + ", orthologousSequenceFamilyId="
410                                + orthologousSequenceFamilyId + ", likeSequenceFamilyId="
411                                + likeSequenceFamilyId + ", identicalSequenceFamilyId="
412                                + identicalSequenceFamilyId + ", domainCounter="
413                                + domainCounter + ", length=" + length + ", resolution="
414                                + resolution + ", format=" + format + ", version=" + version
415                                + ", date=" + date + ", name=" + name + ", source=" + source
416                                + ", sequenceHeader=" + sequenceHeader + ", sequence="
417                                + sequence + ", segments=" + segments + ", comment=" + comment
418                                + "]";
419        }
420
421        /**
422         * Returns the chains this domain is defined over; contains more than 1 element only if this domains is a multi-chain domain.
423         * @throws StructureException 
424         */
425        public Set<String> getChains() throws StructureException {
426                Set<String> chains = new HashSet<>();
427                List<ResidueRange> rrs = toCanonical().getResidueRanges();
428                for (ResidueRange rr : rrs) chains.add(rr.getChainName());
429                return chains;
430        }
431
432        @Override
433        public String getIdentifier() {
434                return getCATH();
435        }
436
437        @Override
438        public SubstructureIdentifier toCanonical() throws StructureException{
439                List<ResidueRange> ranges = new ArrayList<>();
440                String chain = String.valueOf(getDomainName().charAt(getDomainName().length() - 3));
441                for (CathSegment segment : this.getSegments()) {
442                        ranges.add(new ResidueRange(chain, segment.getStart(), segment.getStop()));
443                }
444
445                return new SubstructureIdentifier(getThePdbId(), ranges);
446        }
447
448        @Override
449        public Structure reduce(Structure input) throws StructureException {
450                return toCanonical().reduce(input);
451        }
452
453        @Override
454        public Structure loadStructure(AtomCache cache) throws StructureException,
455                        IOException {
456                return cache.getStructure(getThePdbId());
457        }
458
459
460}