001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on December 19, 2013
021 * Author: Douglas Myers-Turnbull
022 */
023
024package org.biojava.nbio.structure;
025
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;

import org.biojava.nbio.structure.align.util.AtomCache;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
036
037/**
038 * This is the canonical way to identify a part of a structure.
039 *
040 * <p>The current syntax allows the specification of a set of residues from
041 * the first model of a structure. Future versions may be extended to represent
042 * additional properties.
043 *
044 * <p>Identifiers should adhere to the following specification, although some
045 * additional forms may be tolerated where unambiguous for backwards compatibility.
046 * <pre>
047 *              name          := pdbID
048 *                             | pdbID '.' chainID
049 *                             | pdbID '.' range
050 *              range         := range (',' range)?
051 *                             | chainID
052 *                             | chainID '_' resNum '-' resNum
053 *              pdbID         := [0-9][a-zA-Z0-9]{3}
054 *              chainID       := [a-zA-Z0-9]+
055 *              resNum        := [-+]?[0-9]+[A-Za-z]?
056 * </pre>
057 * For example:
058 * <pre>
059 *              1TIM                            #whole structure
060 *              1tim                            #same as above
061 *              4HHB.C                          #single chain
062 *              3AA0.A,B                        #two chains
063 *              4GCR.A_1-40                     #substructure
064 *      3iek.A_17-28,A_56-294,A_320-377 #substructure of 3 disjoint parts
065 * </pre>
066 * More options may be added to the specification at a future time.
067
068 * @author dmyersturnbull
069 * @author Spencer Bliven
070 */
071public class SubstructureIdentifier implements Serializable, StructureIdentifier {
072
073        private static final long serialVersionUID = 1L;
074
075        private static final Logger logger = LoggerFactory.getLogger(SubstructureIdentifier.class);
076
077        private final String pdbId;
078        private final List<ResidueRange> ranges;
079
080        /**
081         * Create a new identifier from a string.
082         * @param id
083         */
084        public SubstructureIdentifier(String id) {
085                String[] idRange = id.split("\\.");
086                if(1 > idRange.length || idRange.length > 2 ) {
087                        throw new IllegalArgumentException(String.format("Malformed %s: %s",getClass().getSimpleName(),id));
088                }
089                if(idRange[0].length() != 4) {
090                        this.pdbId = idRange[0];
091                        // Changed from Exception to a warning to support files and stuff -sbliven 2015/01/22
092                        logger.warn(String.format("Unrecognized PDB code %s",this.pdbId));
093                } else {
094                        this.pdbId = idRange[0].toUpperCase();
095                }
096
097                if( idRange.length == 2) {
098                        String rangeStr = idRange[1].trim();
099
100                        this.ranges = ResidueRange.parseMultiple(rangeStr);
101                } else {
102                        this.ranges = new LinkedList<ResidueRange>();
103                }
104        }
105
106        /**
107         * Create a new identifier based on a set of ranges.
108         *
109         * If ranges is empty, includes all residues.
110         * @param pdbId
111         * @param ranges
112         */
113        public SubstructureIdentifier(String pdbId, List<ResidueRange> ranges) {
114                if(ranges == null) {
115                        throw new NullPointerException("Null ranges list");
116                }
117                this.pdbId = pdbId;
118                this.ranges = ranges;
119        }
120
121        @Override
122        public String toString() {
123                return getIdentifier();
124        }
125
126        /**
127         * Get the String form of this identifier.
128         *
129         * This provides the canonical form for a StructureIdentifier and has
130         * all the information needed to recreate a particular substructure.
131         *
132         * Example: 3iek.A_17-28,A_56-294
133         * @return The String form of this identifier
134         */
135        @Override
136        public String getIdentifier() {
137                if (ranges.isEmpty()) return pdbId;
138                return pdbId + "." + ResidueRange.toString(ranges);
139        }
140
141        public String getPdbId() {
142                return pdbId;
143        }
144
145        public List<ResidueRange> getResidueRanges() {
146                return ranges;
147        }
148
149        /**
150         * Return itself. SubstructureIdentifiers are canonical!
151         */
152        @Override
153        public SubstructureIdentifier toCanonical() {
154                return this;
155        }
156
157        /**
158         * Takes a complete structure as input and reduces it to residues present in
159         * the specified ranges
160         *
161         * <p>The returned structure will be a shallow copy of the input, with shared
162         * Chains, Residues, etc.
163         * @param input A full structure, e.g. as loaded from the PDB. The structure
164         * ID should match that returned by getPdbId().
165         * @return
166         * @throws StructureException
167         * @see StructureTools#getReducedStructure(Structure, String)
168         */
169        @Override
170        public Structure reduce(Structure s) throws StructureException {
171                // Follows StructureImpl.clone()
172
173                // Create new structure & copy basic properties
174                Structure newS = new StructureImpl();
175
176                newS.setPDBCode(s.getPDBCode());
177                newS.setPDBHeader(s.getPDBHeader());
178                newS.setName(this.toString());
179                newS.setDBRefs(s.getDBRefs());
180                newS.setBiologicalAssembly(s.isBiologicalAssembly());
181                newS.getPDBHeader().setDescription(
182                                "sub-range " + ranges + " of "  + newS.getPDBCode() + " "
183                                                + s.getPDBHeader().getDescription());
184                // TODO The following should be only copied for atoms which are present in the range.
185                newS.setCompounds(s.getCompounds());
186
187                newS.setSSBonds(s.getSSBonds());
188                newS.setSites(s.getSites());
189
190                newS.setStructureIdentifier(this);
191
192                for( int modelNr=0;modelNr<s.nrModels();modelNr++) {
193                        String prevChainId = null;
194
195
196                        // Construct new model
197                        newS.addModel(new ArrayList<Chain>());
198
199                        if(getResidueRanges().isEmpty()) {
200                                // Include all residues
201                                newS.setCompounds(s.getCompounds());
202                                newS.setSSBonds(s.getSSBonds());
203                                newS.setSites(s.getSites());
204
205                                newS.setModel(modelNr, s.getModel(modelNr));
206                        } else {
207                                // Restrict residues
208                                for( ResidueRange range: getResidueRanges()) {
209
210                                        String chainId = range.getChainId();
211                                        ResidueNumber pdbresnum1 = range.getStart();
212                                        ResidueNumber pdbresnum2 = range.getEnd();
213
214                                        Chain chain;
215                                        if(chainId.equals("_") ) {
216                                                // Handle special case of "_" chain for single-chain proteins
217                                                chain = s.getChain(modelNr,0);
218                                                if(pdbresnum1 != null)
219                                                        pdbresnum1.setChainId(chain.getChainID());
220                                                if(pdbresnum2 != null)
221                                                        pdbresnum2.setChainId(chain.getChainID());
222
223                                                if(s.size() != 1) {
224                                                        // SCOP 1.71 uses this for some proteins with multiple chains
225                                                        // Print a warning in this ambiguous case
226                                                        logger.warn("Multiple possible chains match '_'. Using chain {}",chain.getChainID());
227                                                }
228                                        } else {
229                                                // Explicit chain
230                                                try {
231                                                        chain = s.getChainByPDB(chainId,modelNr);
232                                                } catch(StructureException e) {
233                                                        // Chain not found
234                                                        // Maybe it was a chain index, masquerading as a chainId?
235                                                        try {
236                                                                int chainNum = Integer.parseInt(chainId);
237                                                                try {
238                                                                        chain = s.getChain(modelNr, chainNum);
239                                                                        logger.warn("No chain found for {}. Interpretting it as an index, using chain {} instead",chainId,chain.getChainID());
240                                                                } catch(Exception e2) { //we don't care what gets thrown here -sbliven
241                                                                        throw e; // Nope, not an index. Throw the original exception
242                                                                }
243                                                        } catch(NumberFormatException e3) {
244                                                                // Not an index. Throw the original exception
245                                                                throw e;
246                                                        }
247                                                }
248                                        }
249
250                                        List<Group> groups;
251                                        if(pdbresnum1 == null && pdbresnum2 == null) {
252                                                groups = chain.getAtomGroups();
253                                        } else {
254//                                              // Trim extra residues off the range
255//                                              Atom[] allAtoms = StructureTools.getRepresentativeAtomArray(chain);
256//                                              AtomPositionMap map = new AtomPositionMap(allAtoms);
257//                                              ResidueRange trimmed = map.trimToValidResidues(
258//                                                              new ResidueRange(chain.getChainID(),
259//                                                                              pdbresnum1, pdbresnum2));
260//                                              if (trimmed != null) {
261//                                                      pdbresnum1 = trimmed.getStart();
262//                                                      pdbresnum2 = trimmed.getEnd();
263//                                              }
264                                                groups = Arrays.asList(chain.getGroupsByPDB(pdbresnum1, pdbresnum2));
265                                        }
266
267                                        Chain c = null;
268                                        
269                                        // Reuse prevChain
270                                        if ( prevChainId != null && prevChainId.equals(chain.getChainID())) {
271                                                c = newS.getChainByPDB(prevChainId,modelNr);
272                                        } else {
273                                                try {
274                                                        c = newS.getChainByPDB(chain.getChainID(),modelNr);
275                                                } catch (StructureException e){
276                                                        // chain not in structure yet...
277                                                }
278                                        }
279                                        // Create new chain
280                                        if ( c == null) {
281                                                // first chain...
282                                                c = new ChainImpl();
283                                                c.setChainID(chain.getChainID());
284                                                newS.addChain(c,modelNr);
285                                                c.setSeqResGroups(chain.getSeqResGroups());
286                                                c.setSeqMisMatches(chain.getSeqMisMatches());
287                                        } 
288
289                                        // add the groups to the chain:
290                                        for ( Group g: groups) {
291                                                c.addGroup(g);
292                                        }
293
294                                        prevChainId = c.getChainID();
295                                } // end range
296                        }
297                } // end modelNr
298
299                return newS;
300        }
301
302        /**
303         * Loads the complete structure based on {@link #getPdbId()}.
304         *
305         * @param AtomCache A source of structures
306         * @return A Structure containing at least the atoms identified by this,
307         *  or null if no PDB ID is set
308         * @throws StructureException For errors loading and parsing the structure
309         * @throws IOException Errors reading the structure from disk
310         */
311        @Override
312        public Structure loadStructure(AtomCache cache) throws IOException, StructureException {
313                String pdb = getPdbId();
314                if(pdb == null)
315                        return null;
316                return cache.getStructureForPdbId(pdb);
317        }
318
319}