001/*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Author: Daniel Asarnow
021 * Date:   2012-6-23
022 */
023
024package org.biojava.nbio.structure.cath;
025
026import org.biojava.nbio.structure.align.util.UserConfiguration;
027import org.biojava.nbio.core.util.FileDownloadUtils;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030import org.biojava.nbio.core.util.InputStreamProvider;
031
032import java.io.*;
033import java.net.URL;
034import java.text.DateFormat;
035import java.text.DecimalFormat;
036import java.text.ParseException;
037import java.text.SimpleDateFormat;
038import java.util.*;
039import java.util.concurrent.atomic.AtomicBoolean;
040
041/**
042 * @author Daniel Asarnow
043 */
044public class CathInstallation implements CathDatabase{
045
046        private static final Logger LOGGER = LoggerFactory.getLogger(CathInstallation.class);
047        
048        public static final String DEFAULT_VERSION = CathFactory.DEFAULT_VERSION;
049
050        public static final String domainListFileName = "cath-domain-list-v%s.txt"; 
051        public static final String domainDescriptionFileName = "cath-domain-description-file-v%s.txt";
052        public static final String nodeListFileName = "cath-names-v%s.txt";
053        public static final String domallFileName = "cath-domain-boundaries-v%s.txt";
054        
055        public static final String CATH_DOWNLOAD_URL                     = "http://download.cathdb.info/cath/releases/";
056        public static final String CATH_DOWNLOAD_ALL_RELEASES_DIR        = "all-releases";
057        public static final String CATH_DOWNLOAD_CLASSIFICATION_DATA_DIR = "cath-classification-data";
058
059        public static final String NEWLINE = System.getProperty("line.separator");;
060        public static final String FILESPLIT = System.getProperty("file.separator");;
061
062        
063        private String cathVersion;
064
065        private String cathDownloadUrl;
066        
067        private String cacheLocation ;
068
069        private AtomicBoolean installedDomainList;
070        private AtomicBoolean installedDomainDescription;
071        private AtomicBoolean installedNodeList;
072        private AtomicBoolean installedDomall;
073
074        private final boolean useCathDomainDescriptionFile;
075        private final boolean parseCathFragments;
076
077        private Map<String, List<CathDomain>> pdbMap;
078        private Map<String, CathDomain> domainMap;
079        private Map<String, CathNode> cathTree;
080        private Map<String, List<CathFragment>> fragmentMap;
081
082
083
/**
 * Creates an installation backed by the given cache directory.
 *
 * @param cacheLocation directory holding the CATH files; a trailing file separator is appended if missing
 * @param usingCDDF     if true, queries load the domain description file rather than the domall file
 * @param parseCF       if true, a fragment map is created so fragments can be stored while parsing
 */
public CathInstallation(String cacheLocation, boolean usingCDDF, boolean parseCF) {
        setCacheLocation(cacheLocation);

        this.useCathDomainDescriptionFile = usingCDDF;
        this.parseCathFragments = parseCF;

        // Nothing has been loaded yet.
        this.installedDomainDescription = new AtomicBoolean(false);
        this.installedDomainList = new AtomicBoolean(false);
        this.installedNodeList = new AtomicBoolean(false);
        this.installedDomall = new AtomicBoolean(false);

        this.cathVersion = DEFAULT_VERSION;
        this.cathDownloadUrl = CATH_DOWNLOAD_URL;

        this.pdbMap = new HashMap<String, List<CathDomain>>();
        this.domainMap = new HashMap<String, CathDomain>();
        this.cathTree = new HashMap<String, CathNode>();

        // The fragment map stays null unless fragment parsing was requested.
        if (parseCF) {
                this.fragmentMap = new HashMap<String, List<CathFragment>>();
        }
}
105
/**
 * Creates an installation for the given cache directory that uses the domall file
 * and does not parse fragments.
 *
 * @param cacheLocation directory holding the CATH files
 */
public CathInstallation(String cacheLocation) {
        this(cacheLocation, /* usingCDDF */ false, /* parseCF */ false);
}
109
/**
 * Creates an installation whose cache directory is the cache file path
 * reported by a fresh {@code UserConfiguration}.
 */
public CathInstallation() {
        this(new UserConfiguration().getCacheFilePath());
}
113
114        public String getDomainListFileName() {
115                return cacheLocation + buildFileName(domainListFileName);
116        }
117
118        public String getDomainDescriptionFileName() {
119                return cacheLocation + buildFileName(domainDescriptionFileName);
120        }
121
122        public String getNodeListFileName() {
123                return cacheLocation + buildFileName(nodeListFileName);
124        }
125
126        public String getDomallFileName() {
127                return cacheLocation + buildFileName(domallFileName);
128        }
129        
130        private String buildFileName(String fileNameTemplate) {
131                return String.format(fileNameTemplate, cathVersion);
132        }
133        
134        private String buildUrl(String remoteFileName) {
135                String remoteFileNameWithVer =  buildFileName(remoteFileName);
136                String releasesDir = CATH_DOWNLOAD_ALL_RELEASES_DIR;
137                return cathDownloadUrl + releasesDir + "/v" + cathVersion + "/" + CATH_DOWNLOAD_CLASSIFICATION_DATA_DIR + "/" + remoteFileNameWithVer;
138        }
139
140        public String getCathDownloadUrl() {
141                return cathDownloadUrl;
142        }
143
144        public void setCathDownloadUrl(String cathDownloadUrl) {
145                this.cathDownloadUrl = cathDownloadUrl;
146        }
147
148        public String getCacheLocation() {
149                return cacheLocation;
150        }
151
152        public void setCacheLocation(String cacheLocation) {
153                if ( !cacheLocation.endsWith(FILESPLIT) ) cacheLocation += FILESPLIT;
154                this.cacheLocation = cacheLocation;
155        }
156
157        public AtomicBoolean getInstalledDomainList() {
158                return installedDomainList;
159        }
160
161        public void setInstalledDomainList(AtomicBoolean installedDomainList) {
162                this.installedDomainList = installedDomainList;
163        }
164
165        public AtomicBoolean getInstalledDomainDescription() {
166                return installedDomainDescription;
167        }
168
169        public void setInstalledDomainDescription(AtomicBoolean installedDomainDescription) {
170                this.installedDomainDescription = installedDomainDescription;
171        }
172
173        public AtomicBoolean getInstalledNodeList() {
174                return installedNodeList;
175        }
176
177        public AtomicBoolean getInstalledDomall() {
178                return installedDomall;
179        }
180
181        public void setInstalledNodeList(AtomicBoolean installedNodeList) {
182                this.installedNodeList = installedNodeList;
183        }
184
185        public void setInstalledDomall(AtomicBoolean installedDomall) {
186                this.installedDomall = installedDomall;
187        }
188
189        @Override
190        public String getCathVersion() {
191                return cathVersion;
192        }
193
194        @Override
195        public CathNode getCathNode(String nodeId) {
196                ensureNodeListInstalled();
197                return cathTree.get(nodeId);
198        }
199
200        @Override
201        public List<CathDomain> getByCategory(CathCategory category) {
202                if (useCathDomainDescriptionFile) {
203                        ensureDomainDescriptionInstalled();
204                } else {
205                        ensureDomallInstalled();
206                }
207                ensureNodeListInstalled();
208                List<CathDomain> matches = new ArrayList<CathDomain>();
209                CathNode node;
210                for ( String nodeId : cathTree.keySet() ) {
211                        if ( (node = cathTree.get(nodeId)).getCategory() == category ) {
212                                matches.add( domainMap.get( node.getRepresentative() ) );
213                        }
214                }
215                return matches;
216        }
217
218        @Override
219        public List<CathDomain> filterByCathCode(String query) {
220                if (useCathDomainDescriptionFile) {
221                        ensureDomainDescriptionInstalled();
222                } else {
223                        ensureDomallInstalled();
224                }
225                List<CathDomain> matches = new ArrayList<CathDomain>();
226                for ( String k : domainMap.keySet() ) {
227                        if ( domainMap.get(k).getCATH().startsWith(query) ) {
228                                matches.add( domainMap.get(k) );
229                        }
230                }
231                return matches;
232        }
233
234        @Override
235        public List<CathNode> getTree(CathDomain domain) {
236                CathNode node = getCathNode( domain.getCATH() );
237                List<CathNode> tree = new ArrayList<CathNode>();
238                while (node != null) {
239                        node = getCathNode( node.getParentId() );
240                        if (node != null) tree.add(node);
241                }
242                Collections.reverse(tree);
243                return tree;
244        }
245
246        @Override
247        public List<CathDomain> filterByNodeName(String query) {
248                ensureNodeListInstalled();
249                List<CathNode> matchingNodes = new ArrayList<CathNode>();
250                CathNode node;
251                for ( String nodeId : cathTree.keySet() ) {
252                        if ( (node = cathTree.get(nodeId) ).getDescription().startsWith(query) ) {
253                                matchingNodes.add(node);
254                        }
255                }
256                List<CathDomain> matches = new ArrayList<CathDomain>();
257                for (CathNode n : matchingNodes) {
258                        matches.addAll(getDomainsByNodeId(n.getNodeId()));
259                }
260                return matches;
261        }
262
263        @Override
264        public List<CathDomain> filterByDescription(String query) {
265                if (useCathDomainDescriptionFile) {
266                        ensureDomainDescriptionInstalled();
267                } else {
268                        ensureDomallInstalled();
269                }
270                List<CathDomain> matches = new ArrayList<CathDomain>();
271                for ( String k : domainMap.keySet() ) {
272                        if ( domainMap.get(k).getName().startsWith(query) ) {
273                                matches.add( domainMap.get(k) );
274                        }
275                }
276                return matches;
277        }
278
279        @Override
280        public CathDomain getDescriptionByNodeId(String nodeId) {
281                if (useCathDomainDescriptionFile) {
282                        ensureDomainDescriptionInstalled();
283                } else {
284                        ensureDomallInstalled();
285                }
286                CathNode node = getCathNode(nodeId);
287                return domainMap.get(node.getRepresentative());
288        }
289
290        @Override
291        public List<CathDomain> getDomainsForPdb(String pdbId) {
292                if (useCathDomainDescriptionFile) {
293                        ensureDomainDescriptionInstalled();
294                } else {
295                        ensureDomallInstalled();
296                }
297
298          // cath IDs in lower case...
299                return pdbMap.get(pdbId.toLowerCase());
300        }
301
302        @Override
303        public CathDomain getDomainByCathId(String cathId) {
304                if (useCathDomainDescriptionFile) {
305                        ensureDomainDescriptionInstalled();
306                } else {
307                        ensureDomallInstalled();
308                }
309                return domainMap.get(cathId);
310        }
311
312        @Override
313        public CathDomain getDescriptionByCathId(String cathId) {
314                if (useCathDomainDescriptionFile) {
315                        ensureDomainDescriptionInstalled();
316                } else {
317                        ensureDomallInstalled();
318                }
319                return domainMap.get(cathId);
320        }
321
322        @Override
323        public List<CathDomain> getDomainsByNodeId(String nodeId) {
324                if (useCathDomainDescriptionFile) {
325                        ensureDomainDescriptionInstalled();
326                } else {
327                        ensureDomallInstalled();
328                }
329                List<CathDomain> domains = new ArrayList<CathDomain>();
330                for (String domainName : domainMap.keySet()) {
331                        CathDomain description = domainMap.get(domainName);
332                        if ( description.getCATH().startsWith(nodeId) ) {
333                                domains.add(description);
334                        }
335                }
336                return domains;
337        }
338
339        @Override
340        public List<CathFragment> getFragmentsByPdbId(String pdbId) {
341                if ( useCathDomainDescriptionFile || !parseCathFragments ) return null;
342                ensureDomallInstalled();
343                return fragmentMap.get(pdbId);
344        }
345
346        private void parseCathDomainList() throws IOException {
347                File file = new File(getDomainListFileName());
348                InputStreamProvider ips = new InputStreamProvider();
349                BufferedReader buffer = new BufferedReader (new InputStreamReader(ips.getInputStream(file)));
350                parseCathDomainList(buffer);
351        }
352
353        private void parseCathDomainList(BufferedReader bufferedReader) throws IOException{
354                String line;
355         //   int counter = 0;
356                while ( (line = bufferedReader.readLine()) != null ) {
357                        if ( line.startsWith("#") ) continue;
358                        CathDomain cathDomain = parseCathListFileLine(line);
359                   // counter++;
360
361                        String pdbId = cathDomain.getPdbIdAndChain().substring(0,4); // includes chain letter
362
363                        List<CathDomain> domainList;
364                        if ( pdbMap.containsKey(pdbId)){
365                                domainList = pdbMap.get(pdbId);
366                        } else {
367                                domainList = new ArrayList<CathDomain>();
368                                pdbMap.put(pdbId,domainList);
369                        }
370
371                        domainList.add(cathDomain);
372
373                        domainMap.put( cathDomain.getDomainName(), cathDomain );
374                }
375        }
376
377        private void parseCathNames() throws IOException {
378                File file = new File(getNodeListFileName());
379                InputStreamProvider ips = new InputStreamProvider();
380                BufferedReader buffer = new BufferedReader (new InputStreamReader(ips.getInputStream(file)));
381                parseCathNames(buffer);
382        }
383
384        private void parseCathNames(BufferedReader bufferedReader) throws IOException{
385                String line;
386                //int counter = 0;
387                while ( (line = bufferedReader.readLine()) != null ) {
388                        if ( line.startsWith("#") ) continue;
389                        CathNode cathNode = parseCathNamesFileLine(line);
390                        cathTree.put(cathNode.getNodeId(), cathNode);
391                }
392        }
393
394        private void parseCathDomainDescriptionFile() throws IOException {
395                File file = new File(getDomainDescriptionFileName());
396                InputStreamProvider ips = new InputStreamProvider();
397                BufferedReader buffer = new BufferedReader (new InputStreamReader(ips.getInputStream(file)));
398                parseCathDomainDescriptionFile(buffer);
399        }
400
401        private void parseCathDomainDescriptionFile(BufferedReader bufferedReader) throws IOException{
402                String line;
403                DateFormat dateFormat = new SimpleDateFormat("dd-MMM-yyyy");
404                //int counter = 0;
405                CathDomain cathDescription = null; //TODO initialize these or catch NPE
406                StringBuilder name = null;
407                StringBuilder source = null;
408                StringBuilder seqh = null;
409                StringBuilder seqs = null;
410                List<CathSegment> segments = null;
411                CathSegment segment = null;
412                StringBuilder sseqh = null;
413                StringBuilder sseqs = null;
414                while ( (line = bufferedReader.readLine()) != null ) {
415                        if ( line.startsWith("#") ) continue;
416                        if ( line.startsWith("FORMAT") ) {
417                                cathDescription = new CathDomain();
418                                cathDescription.setFormat( line.substring(10) );
419
420                                name = new StringBuilder();
421                                source = new StringBuilder();
422                                seqh = new StringBuilder();
423                                seqs = new StringBuilder();
424
425                        } else if ( line.startsWith("DOMAIN") ) {
426                                cathDescription.setDomainName( line.substring(10) );
427                        } else if ( line.startsWith("VERSION") ) {
428                                cathDescription.setVersion( line.substring(10) );
429                        } else if ( line.startsWith("VERDATE") ) {
430                                try {
431                                        cathDescription.setDate( dateFormat.parse( line.substring(10) ) );
432                                } catch (ParseException e) {
433                                        LOGGER.error(e.getMessage(), e);
434                                }
435                        } else if ( line.startsWith("NAME") ) {
436                                name.append( line.substring(10) );
437                        } else if ( line.startsWith("SOURCE") ) {
438                                source.append( line.substring(10) );
439                        } else if ( line.startsWith("CATHCODE") ) {
440                                cathDescription.setCATH( line.substring(10) );
441                        } else if ( line.startsWith("DLENGTH") ) {
442                                cathDescription.setLength( Integer.parseInt( line.substring(10) ) );
443                        } else if ( line.startsWith("DSEQH") ) {
444                                seqh.append( line.substring(10) );
445                        } else if ( line.startsWith("DSEQS") ) {
446                                seqs = seqs.append( line.substring(10) );
447                        } else if ( line.startsWith("NSEGMENTS") ) {
448                                segments = new ArrayList<CathSegment>();
449                        } else if ( line.startsWith("SEGMENT") ) {
450                                segment = new CathSegment();
451                                sseqh = new StringBuilder();
452                                sseqs = new StringBuilder();
453                        } else if ( line.startsWith("SRANGE") ) {
454                                int startStart = line.indexOf("=",10) + 1;
455                                int startStop = line.indexOf(" ",10);
456                                int stopStart = line.indexOf("=",startStop) + 1;
457//                Integer start = Integer.parseInt( line.substring(startStart,startStop) );
458//                Integer stop = Integer.parseInt( line.substring(stopStart, line.length()) );
459                                segment.setStart( line.substring(startStart,startStop) );
460                                segment.setStop( line.substring(stopStart) );
461                        } else if ( line.startsWith("SLENGTH") ) {
462                                segment.setLength( Integer.parseInt( line.substring(10) ) );
463                        } else if ( line.startsWith("SSEQH") ) {
464                                sseqh.append( line.substring(10) );
465                        } else if ( line.startsWith("SSEQS") ) {
466                                sseqs.append( line.substring(10) );
467                        } else if ( line.startsWith("ENDSEG") ) {
468                                segments.add( segment );
469                                segment.setSegmentId( segments.size() );
470                                segment.setSequenceHeader( sseqh.toString() );
471                                segment.setSequence( sseqs.toString() );
472                        } else if ( line.startsWith("//") ) {
473                                cathDescription.setName( name.toString() );
474                                cathDescription.setSource( source.toString() );
475                                cathDescription.setSequenceHeader( seqh.toString() );
476                                cathDescription.setSequence( seqs.toString() );
477                                cathDescription.setSegments(segments);
478                                //counter++;
479
480                                String pdbId = cathDescription.getPdbIdAndChain().substring(0,4); // includes chain letter
481                                List<CathDomain> domainList;
482                                if ( pdbMap.containsKey(pdbId)){
483                                        domainList = pdbMap.get(pdbId);
484                                } else {
485                                        domainList = new ArrayList<CathDomain>();
486                                        pdbMap.put(pdbId,domainList);
487                                }
488
489                                domainList.add(cathDescription);
490
491                                domainMap.put( cathDescription.getDomainName(), cathDescription );
492
493                        }
494                }
495//        transposeDomainData();
496        }
497
498/*    private void transposeDomainData() {
499                ensureDomainListInstalled();
500                for (String k : domainMap.keySet() ) {
501                        cathMap.get(k).getDomain().setResolution(domainMap.get(k).getResolution());
502                        cathMap.get(k).getDomain().setSOLID(domainMap.get(k).getSOILD());
503                }
504        }*/
505
506        private CathDomain parseCathListFileLine(String line) {
507                CathDomain cathDomain = new CathDomain();
508                String [] token = line.split("\\s+");
509                cathDomain.setDomainName(token[0]);
510                cathDomain.setClassId(Integer.parseInt(token[1]));
511                cathDomain.setArchitectureId(Integer.parseInt(token[2]));
512                cathDomain.setTopologyId(Integer.parseInt(token[3]));
513                cathDomain.setHomologyId(Integer.parseInt(token[4]));
514                cathDomain.setSequenceFamilyId(Integer.parseInt(token[5]));
515                cathDomain.setOrthologousSequenceFamilyId(Integer.parseInt(token[6]));
516                cathDomain.setLikeSequenceFamilyId(Integer.parseInt(token[7]));
517                cathDomain.setIdenticalSequenceFamilyId(Integer.parseInt(token[8]));
518                cathDomain.setDomainCounter(Integer.parseInt(token[9]));
519                cathDomain.setLength(Integer.parseInt(token[10]));
520                cathDomain.setResolution(Double.parseDouble(token[11]));
521                return cathDomain;
522        }
523
524        private CathNode parseCathNamesFileLine(String line) {
525                CathNode cathNode = new CathNode();
526                String[] token = line.split("\\s+",3);
527                cathNode.setNodeId( token[0] );
528                int idx = token[0].lastIndexOf(".");
529                if ( idx == -1 ) idx = token[0].length();
530                cathNode.setParentId( token[0].substring( 0, idx ) );
531                cathNode.setRepresentative( token[1] );
532                cathNode.setDescription( token[2].replace(":","") );
533                return cathNode;
534        }
535
536        private void parseCathDomall() throws IOException{
537                File file = new File(getDomallFileName());
538                InputStreamProvider ips = new InputStreamProvider();
539                BufferedReader buffer = new BufferedReader (new InputStreamReader(ips.getInputStream(file)));
540                parseCathDomall(buffer);
541        }
542
543        private void parseCathDomall(BufferedReader bufferedReader) throws IOException{
544                String line;
545                while ( ((line = bufferedReader.readLine()) != null) ) {
546                        if ( line.startsWith("#") ) continue;
547                        if ( line.length() == 0 ) continue;
548                        String[] token = line.split("\\s+");
549                        String chainId = token[0];
550                        Integer numberOfDomains = Integer.parseInt( token[1].substring(1) );
551                        Integer numberOfFragments = Integer.parseInt( token[2].substring(1) );
552                        int domIdx = 3;
553                        int segIdx;
554                        Integer sstop;
555                        Integer sstart;
556                        Integer fstart;
557                        Integer fstop;
558                        Integer flength;
559                        for (int i=1; i<=numberOfDomains; i++) {
560                                DecimalFormat df = new DecimalFormat("00");
561                                String domainId;
562                                CathDomain domain;
563
564//                This logic is necessary because singular domains may be labeled with 00 or 01.
565//                If there is more than one domain, they are always numbered from 01.
566                                if (numberOfDomains==1) {
567                                        domainId = chainId + "00";
568                                        domain = domainMap.get(domainId);
569                                        if (domain==null) {
570                                                domainId = chainId + "01";
571                                                domain = domainMap.get(domainId);
572                                        }
573                                } else {
574                                        domainId = chainId + df.format(i);
575                                        domain = domainMap.get(domainId);
576                                }
577
578                                Integer numberOfSegments = Integer.parseInt( token[domIdx] );
579
580                                if ( domain == null ) {
581                                        domIdx += 6*numberOfSegments + 1;
582                                        continue;
583                                }
584
585                                List<CathSegment> segments = new ArrayList<CathSegment>(numberOfSegments);
586                                segIdx = 1; // Offset from domIdx.
587                                for (int j=1; j<=numberOfSegments; j++) {
588                                        CathSegment segment = new CathSegment();
589                                        segment.setSegmentId(j);
590//                    String chainLetter = token[domIdx+segIdx]; // Redundant unless some domains cross chain boundaries.
591                                        sstart = Integer.parseInt( token[domIdx + segIdx + 1] );
592                                        String sstartInsertion = token[domIdx + segIdx + 2];
593                                        sstartInsertion = sstartInsertion.equals("-") ? "" : sstartInsertion;
594//                    String chainLetter = token[domIdx+segIdx+4]; // Redundant unless some segments cross chain boundaries.
595                                        segment.setStart(sstart + sstartInsertion);
596
597                                        sstop = Integer.parseInt( token[domIdx + segIdx + 4] );
598                                        String sstopInsertion = token[domIdx + segIdx + 5];
599                                        sstopInsertion = sstopInsertion.equals("-") ? "" : sstopInsertion;
600
601                                        segment.setStart(sstart + sstartInsertion);
602                                        segment.setStop(sstop + sstopInsertion);
603                                        segment.setLength(1 + sstop - sstart);
604                                        segments.add(segment);
605
606                                        segIdx += 6;
607                                }
608                                domain.setSegments(segments);
609                                domIdx += 6*numberOfSegments + 1;
610                        }
611                        if (parseCathFragments) {
612                        List<CathFragment> fragments = new ArrayList<CathFragment>(numberOfFragments);
613                                for (int i=1; i<=numberOfFragments; i++) {
614                                        CathFragment fragment = new CathFragment();
615                                        fragment.setFragmentId(i);
616//                    String chainLetter = token[domIdx]; // Redundant unless some fragments cross chain boundaries.
617                                        fstart = Integer.parseInt( token[domIdx+1] );
618                                        String fstartInsertion = token[domIdx + 2];
619                                        fstartInsertion = fstartInsertion.equals("-") ? "" : fstartInsertion;
620                                        fragment.setStart(fstart + fstartInsertion);
621//                    String chainLetter = token[domIdx+3]; // Redundant unless some fragments cross chain boundaries.
622                                        fstop = Integer.parseInt( token[domIdx+4] );
623                                        String fstopInsertion = token[domIdx + 5];
624                                        fstopInsertion = fstopInsertion.equals("-") ? "" : fstopInsertion;
625                                        fragment.setStop(fstop + fstopInsertion);
626                                        flength = Integer.parseInt( token[domIdx + 6].replaceAll("[^0-9]","") );
627                                        fragment.setLength(flength);
628                                        fragments.add(fragment);
629                                        domIdx += 7;
630                                }
631                                fragmentMap.put(chainId, fragments);
632                        }
633//            if ( domIdx != token.length ); // Problems.
634                }
635        }
636
637        protected void downloadFileFromRemote(URL remoteURL, File localFile) throws IOException{
638//        System.out.println("downloading " + remoteURL + " to: " + localFile);
639
640                long timeS = System.currentTimeMillis();
641                File tempFile  = File.createTempFile(FileDownloadUtils.getFilePrefix(localFile), "."+ FileDownloadUtils.getFileExtension(localFile));
642
643                FileOutputStream out = new FileOutputStream(tempFile);
644
645                InputStream in = remoteURL.openStream();
646                byte[] buf = new byte[4 * 1024]; // 4K buffer
647                int bytesRead;
648                while ((bytesRead = in.read(buf)) != -1) {
649                        out.write(buf, 0, bytesRead);
650                }
651                in.close();
652                out.close();
653
654                FileDownloadUtils.copy(tempFile,localFile);
655
656                // delete the tmp file
657                tempFile.delete();
658
659                long size =  localFile.length();
660
661                double disp = size / 1024.0;
662                String unit = " kB";
663                if ( disp > 1024 ) {
664                        unit = " MB";
665                        disp = disp / 1024.0;
666                }
667                long timeE = System.currentTimeMillis();
668                LOGGER.info("Downloaded file {} ({}) to local file {} in {} sec.", remoteURL, String.format("%.1f",disp) + unit, localFile, (timeE - timeS)/1000);
669        }
670
671        private boolean domainDescriptionFileAvailable(){
672                String fileName = getDomainDescriptionFileName();
673                File f = new File(fileName);
674                return f.exists();
675        }
676
677        private boolean domainListFileAvailable(){
678                String fileName = getDomainListFileName();
679                File f = new File(fileName);
680                return f.exists();
681        }
682
683        private boolean nodeListFileAvailable(){
684                String fileName = getNodeListFileName();
685                File f = new File(fileName);
686                return f.exists();
687        }
688
689        private boolean domallFileAvailable() {
690                String fileName = getDomallFileName();
691                File f= new File(fileName);
692                return f.exists();
693        }
694
695        protected void downloadDomainListFile() throws IOException{
696                String remoteFilename = domainListFileName;
697                URL url = new URL(buildUrl(remoteFilename)); 
698                String localFileName = getDomainListFileName();
699                File localFile = new File(localFileName);
700                downloadFileFromRemote(url, localFile);
701        }
702
703        protected void downloadDomainDescriptionFile() throws IOException{
704                String remoteFilename = domainDescriptionFileName;
705                URL url = new URL(buildUrl(remoteFilename));
706                String localFileName = getDomainDescriptionFileName();
707                File localFile = new File(localFileName);
708                downloadFileFromRemote(url, localFile);
709        }
710
711        protected void downloadNodeListFile() throws IOException{
712                String remoteFilename = nodeListFileName;
713                URL url = new URL(buildUrl(remoteFilename));
714                String localFileName = getNodeListFileName();
715                File localFile = new File(localFileName);
716                downloadFileFromRemote(url, localFile);
717        }
718
719        protected void downloadDomallFile() throws IOException {
720                String remoteFileName = domallFileName;
721                URL url = new URL(buildUrl(remoteFileName));
722                String localFileName = getDomallFileName();
723                File localFile = new File(localFileName);
724                downloadFileFromRemote(url, localFile);
725        }
726
727        public void ensureDomainListInstalled(){
728                if ( installedDomainList.get() ) return;
729
730                if ( ! domainListFileAvailable() ){
731                        try {
732                                downloadDomainListFile();
733                        } catch (Exception e){
734                                LOGGER.error("Could not download CATH domain list file. Error: {}", e.getMessage());
735                                installedDomainList.set(false);
736                                return;
737                        }
738                }
739
740                try {
741                        parseCathDomainList();
742                } catch (Exception e){
743                        LOGGER.error(e.getMessage(), e);
744                        installedDomainList.set(false);
745                        return;
746                }
747                installedDomainList.set(true);
748        }
749
750        public void ensureDomainDescriptionInstalled(){
751                if ( installedDomainDescription.get() ) return;
752
753                if ( ! domainDescriptionFileAvailable() ){
754                        try {
755                                downloadDomainDescriptionFile();
756                        } catch (Exception e){
757                                LOGGER.error("Could not download CATH domain description file. Error: {}", e.getMessage());
758                                installedDomainDescription.set(false);
759                                return;
760                        }
761                }
762
763                try {
764                        parseCathDomainDescriptionFile();
765                } catch (Exception e){
766                        LOGGER.error(e.getMessage(), e);
767                        installedDomainDescription.set(false);
768                        return;
769                }
770                installedDomainDescription.set(true);
771        }
772
773        public void ensureNodeListInstalled(){
774                if ( installedNodeList.get() ) return;
775
776                if ( ! nodeListFileAvailable() ){
777                        try {
778                                downloadNodeListFile();
779                        } catch (Exception e){
780                                LOGGER.error("Could not download CATH node list file. Error: {}", e.getMessage());
781                                installedNodeList.set(false);
782                                return;
783                        }
784                }
785
786                try {
787                        parseCathNames();
788                } catch (Exception e){
789                        LOGGER.error(e.getMessage(), e);
790                        installedNodeList.set(false);
791                        return;
792                }
793                installedNodeList.set(true);
794        }
795
796        public void ensureDomallInstalled() {
797                ensureDomainListInstalled();
798
799                if ( !installedDomainList.get() ) {
800                        installedDomall.set(false);
801                        return;
802                }
803
804                if ( installedDomall.get() ) return;
805
806                if ( ! domallFileAvailable() ){
807                        try {
808                                downloadDomallFile();
809                        } catch (Exception e) {
810                                LOGGER.error("Could not download CATH domain all file. Error: {}", e.getMessage());
811                                installedDomall.set(false);
812                                return;
813                        }
814                }
815
816                try {
817                        parseCathDomall();
818                } catch (Exception e) {
819                        LOGGER.error(e.getMessage(), e);
820                        installedDomall.set(false);
821                        return;
822                }
823                installedDomall.set(true);
824        }
825
826        public void setCathVersion(String cathVersion) {
827                this.cathVersion = cathVersion;
828        }
829        
830
831}