001/*
002 * BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Author: Daniel Asarnow
021 * Date:   2012-6-23
022 */
023
024package org.biojava.nbio.structure.cath;
025
026import org.biojava.nbio.structure.align.util.UserConfiguration;
027import org.biojava.nbio.core.util.FileDownloadUtils;
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030import org.biojava.nbio.core.util.InputStreamProvider;
031
032import java.io.*;
033import java.net.URL;
034import java.nio.file.Files;
035import java.nio.file.StandardCopyOption;
036import java.text.DateFormat;
037import java.text.DecimalFormat;
038import java.text.ParseException;
039import java.text.SimpleDateFormat;
040import java.util.*;
041import java.util.concurrent.atomic.AtomicBoolean;
042
043/**
044 * @author Daniel Asarnow
045 */
046public class CathInstallation implements CathDatabase{
047
048        private static final Logger LOGGER = LoggerFactory.getLogger(CathInstallation.class);
049
050        public static final String DEFAULT_VERSION = CathFactory.DEFAULT_VERSION;
051
052        public static final String domainListFileName = "cath-domain-list-v%s.txt";
053        public static final String domainDescriptionFileName = "cath-domain-description-file-v%s.txt";
054        public static final String nodeListFileName = "cath-names-v%s.txt";
055        public static final String domallFileName = "cath-domain-boundaries-v%s.txt";
056
057        public static final String CATH_DOWNLOAD_URL                     = "http://download.cathdb.info/cath/releases/";
058        public static final String CATH_DOWNLOAD_ALL_RELEASES_DIR        = "all-releases";
059        public static final String CATH_DOWNLOAD_CLASSIFICATION_DATA_DIR = "cath-classification-data";
060
061        public static final String NEWLINE = System.getProperty("line.separator");;
062        public static final String FILESPLIT = System.getProperty("file.separator");;
063
064
065        private String cathVersion;
066
067        private String cathDownloadUrl;
068
069        private String cacheLocation ;
070
071        private AtomicBoolean installedDomainList;
072        private AtomicBoolean installedDomainDescription;
073        private AtomicBoolean installedNodeList;
074        private AtomicBoolean installedDomall;
075
076        private final boolean useCathDomainDescriptionFile;
077        private final boolean parseCathFragments;
078
079        private Map<String, List<CathDomain>> pdbMap;
080        private Map<String, CathDomain> domainMap;
081        private Map<String, CathNode> cathTree;
082        private Map<String, List<CathFragment>> fragmentMap;
083
084
085
086        public CathInstallation(String cacheLocation, boolean usingCDDF, boolean parseCF) {
087                setCacheLocation(cacheLocation);
088
089                useCathDomainDescriptionFile = usingCDDF;
090                parseCathFragments = parseCF;
091
092                installedDomainDescription = new AtomicBoolean(false);
093                installedDomainList = new AtomicBoolean(false);
094                installedNodeList = new AtomicBoolean(false);
095                installedDomall = new AtomicBoolean(false);
096
097                cathVersion = DEFAULT_VERSION;
098                cathDownloadUrl = CATH_DOWNLOAD_URL;
099
100                pdbMap = new HashMap<>();
101                domainMap = new HashMap< >();
102                cathTree = new HashMap<>();
103
104                if (parseCathFragments) fragmentMap = new HashMap<>();
105
106        }
107
108        public CathInstallation(String cacheLocation) {
109                this(cacheLocation, false, false);
110        }
111
112        public CathInstallation() {
113                this((new UserConfiguration()).getCacheFilePath());
114        }
115
116        public String getDomainListFileName() {
117                return cacheLocation + buildFileName(domainListFileName);
118        }
119
120        public String getDomainDescriptionFileName() {
121                return cacheLocation + buildFileName(domainDescriptionFileName);
122        }
123
124        public String getNodeListFileName() {
125                return cacheLocation + buildFileName(nodeListFileName);
126        }
127
128        public String getDomallFileName() {
129                return cacheLocation + buildFileName(domallFileName);
130        }
131
132        private String buildFileName(String fileNameTemplate) {
133                return String.format(fileNameTemplate, cathVersion);
134        }
135
136        private String buildUrl(String remoteFileName) {
137                String remoteFileNameWithVer =  buildFileName(remoteFileName);
138                String releasesDir = CATH_DOWNLOAD_ALL_RELEASES_DIR;
139                return cathDownloadUrl + releasesDir + "/v" + cathVersion + "/" + CATH_DOWNLOAD_CLASSIFICATION_DATA_DIR + "/" + remoteFileNameWithVer;
140        }
141
142        public String getCathDownloadUrl() {
143                return cathDownloadUrl;
144        }
145
146        public void setCathDownloadUrl(String cathDownloadUrl) {
147                this.cathDownloadUrl = cathDownloadUrl;
148        }
149
150        public String getCacheLocation() {
151                return cacheLocation;
152        }
153
154        public void setCacheLocation(String cacheLocation) {
155                if ( !cacheLocation.endsWith(FILESPLIT) ) cacheLocation += FILESPLIT;
156                this.cacheLocation = cacheLocation;
157        }
158
159        public AtomicBoolean getInstalledDomainList() {
160                return installedDomainList;
161        }
162
163        public void setInstalledDomainList(AtomicBoolean installedDomainList) {
164                this.installedDomainList = installedDomainList;
165        }
166
167        public AtomicBoolean getInstalledDomainDescription() {
168                return installedDomainDescription;
169        }
170
171        public void setInstalledDomainDescription(AtomicBoolean installedDomainDescription) {
172                this.installedDomainDescription = installedDomainDescription;
173        }
174
175        public AtomicBoolean getInstalledNodeList() {
176                return installedNodeList;
177        }
178
179        public AtomicBoolean getInstalledDomall() {
180                return installedDomall;
181        }
182
183        public void setInstalledNodeList(AtomicBoolean installedNodeList) {
184                this.installedNodeList = installedNodeList;
185        }
186
187        public void setInstalledDomall(AtomicBoolean installedDomall) {
188                this.installedDomall = installedDomall;
189        }
190
191        @Override
192        public String getCathVersion() {
193                return cathVersion;
194        }
195
196        @Override
197        public CathNode getCathNode(String nodeId) {
198                ensureNodeListInstalled();
199                return cathTree.get(nodeId);
200        }
201
202        @Override
203        public List<CathDomain> getByCategory(CathCategory category) {
204                if (useCathDomainDescriptionFile) {
205                        ensureDomainDescriptionInstalled();
206                } else {
207                        ensureDomallInstalled();
208                }
209                ensureNodeListInstalled();
210                List<CathDomain> matches = new ArrayList<>();
211                CathNode node;
212                for ( String nodeId : cathTree.keySet() ) {
213                        if ( (node = cathTree.get(nodeId)).getCategory() == category ) {
214                                matches.add( domainMap.get( node.getRepresentative() ) );
215                        }
216                }
217                return matches;
218        }
219
220        @Override
221        public List<CathDomain> filterByCathCode(String query) {
222                if (useCathDomainDescriptionFile) {
223                        ensureDomainDescriptionInstalled();
224                } else {
225                        ensureDomallInstalled();
226                }
227                List<CathDomain> matches = new ArrayList<>();
228                for ( String k : domainMap.keySet() ) {
229                        if ( domainMap.get(k).getCATH().startsWith(query) ) {
230                                matches.add( domainMap.get(k) );
231                        }
232                }
233                return matches;
234        }
235
236        @Override
237        public List<CathNode> getTree(CathDomain domain) {
238                CathNode node = getCathNode( domain.getCATH() );
239                List<CathNode> tree = new ArrayList<>();
240                while (node != null) {
241                        node = getCathNode( node.getParentId() );
242                        if (node != null) tree.add(node);
243                }
244                Collections.reverse(tree);
245                return tree;
246        }
247
248        @Override
249        public List<CathDomain> filterByNodeName(String query) {
250                ensureNodeListInstalled();
251                List<CathNode> matchingNodes = new ArrayList<>();
252                CathNode node;
253                for ( String nodeId : cathTree.keySet() ) {
254                        if ( (node = cathTree.get(nodeId) ).getDescription().startsWith(query) ) {
255                                matchingNodes.add(node);
256                        }
257                }
258                List<CathDomain> matches = new ArrayList<>();
259                for (CathNode n : matchingNodes) {
260                        matches.addAll(getDomainsByNodeId(n.getNodeId()));
261                }
262                return matches;
263        }
264
265        @Override
266        public List<CathDomain> filterByDescription(String query) {
267                if (useCathDomainDescriptionFile) {
268                        ensureDomainDescriptionInstalled();
269                } else {
270                        ensureDomallInstalled();
271                }
272                List<CathDomain> matches = new ArrayList<>();
273                for ( String k : domainMap.keySet() ) {
274                        if ( domainMap.get(k).getName().startsWith(query) ) {
275                                matches.add( domainMap.get(k) );
276                        }
277                }
278                return matches;
279        }
280
281        @Override
282        public CathDomain getDescriptionByNodeId(String nodeId) {
283                if (useCathDomainDescriptionFile) {
284                        ensureDomainDescriptionInstalled();
285                } else {
286                        ensureDomallInstalled();
287                }
288                CathNode node = getCathNode(nodeId);
289                return domainMap.get(node.getRepresentative());
290        }
291
292        @Override
293        public List<CathDomain> getDomainsForPdb(String pdbId) {
294                if (useCathDomainDescriptionFile) {
295                        ensureDomainDescriptionInstalled();
296                } else {
297                        ensureDomallInstalled();
298                }
299
300          // cath IDs in lower case...
301                return pdbMap.get(pdbId.toLowerCase());
302        }
303
304        @Override
305        public CathDomain getDomainByCathId(String cathId) {
306                if (useCathDomainDescriptionFile) {
307                        ensureDomainDescriptionInstalled();
308                } else {
309                        ensureDomallInstalled();
310                }
311                return domainMap.get(cathId);
312        }
313
314        @Override
315        public CathDomain getDescriptionByCathId(String cathId) {
316                if (useCathDomainDescriptionFile) {
317                        ensureDomainDescriptionInstalled();
318                } else {
319                        ensureDomallInstalled();
320                }
321                return domainMap.get(cathId);
322        }
323
324        @Override
325        public List<CathDomain> getDomainsByNodeId(String nodeId) {
326                if (useCathDomainDescriptionFile) {
327                        ensureDomainDescriptionInstalled();
328                } else {
329                        ensureDomallInstalled();
330                }
331                List<CathDomain> domains = new ArrayList<>();
332                for (String domainName : domainMap.keySet()) {
333                        CathDomain description = domainMap.get(domainName);
334                        if ( description.getCATH().startsWith(nodeId) ) {
335                                domains.add(description);
336                        }
337                }
338                return domains;
339        }
340
341        @Override
342        public List<CathFragment> getFragmentsByPdbId(String pdbId) {
343                if ( useCathDomainDescriptionFile || !parseCathFragments ) return null;
344                ensureDomallInstalled();
345                return fragmentMap.get(pdbId);
346        }
347
348        private void parseCathDomainList() throws IOException {
349                File file = new File(getDomainListFileName());
350                InputStreamProvider ips = new InputStreamProvider();
351                BufferedReader buffer = new BufferedReader (new InputStreamReader(ips.getInputStream(file)));
352                parseCathDomainList(buffer);
353        }
354
355        private void parseCathDomainList(BufferedReader bufferedReader) throws IOException{
356                String line;
357         //   int counter = 0;
358                while ( (line = bufferedReader.readLine()) != null ) {
359                        if ( line.startsWith("#") ) continue;
360                        CathDomain cathDomain = parseCathListFileLine(line);
361                   // counter++;
362
363                        String pdbId = cathDomain.getPdbIdAndChain().substring(0,4); // includes chain letter
364
365                        List<CathDomain> domainList;
366                        if ( pdbMap.containsKey(pdbId)){
367                                domainList = pdbMap.get(pdbId);
368                        } else {
369                                domainList = new ArrayList<>();
370                                pdbMap.put(pdbId,domainList);
371                        }
372
373                        domainList.add(cathDomain);
374
375                        domainMap.put( cathDomain.getDomainName(), cathDomain );
376                }
377        }
378
379        private void parseCathNames() throws IOException {
380                File file = new File(getNodeListFileName());
381                InputStreamProvider ips = new InputStreamProvider();
382                BufferedReader buffer = new BufferedReader (new InputStreamReader(ips.getInputStream(file)));
383                parseCathNames(buffer);
384        }
385
386        private void parseCathNames(BufferedReader bufferedReader) throws IOException{
387                String line;
388                //int counter = 0;
389                while ( (line = bufferedReader.readLine()) != null ) {
390                        if ( line.startsWith("#") ) continue;
391                        CathNode cathNode = parseCathNamesFileLine(line);
392                        cathTree.put(cathNode.getNodeId(), cathNode);
393                }
394        }
395
396        private void parseCathDomainDescriptionFile() throws IOException {
397                File file = new File(getDomainDescriptionFileName());
398                InputStreamProvider ips = new InputStreamProvider();
399                BufferedReader buffer = new BufferedReader (new InputStreamReader(ips.getInputStream(file)));
400                parseCathDomainDescriptionFile(buffer);
401        }
402
403        private void parseCathDomainDescriptionFile(BufferedReader bufferedReader) throws IOException{
404                String line;
405                DateFormat dateFormat = new SimpleDateFormat("dd-MMM-yyyy");
406                //int counter = 0;
407                CathDomain cathDescription = null; //TODO initialize these or catch NPE
408                StringBuilder name = null;
409                StringBuilder source = null;
410                StringBuilder seqh = null;
411                StringBuilder seqs = null;
412                List<CathSegment> segments = null;
413                CathSegment segment = null;
414                StringBuilder sseqh = null;
415                StringBuilder sseqs = null;
416                while ( (line = bufferedReader.readLine()) != null ) {
417                        if ( line.startsWith("#") ) continue;
418                        if ( line.startsWith("FORMAT") ) {
419                                cathDescription = new CathDomain();
420                                cathDescription.setFormat( line.substring(10) );
421
422                                name = new StringBuilder();
423                                source = new StringBuilder();
424                                seqh = new StringBuilder();
425                                seqs = new StringBuilder();
426
427                        } else if ( line.startsWith("DOMAIN") ) {
428                                cathDescription.setDomainName( line.substring(10) );
429                        } else if ( line.startsWith("VERSION") ) {
430                                cathDescription.setVersion( line.substring(10) );
431                        } else if ( line.startsWith("VERDATE") ) {
432                                try {
433                                        cathDescription.setDate( dateFormat.parse( line.substring(10) ) );
434                                } catch (ParseException e) {
435                                        LOGGER.error(e.getMessage(), e);
436                                }
437                        } else if ( line.startsWith("NAME") ) {
438                                name.append( line.substring(10) );
439                        } else if ( line.startsWith("SOURCE") ) {
440                                source.append( line.substring(10) );
441                        } else if ( line.startsWith("CATHCODE") ) {
442                                cathDescription.setCATH( line.substring(10) );
443                        } else if ( line.startsWith("DLENGTH") ) {
444                                cathDescription.setLength( Integer.parseInt( line.substring(10) ) );
445                        } else if ( line.startsWith("DSEQH") ) {
446                                seqh.append( line.substring(10) );
447                        } else if ( line.startsWith("DSEQS") ) {
448                                seqs = seqs.append( line.substring(10) );
449                        } else if ( line.startsWith("NSEGMENTS") ) {
450                                segments = new ArrayList<>();
451                        } else if ( line.startsWith("SEGMENT") ) {
452                                segment = new CathSegment();
453                                sseqh = new StringBuilder();
454                                sseqs = new StringBuilder();
455                        } else if ( line.startsWith("SRANGE") ) {
456                                int startStart = line.indexOf("=",10) + 1;
457                                int startStop = line.indexOf(" ",10);
458                                int stopStart = line.indexOf("=",startStop) + 1;
459//                Integer start = Integer.parseInt( line.substring(startStart,startStop) );
460//                Integer stop = Integer.parseInt( line.substring(stopStart, line.length()) );
461                                segment.setStart( line.substring(startStart,startStop) );
462                                segment.setStop( line.substring(stopStart) );
463                        } else if ( line.startsWith("SLENGTH") ) {
464                                segment.setLength( Integer.parseInt( line.substring(10) ) );
465                        } else if ( line.startsWith("SSEQH") ) {
466                                sseqh.append( line.substring(10) );
467                        } else if ( line.startsWith("SSEQS") ) {
468                                sseqs.append( line.substring(10) );
469                        } else if ( line.startsWith("ENDSEG") ) {
470                                segments.add( segment );
471                                segment.setSegmentId( segments.size() );
472                                segment.setSequenceHeader( sseqh.toString() );
473                                segment.setSequence( sseqs.toString() );
474                        } else if ( line.startsWith("//") ) {
475                                cathDescription.setName( name.toString() );
476                                cathDescription.setSource( source.toString() );
477                                cathDescription.setSequenceHeader( seqh.toString() );
478                                cathDescription.setSequence( seqs.toString() );
479                                cathDescription.setSegments(segments);
480                                //counter++;
481
482                                String pdbId = cathDescription.getPdbIdAndChain().substring(0,4); // includes chain letter
483                                List<CathDomain> domainList;
484                                if ( pdbMap.containsKey(pdbId)){
485                                        domainList = pdbMap.get(pdbId);
486                                } else {
487                                        domainList = new ArrayList<>();
488                                        pdbMap.put(pdbId,domainList);
489                                }
490
491                                domainList.add(cathDescription);
492
493                                domainMap.put( cathDescription.getDomainName(), cathDescription );
494
495                        }
496                }
497//        transposeDomainData();
498        }
499
500/*    private void transposeDomainData() {
501                ensureDomainListInstalled();
502                for (String k : domainMap.keySet() ) {
503                        cathMap.get(k).getDomain().setResolution(domainMap.get(k).getResolution());
504                        cathMap.get(k).getDomain().setSOLID(domainMap.get(k).getSOILD());
505                }
506        }*/
507
508        private CathDomain parseCathListFileLine(String line) {
509                CathDomain cathDomain = new CathDomain();
510                String [] token = line.split("\\s+");
511                cathDomain.setDomainName(token[0]);
512                cathDomain.setClassId(Integer.parseInt(token[1]));
513                cathDomain.setArchitectureId(Integer.parseInt(token[2]));
514                cathDomain.setTopologyId(Integer.parseInt(token[3]));
515                cathDomain.setHomologyId(Integer.parseInt(token[4]));
516                cathDomain.setSequenceFamilyId(Integer.parseInt(token[5]));
517                cathDomain.setOrthologousSequenceFamilyId(Integer.parseInt(token[6]));
518                cathDomain.setLikeSequenceFamilyId(Integer.parseInt(token[7]));
519                cathDomain.setIdenticalSequenceFamilyId(Integer.parseInt(token[8]));
520                cathDomain.setDomainCounter(Integer.parseInt(token[9]));
521                cathDomain.setLength(Integer.parseInt(token[10]));
522                cathDomain.setResolution(Double.parseDouble(token[11]));
523                return cathDomain;
524        }
525
526        private CathNode parseCathNamesFileLine(String line) {
527                CathNode cathNode = new CathNode();
528                String[] token = line.split("\\s+",3);
529                cathNode.setNodeId( token[0] );
530                int idx = token[0].lastIndexOf(".");
531                if ( idx == -1 ) idx = token[0].length();
532                cathNode.setParentId( token[0].substring( 0, idx ) );
533                cathNode.setRepresentative( token[1] );
534                cathNode.setDescription( token[2].replace(":","") );
535                return cathNode;
536        }
537
538        private void parseCathDomall() throws IOException{
539                File file = new File(getDomallFileName());
540                InputStreamProvider ips = new InputStreamProvider();
541                BufferedReader buffer = new BufferedReader (new InputStreamReader(ips.getInputStream(file)));
542                parseCathDomall(buffer);
543        }
544
545        private void parseCathDomall(BufferedReader bufferedReader) throws IOException{
546                String line;
547                while ( ((line = bufferedReader.readLine()) != null) ) {
548                        if ( line.startsWith("#") ) continue;
549                        if ( line.length() == 0 ) continue;
550                        String[] token = line.split("\\s+");
551                        String chainId = token[0];
552                        Integer numberOfDomains = Integer.parseInt( token[1].substring(1) );
553                        Integer numberOfFragments = Integer.parseInt( token[2].substring(1) );
554                        int domIdx = 3;
555                        int segIdx;
556                        Integer sstop;
557                        Integer sstart;
558                        Integer fstart;
559                        Integer fstop;
560                        Integer flength;
561                        for (int i=1; i<=numberOfDomains; i++) {
562                                DecimalFormat df = new DecimalFormat("00");
563                                String domainId;
564                                CathDomain domain;
565
566//                This logic is necessary because singular domains may be labeled with 00 or 01.
567//                If there is more than one domain, they are always numbered from 01.
568                                if (numberOfDomains==1) {
569                                        domainId = chainId + "00";
570                                        domain = domainMap.get(domainId);
571                                        if (domain==null) {
572                                                domainId = chainId + "01";
573                                                domain = domainMap.get(domainId);
574                                        }
575                                } else {
576                                        domainId = chainId + df.format(i);
577                                        domain = domainMap.get(domainId);
578                                }
579
580                                Integer numberOfSegments = Integer.parseInt( token[domIdx] );
581
582                                if ( domain == null ) {
583                                        domIdx += 6*numberOfSegments + 1;
584                                        continue;
585                                }
586
587                                List<CathSegment> segments = new ArrayList<>(numberOfSegments);
588                                segIdx = 1; // Offset from domIdx.
589                                for (int j=1; j<=numberOfSegments; j++) {
590                                        CathSegment segment = new CathSegment();
591                                        segment.setSegmentId(j);
592//                    String chainLetter = token[domIdx+segIdx]; // Redundant unless some domains cross chain boundaries.
593                                        sstart = Integer.parseInt( token[domIdx + segIdx + 1] );
594                                        String sstartInsertion = token[domIdx + segIdx + 2];
595                                        sstartInsertion = "-".equals(sstartInsertion) ? "" : sstartInsertion;
596//                    String chainLetter = token[domIdx+segIdx+4]; // Redundant unless some segments cross chain boundaries.
597                                        segment.setStart(sstart + sstartInsertion);
598
599                                        sstop = Integer.parseInt( token[domIdx + segIdx + 4] );
600                                        String sstopInsertion = token[domIdx + segIdx + 5];
601                                        sstopInsertion = "-".equals(sstopInsertion) ? "" : sstopInsertion;
602
603                                        segment.setStart(sstart + sstartInsertion);
604                                        segment.setStop(sstop + sstopInsertion);
605                                        segment.setLength(1 + sstop - sstart);
606                                        segments.add(segment);
607
608                                        segIdx += 6;
609                                }
610                                domain.setSegments(segments);
611                                domIdx += 6*numberOfSegments + 1;
612                        }
613                        if (parseCathFragments) {
614                        List<CathFragment> fragments = new ArrayList<>(numberOfFragments);
615                                for (int i=1; i<=numberOfFragments; i++) {
616                                        CathFragment fragment = new CathFragment();
617                                        fragment.setFragmentId(i);
618//                    String chainLetter = token[domIdx]; // Redundant unless some fragments cross chain boundaries.
619                                        fstart = Integer.parseInt( token[domIdx+1] );
620                                        String fstartInsertion = token[domIdx + 2];
621                                        fstartInsertion = "-".equals(fstartInsertion) ? "" : fstartInsertion;
622                                        fragment.setStart(fstart + fstartInsertion);
623//                    String chainLetter = token[domIdx+3]; // Redundant unless some fragments cross chain boundaries.
624                                        fstop = Integer.parseInt( token[domIdx+4] );
625                                        String fstopInsertion = token[domIdx + 5];
626                                        fstopInsertion = "-".equals(fstopInsertion) ? "" : fstopInsertion;
627                                        fragment.setStop(fstop + fstopInsertion);
628                                        flength = Integer.parseInt( token[domIdx + 6].replaceAll("[^0-9]","") );
629                                        fragment.setLength(flength);
630                                        fragments.add(fragment);
631                                        domIdx += 7;
632                                }
633                                fragmentMap.put(chainId, fragments);
634                        }
635//            if ( domIdx != token.length ); // Problems.
636                }
637        }
638
639        protected void downloadFileFromRemote(URL remoteURL, File localFile) throws IOException{
640//        System.out.println("downloading " + remoteURL + " to: " + localFile);
641                LOGGER.info("Downloading file {} to local file {}", remoteURL, localFile);
642
643                long timeS = System.currentTimeMillis();
644                File tempFile  = Files.createTempFile(FileDownloadUtils.getFilePrefix(localFile),"." + FileDownloadUtils.getFileExtension(localFile)).toFile();
645
646                FileOutputStream out = new FileOutputStream(tempFile);
647
648                InputStream in = remoteURL.openStream();
649                byte[] buf = new byte[4 * 1024]; // 4K buffer
650                int bytesRead;
651                while ((bytesRead = in.read(buf)) != -1) {
652                        out.write(buf, 0, bytesRead);
653                }
654                in.close();
655                out.close();
656
657                Files.copy(tempFile.toPath(), localFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
658
659                // delete the tmp file
660                tempFile.delete();
661
662                long size =  localFile.length();
663
664                double disp = size / 1024.0;
665                String unit = " kB";
666                if ( disp > 1024 ) {
667                        unit = " MB";
668                        disp = disp / 1024.0;
669                }
670                long timeE = System.currentTimeMillis();
671                LOGGER.info("Downloaded {} in {} sec. to {}", String.format("%.1f",disp) + unit, (timeE - timeS)/1000, localFile);
672        }
673
674        private boolean domainDescriptionFileAvailable(){
675                String fileName = getDomainDescriptionFileName();
676                File f = new File(fileName);
677                return f.exists();
678        }
679
680        private boolean domainListFileAvailable(){
681                String fileName = getDomainListFileName();
682                File f = new File(fileName);
683                return f.exists();
684        }
685
686        private boolean nodeListFileAvailable(){
687                String fileName = getNodeListFileName();
688                File f = new File(fileName);
689                return f.exists();
690        }
691
692        private boolean domallFileAvailable() {
693                String fileName = getDomallFileName();
694                File f= new File(fileName);
695                return f.exists();
696        }
697
698        protected void downloadDomainListFile() throws IOException{
699                String remoteFilename = domainListFileName;
700                URL url = new URL(buildUrl(remoteFilename));
701                String localFileName = getDomainListFileName();
702                File localFile = new File(localFileName);
703                downloadFileFromRemote(url, localFile);
704        }
705
706        protected void downloadDomainDescriptionFile() throws IOException{
707                String remoteFilename = domainDescriptionFileName;
708                URL url = new URL(buildUrl(remoteFilename));
709                String localFileName = getDomainDescriptionFileName();
710                File localFile = new File(localFileName);
711                downloadFileFromRemote(url, localFile);
712        }
713
714        protected void downloadNodeListFile() throws IOException{
715                String remoteFilename = nodeListFileName;
716                URL url = new URL(buildUrl(remoteFilename));
717                String localFileName = getNodeListFileName();
718                File localFile = new File(localFileName);
719                downloadFileFromRemote(url, localFile);
720        }
721
722        protected void downloadDomallFile() throws IOException {
723                String remoteFileName = domallFileName;
724                URL url = new URL(buildUrl(remoteFileName));
725                String localFileName = getDomallFileName();
726                File localFile = new File(localFileName);
727                downloadFileFromRemote(url, localFile);
728        }
729
730        public void ensureDomainListInstalled(){
731                if ( installedDomainList.get() ) return;
732
733                if ( ! domainListFileAvailable() ){
734                        try {
735                                downloadDomainListFile();
736                        } catch (Exception e){
737                                LOGGER.error("Could not download CATH domain list file. Error: {}", e.getMessage());
738                                installedDomainList.set(false);
739                                return;
740                        }
741                }
742
743                try {
744                        parseCathDomainList();
745                } catch (Exception e){
746                        LOGGER.error(e.getMessage(), e);
747                        installedDomainList.set(false);
748                        return;
749                }
750                installedDomainList.set(true);
751        }
752
753        public void ensureDomainDescriptionInstalled(){
754                if ( installedDomainDescription.get() ) return;
755
756                if ( ! domainDescriptionFileAvailable() ){
757                        try {
758                                downloadDomainDescriptionFile();
759                        } catch (Exception e){
760                                LOGGER.error("Could not download CATH domain description file. Error: {}", e.getMessage());
761                                installedDomainDescription.set(false);
762                                return;
763                        }
764                }
765
766                try {
767                        parseCathDomainDescriptionFile();
768                } catch (Exception e){
769                        LOGGER.error(e.getMessage(), e);
770                        installedDomainDescription.set(false);
771                        return;
772                }
773                installedDomainDescription.set(true);
774        }
775
776        public void ensureNodeListInstalled(){
777                if ( installedNodeList.get() ) return;
778
779                if ( ! nodeListFileAvailable() ){
780                        try {
781                                downloadNodeListFile();
782                        } catch (Exception e){
783                                LOGGER.error("Could not download CATH node list file. Error: {}", e.getMessage());
784                                installedNodeList.set(false);
785                                return;
786                        }
787                }
788
789                try {
790                        parseCathNames();
791                } catch (Exception e){
792                        LOGGER.error(e.getMessage(), e);
793                        installedNodeList.set(false);
794                        return;
795                }
796                installedNodeList.set(true);
797        }
798
799        public void ensureDomallInstalled() {
800                ensureDomainListInstalled();
801
802                if ( !installedDomainList.get() ) {
803                        installedDomall.set(false);
804                        return;
805                }
806
807                if ( installedDomall.get() ) return;
808
809                if ( ! domallFileAvailable() ){
810                        try {
811                                downloadDomallFile();
812                        } catch (Exception e) {
813                                LOGGER.error("Could not download CATH domain all file. Error: {}", e.getMessage());
814                                installedDomall.set(false);
815                                return;
816                        }
817                }
818
819                try {
820                        parseCathDomall();
821                } catch (Exception e) {
822                        LOGGER.error(e.getMessage(), e);
823                        installedDomall.set(false);
824                        return;
825                }
826                installedDomall.set(true);
827        }
828
829        public void setCathVersion(String cathVersion) {
830                this.cathVersion = cathVersion;
831        }
832
833
834}