001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on 01-21-2010
021 *
 * @author Scooter Willis
023 *
024 */
025package org.biojava.nbio.core.sequence.loader;
026
027import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
028import org.biojava.nbio.core.sequence.AccessionID;
029import org.biojava.nbio.core.sequence.DataSource;
030import org.biojava.nbio.core.sequence.ProteinSequence;
031import org.biojava.nbio.core.sequence.Strand;
032import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
033import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
034import org.biojava.nbio.core.sequence.features.DBReferenceInfo;
035import org.biojava.nbio.core.sequence.features.DatabaseReferenceInterface;
036import org.biojava.nbio.core.sequence.features.FeaturesKeyWordInterface;
037import org.biojava.nbio.core.sequence.storage.SequenceAsStringHelper;
038import org.biojava.nbio.core.sequence.template.*;
039import org.biojava.nbio.core.util.Equals;
040import org.biojava.nbio.core.util.XMLHelper;
041import org.slf4j.Logger;
042import org.slf4j.LoggerFactory;
043import org.w3c.dom.Document;
044import org.w3c.dom.Element;
045import org.xml.sax.SAXException;
046
047import javax.xml.parsers.ParserConfigurationException;
048import javax.xml.xpath.XPathExpressionException;
049import java.io.*;
050import java.net.HttpURLConnection;
051import java.net.URL;
052import java.rmi.RemoteException;
053import java.util.ArrayList;
054import java.util.Iterator;
055import java.util.LinkedHashMap;
056import java.util.List;
057import java.util.regex.Pattern;
058
/**
 *
 * Pass in a UniProt ID and this ProxySequenceReader, when passed to a ProteinSequence, will get the sequence data and other data elements
 * associated with the ProteinSequence by UniProt. This is an example of how to map external databases of proteins and features to the BioJava3
 * ProteinSequence.
 * It is important to call {@link #setUniprotDirectoryCache(String)} to allow caching of XML files so they don't need to be reloaded each time.
 * This class does not manage the cache.
 * @param <C> the compound type of the sequence
 */
068public class UniprotProxySequenceReader<C extends Compound> implements ProxySequenceReader<C>, FeaturesKeyWordInterface, DatabaseReferenceInterface {
069
070        private final static Logger logger = LoggerFactory.getLogger(UniprotProxySequenceReader.class);
071
072        /*
073         * Taken from http://www.uniprot.org/help/accession_numbers
074         */
075        private static final String SPID_PATTERN = "[OPQ][0-9][A-Z0-9]{3}[0-9]";
076        private static final String TREMBLID_PATTERN = "[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}";
077        public static final Pattern UP_AC_PATTERN = Pattern.compile("(" + SPID_PATTERN + "|" + TREMBLID_PATTERN + ")");
078
079        private static String uniprotbaseURL = "http://www.uniprot.org"; //"http://pir.uniprot.org";
080        private static String uniprotDirectoryCache = null;
081        private String sequence;
082        private CompoundSet<C> compoundSet;
083        private List<C> parsedCompounds = new ArrayList<C>();
084        Document uniprotDoc;
085
086        /**
087         * The UniProt id is used to retrieve the UniProt XML which is then parsed as a DOM object
088         * so we know everything about the protein. If an error occurs throw an exception. We could
089         * have a bad uniprot id or network error
090         * @param accession
091         * @param compoundSet
092         * @throws CompoundNotFoundException
093         * @throws IOException if problems while reading the UniProt XML
094         */
095        public UniprotProxySequenceReader(String accession, CompoundSet<C> compoundSet) throws CompoundNotFoundException, IOException {
096                if (!UP_AC_PATTERN.matcher(accession.toUpperCase()).matches()) {
097                        throw new IllegalArgumentException("Accession provided " + accession + " doesn't comply with the uniprot acession pattern.");
098                }
099                setCompoundSet(compoundSet);
100                uniprotDoc = this.getUniprotXML(accession);
101                String seq = this.getSequence(uniprotDoc);
102                setContents(seq);
103        }
104
105        /**
106         * The xml is passed in as a DOM object so we know everything about the protein.
107         *  If an error occurs throw an exception. We could have a bad uniprot id
108         * @param document
109         * @param compoundSet
110         * @throws CompoundNotFoundException
111         */
112        public UniprotProxySequenceReader(Document document, CompoundSet<C> compoundSet) throws CompoundNotFoundException {
113                setCompoundSet(compoundSet);
114                uniprotDoc = document;
115                String seq = this.getSequence(uniprotDoc);
116                setContents(seq);
117        }
118        /**
119         * The passed in xml is parsed as a DOM object so we know everything about the protein.
120         *  If an error occurs throw an exception. We could have a bad uniprot id
121         * @param xml
122         * @param compoundSet
123         * @return UniprotProxySequenceReader
124         * @throws Exception
125         */
126        public static <C extends Compound> UniprotProxySequenceReader<C> parseUniprotXMLString(String xml, CompoundSet<C> compoundSet) {
127                try {
128                        Document document = XMLHelper.inputStreamToDocument(new ByteArrayInputStream(xml.getBytes()));
129                        return new UniprotProxySequenceReader<C>(document, compoundSet);
130                } catch (Exception e) {
131                        logger.error("Exception on xml parse of: {}", xml);
132                }
133                return null;
134        }
135
136        @Override
137        public void setCompoundSet(CompoundSet<C> compoundSet) {
138                this.compoundSet = compoundSet;
139        }
140
141        /**
142         * Once the sequence is retrieved set the contents and make sure everything this is valid
143         * @param sequence
144         * @throws CompoundNotFoundException
145         */
146        @Override
147        public void setContents(String sequence) throws CompoundNotFoundException {
148                // Horrendously inefficient - pretty much the way the old BJ did things.
149                // TODO Should be optimised.
150                this.sequence = sequence;
151                this.parsedCompounds.clear();
152                for (int i = 0; i < sequence.length();) {
153                        String compoundStr = null;
154                        C compound = null;
155                        for (int compoundStrLength = 1; compound == null && compoundStrLength <= compoundSet.getMaxSingleCompoundStringLength(); compoundStrLength++) {
156                                compoundStr = sequence.substring(i, i + compoundStrLength);
157                                compound = compoundSet.getCompoundForString(compoundStr);
158                        }
159                        if (compound == null) {
160                                throw new CompoundNotFoundException("Compound "+compoundStr+" not found");
161                        } else {
162                                i += compoundStr.length();
163                        }
164                        this.parsedCompounds.add(compound);
165                }
166        }
167
168        /**
169         * The sequence length
170         * @return
171         */
172        @Override
173        public int getLength() {
174                return this.parsedCompounds.size();
175        }
176
177        /**
178         *
179         * @param position
180         * @return
181         */
182        @Override
183        public C getCompoundAt(int position) {
184                return this.parsedCompounds.get(position - 1);
185        }
186
187        /**
188         *
189         * @param compound
190         * @return
191         */
192        @Override
193        public int getIndexOf(C compound) {
194                return this.parsedCompounds.indexOf(compound) + 1;
195        }
196
197        /**
198         *
199         * @param compound
200         * @return
201         */
202        @Override
203        public int getLastIndexOf(C compound) {
204                return this.parsedCompounds.lastIndexOf(compound) + 1;
205        }
206
207        /**
208         *
209         * @return
210         */
211        @Override
212        public String toString() {
213                return getSequenceAsString();
214        }
215
216        /**
217         *
218         * @return
219         */
220        @Override
221        public String getSequenceAsString() {
222                return sequence;
223        }
224
225        /**
226         *
227         * @return
228         */
229        @Override
230        public List<C> getAsList() {
231                return this.parsedCompounds;
232        }
233
234        @Override
235        public boolean equals(Object o){
236
237                if(! Equals.classEqual(this, o)) {
238                        return false;
239                }
240
241                Sequence<C> other = (Sequence<C>)o;
242                if ( other.getCompoundSet() != getCompoundSet())
243                        return false;
244
245                List<C> rawCompounds = getAsList();
246                List<C> otherCompounds = other.getAsList();
247
248                if ( rawCompounds.size() != otherCompounds.size())
249                        return false;
250
251                for (int i = 0 ; i < rawCompounds.size() ; i++){
252                        Compound myCompound = rawCompounds.get(i);
253                        Compound otherCompound = otherCompounds.get(i);
254                        if ( ! myCompound.equalsIgnoreCase(otherCompound))
255                                return false;
256                }
257                return true;
258        }
259
260        @Override
261        public int hashCode(){
262                String s = getSequenceAsString();
263                return s.hashCode();
264        }
265
266        /**
267         *
268         * @return
269         */
270        @Override
271        public SequenceView<C> getInverse() {
272                return SequenceMixin.inverse(this);
273        }
274
275        /**
276         *
277         * @param bioBegin
278         * @param bioEnd
279         * @param strand
280         * @return
281         */
282        public String getSequenceAsString(Integer bioBegin, Integer bioEnd, Strand strand) {
283                SequenceAsStringHelper<C> sequenceAsStringHelper = new SequenceAsStringHelper<C>();
284                return sequenceAsStringHelper.getSequenceAsString(this.parsedCompounds, compoundSet, bioBegin, bioEnd, strand);
285        }
286
287        /**
288         *
289         * @param bioBegin
290         * @param bioEnd
291         * @return
292         */
293        @Override
294        public SequenceView<C> getSubSequence(final Integer bioBegin, final Integer bioEnd) {
295                return new SequenceProxyView<C>(UniprotProxySequenceReader.this, bioBegin, bioEnd);
296        }
297
298        /**
299         *
300         * @return
301         */
302        @Override
303        public Iterator<C> iterator() {
304                return this.parsedCompounds.iterator();
305        }
306
307        /**
308         *
309         * @return
310         */
311        @Override
312        public CompoundSet<C> getCompoundSet() {
313                return compoundSet;
314        }
315
316        /**
317         *
318         * @return
319         */
320        @Override
321        public AccessionID getAccession() {
322                AccessionID accessionID = new AccessionID();
323                if (uniprotDoc == null) {
324                        return accessionID;
325                }
326                try {
327                        Element uniprotElement = uniprotDoc.getDocumentElement();
328                        Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
329                        Element nameElement = XMLHelper.selectSingleElement(entryElement, "name");
330                        accessionID = new AccessionID(nameElement.getTextContent(), DataSource.UNIPROT);
331                } catch (XPathExpressionException e) {
332                        logger.error("Exception: ", e);
333                }
334                return accessionID;
335        }
336
337        /**
338         * Pull uniprot accessions associated with this sequence
339         * @return
340         * @throws XPathExpressionException
341         */
342        public ArrayList<AccessionID> getAccessions() throws XPathExpressionException {
343                ArrayList<AccessionID> accessionList = new ArrayList<AccessionID>();
344                if (uniprotDoc == null) {
345                        return accessionList;
346                }
347                Element uniprotElement = uniprotDoc.getDocumentElement();
348                Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
349                ArrayList<Element> keyWordElementList = XMLHelper.selectElements(entryElement, "accession");
350                for (Element element : keyWordElementList) {
351                        AccessionID accessionID = new AccessionID(element.getTextContent(), DataSource.UNIPROT);
352                        accessionList.add(accessionID);
353                }
354
355                return accessionList;
356        }
357
358        /**
359         * Pull uniprot protein aliases associated with this sequence
360         * Provided for backwards compatibility now that we support both
361         * gene and protein aliases via separate methods.
362         * @return
363         * @throws XPathExpressionException
364         */
365        public ArrayList<String> getAliases() throws XPathExpressionException {
366
367                return getProteinAliases();
368        }
369        /**
370         * Pull uniprot protein aliases associated with this sequence
371         * @return
372         * @throws XPathExpressionException
373         */
374        public ArrayList<String> getProteinAliases() throws XPathExpressionException {
375                ArrayList<String> aliasList = new ArrayList<String>();
376                if (uniprotDoc == null) {
377                        return aliasList;
378                }
379                Element uniprotElement = uniprotDoc.getDocumentElement();
380                Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
381                Element proteinElement = XMLHelper.selectSingleElement(entryElement, "protein");
382                ArrayList<Element> keyWordElementList = XMLHelper.selectElements(proteinElement, "alternativeName");
383                for (Element element : keyWordElementList) {
384                        Element fullNameElement = XMLHelper.selectSingleElement(element, "fullName");
385                        aliasList.add(fullNameElement.getTextContent());
386                        Element shortNameElement = XMLHelper.selectSingleElement(element, "shortName");
387                        if(null != shortNameElement) {
388                                String shortName = shortNameElement.getTextContent();
389                                if(null != shortName && !shortName.trim().isEmpty()) {
390                                        aliasList.add(shortName);
391                                }
392                        }
393                }
394                keyWordElementList = XMLHelper.selectElements(proteinElement, "recommendedName");
395                for (Element element : keyWordElementList) {
396                        Element fullNameElement = XMLHelper.selectSingleElement(element, "fullName");
397                        aliasList.add(fullNameElement.getTextContent());
398                        Element shortNameElement = XMLHelper.selectSingleElement(element, "shortName");
399                        if(null != shortNameElement) {
400                                String shortName = shortNameElement.getTextContent();
401                                if(null != shortName && !shortName.trim().isEmpty()) {
402                                        aliasList.add(shortName);
403                                }
404                        }
405                }
406                Element cdAntigen = XMLHelper.selectSingleElement(proteinElement, "cdAntigenName");
407                if(null != cdAntigen) {
408                        String cdAntigenName = cdAntigen.getTextContent();
409                        if(null != cdAntigenName && !cdAntigenName.trim().isEmpty()) {
410                                aliasList.add(cdAntigenName);
411                        }
412                }
413
414                return aliasList;
415        }
416
417        /**
418         * Pull uniprot gene aliases associated with this sequence
419         * @return
420         * @throws XPathExpressionException
421         */
422        public ArrayList<String> getGeneAliases() throws XPathExpressionException {
423                ArrayList<String> aliasList = new ArrayList<String>();
424                if (uniprotDoc == null) {
425                        return aliasList;
426                }
427                Element uniprotElement = uniprotDoc.getDocumentElement();
428                Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
429                ArrayList<Element> proteinElements = XMLHelper.selectElements(entryElement, "gene");
430                for(Element proteinElement : proteinElements) {
431                        ArrayList<Element> keyWordElementList = XMLHelper.selectElements(proteinElement, "name");
432                        for (Element element : keyWordElementList) {
433                                aliasList.add(element.getTextContent());
434                        }
435                }
436                return aliasList;
437        }
438
439        /**
440         *
441         * @param compounds
442         * @return
443         */
444        @Override
445        public int countCompounds(C... compounds) {
446                throw new UnsupportedOperationException("Not supported yet.");
447        }
448
449        /**
450         *
451         * @param accession
452         * @return
453         * @throws IOException
454         */
455        private Document getUniprotXML(String accession) throws IOException, CompoundNotFoundException {
456                StringBuilder sb = new StringBuilder();
457                // try in cache
458                if (uniprotDirectoryCache != null && uniprotDirectoryCache.length() > 0) {
459                        sb = fetchFromCache(accession);
460                }
461
462                // http://www.uniprot.org/uniprot/?query=SORBIDRAFT_03g027040&format=xml
463                if (sb.length() == 0) {
464                        String uniprotURL = getUniprotbaseURL() + "/uniprot/" + accession.toUpperCase() + ".xml";
465                        logger.info("Loading: {}", uniprotURL);
466                        sb = fetchUniprotXML(uniprotURL);
467
468                        int index = sb.indexOf("xmlns="); //strip out name space stuff to make it easier on xpath
469                        if (index != -1) {
470                                int lastIndex = sb.indexOf(">", index);
471                                sb.replace(index, lastIndex, "");
472                        }
473                        if (uniprotDirectoryCache != null && uniprotDirectoryCache.length() > 0)
474                                writeCache(sb,accession);
475                }
476
477                logger.info("Load complete");
478                try {
479                        //       logger.debug(sb.toString());
480                        Document document = XMLHelper.inputStreamToDocument(new ByteArrayInputStream(sb.toString().getBytes()));
481                        return document;
482                } catch (SAXException e) {
483                        logger.error("Exception on xml parse of: {}", sb.toString());
484                } catch (ParserConfigurationException e) {
485                        logger.error("Exception on xml parse of: {}", sb.toString());
486                }
487                return null;
488        }
489
490        private void writeCache(StringBuilder sb, String accession) throws IOException {
491                File f = new File(uniprotDirectoryCache + File.separatorChar + accession + ".xml");
492                FileWriter fw = new FileWriter(f);
493                fw.write(sb.toString());
494                fw.close();
495        }
496
497        private StringBuilder fetchUniprotXML(String uniprotURL)
498                        throws IOException, CompoundNotFoundException {
499
500                StringBuilder sb = new StringBuilder();
501                URL uniprot = new URL(uniprotURL);
502                int attempt = 5;
503                List<String> errorCodes = new ArrayList<String>();
504                while(attempt > 0) {
505                        HttpURLConnection uniprotConnection = (HttpURLConnection) uniprot.openConnection();
506                        uniprotConnection.setRequestProperty("User-Agent", "BioJava");
507                        uniprotConnection.connect();
508                        int statusCode = uniprotConnection.getResponseCode();
509                        if (statusCode == 200) {
510                                BufferedReader in = new BufferedReader(
511                                                new InputStreamReader(
512                                                uniprotConnection.getInputStream()));
513                                String inputLine;
514
515                                while ((inputLine = in.readLine()) != null) {
516                                        sb.append(inputLine);
517                                }
518                                in.close();
519                                return sb;
520                        }
521                        attempt--;
522                        errorCodes.add(String.valueOf(statusCode));
523                }
524                throw new RemoteException("Couldn't fetch accession from the url " + uniprotURL + " error codes on 5 attempts are " + errorCodes.toString());
525        }
526
527        /**
528         * @param key
529         * @return A string containing the contents of entry specified by key and if not found returns an empty string
530         * @throws FileNotFoundException
531         * @throws IOException
532         */
533        private StringBuilder fetchFromCache(String key)
534                        throws FileNotFoundException, IOException {
535                int index;
536                File f = new File(uniprotDirectoryCache + File.separatorChar + key + ".xml");
537                StringBuilder sb = new StringBuilder();
538                if (f.exists()) {
539                        FileReader fr = new FileReader(f);
540                        int size = (int) f.length();
541                        char[] data = new char[size];
542                        fr.read(data);
543                        fr.close();
544                        sb.append(data);
545                        index = sb.indexOf("xmlns="); //strip out name space stuff to make it easier on xpath
546                        if (index != -1) {
547                                int lastIndex = sb.indexOf(">", index);
548                                sb.replace(index, lastIndex, "");
549                        }
550                }
551                return sb;
552        }
553
554        /**
555         *
556         * @param uniprotDoc
557         * @return
558         */
559        private String getSequence(Document uniprotDoc)  {
560
561                try {
562                        Element uniprotElement = uniprotDoc.getDocumentElement();
563                        Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
564                        Element sequenceElement = XMLHelper.selectSingleElement(entryElement, "sequence");
565
566                        String seqdata = sequenceElement.getTextContent();
567
568                        return seqdata;
569                } catch (XPathExpressionException e) {
570                        logger.error("Problems while parsing sequence in UniProt XML: {}. Sequence will be blank.", e.getMessage());
571                        return "";
572                }
573        }
574
575        /**
576         * The current UniProt URL to deal with caching issues. www.uniprot.org is load balanced
577         * but you can access pir.uniprot.org directly.
578         * @return the uniprotbaseURL
579         */
580        public static String getUniprotbaseURL() {
581                return uniprotbaseURL;
582        }
583
584        /**
585         * @param aUniprotbaseURL the uniprotbaseURL to set
586         */
587        public static void setUniprotbaseURL(String aUniprotbaseURL) {
588                uniprotbaseURL = aUniprotbaseURL;
589        }
590
591        /**
592         * Local directory cache of XML that can be downloaded
593         * @return the uniprotDirectoryCache
594         */
595        public static String getUniprotDirectoryCache() {
596                return uniprotDirectoryCache;
597        }
598
599        /**
600         * @param aUniprotDirectoryCache the uniprotDirectoryCache to set
601         */
602        public static void setUniprotDirectoryCache(String aUniprotDirectoryCache) {
603                File f = new File(aUniprotDirectoryCache);
604                if (!f.exists()) {
605                        f.mkdirs();
606                }
607                uniprotDirectoryCache = aUniprotDirectoryCache;
608        }
609
610        public static void main(String[] args) {
611
612                try {
613                        UniprotProxySequenceReader<AminoAcidCompound> uniprotSequence = new UniprotProxySequenceReader<AminoAcidCompound>("YA745_GIBZE", AminoAcidCompoundSet.getAminoAcidCompoundSet());
614                        ProteinSequence proteinSequence = new ProteinSequence(uniprotSequence);
615                        logger.info("Accession: {}", proteinSequence.getAccession().getID());
616                        logger.info("Sequence: {}", proteinSequence.getSequenceAsString());
617                } catch (Exception e) {
618                        logger.error("Exception: ", e);
619                }
620
621        }
622
623        /**
624         * Get the gene name associated with this sequence.
625         * @return
626         */
627        public String getGeneName() {
628                if (uniprotDoc == null) {
629                        return "";
630                }
631                try {
632                        Element uniprotElement = uniprotDoc.getDocumentElement();
633                        Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
634                        Element geneElement = XMLHelper.selectSingleElement(entryElement, "gene");
635                        if (geneElement == null) {
636                                return "";
637                        }
638                        Element nameElement = XMLHelper.selectSingleElement(geneElement, "name");
639                        if (nameElement == null) {
640                                return "";
641                        }
642                        return nameElement.getTextContent();
643                } catch (XPathExpressionException e) {
644                        logger.error("Problems while parsing gene name in UniProt XML: {}. Gene name will be blank.",e.getMessage());
645                        return "";
646                }
647        }
648
649        /**
650         * Get the organism name assigned to this sequence
651         * @return
652         */
653        public String getOrganismName() {
654                if (uniprotDoc == null) {
655                        return "";
656                }
657                try {
658                        Element uniprotElement = uniprotDoc.getDocumentElement();
659                        Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
660                        Element organismElement = XMLHelper.selectSingleElement(entryElement, "organism");
661                        if (organismElement == null) {
662                                return "";
663                        }
664                        Element nameElement = XMLHelper.selectSingleElement(organismElement, "name");
665                        if (nameElement == null) {
666                                return "";
667                        }
668                        return nameElement.getTextContent();
669                } catch (XPathExpressionException e) {
670                        logger.error("Problems while parsing organism name in UniProt XML: {}. Organism name will be blank.",e.getMessage());
671                        return "";
672                }
673
674        }
675
676        /**
677         * Pull UniProt key words which is a mixed bag of words associated with this sequence
678         * @return
679         */
680        @Override
681        public ArrayList<String> getKeyWords() {
682                ArrayList<String> keyWordsList = new ArrayList<String>();
683                if (uniprotDoc == null) {
684                        return keyWordsList;
685                }
686                try {
687                        Element uniprotElement = uniprotDoc.getDocumentElement();
688
689                        Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
690                        ArrayList<Element> keyWordElementList = XMLHelper.selectElements(entryElement, "keyword");
691                        for (Element element : keyWordElementList) {
692                                keyWordsList.add(element.getTextContent());
693                        }
694                } catch (XPathExpressionException e) {
695                        logger.error("Problems while parsing keywords in UniProt XML: {}. No keywords will be available.",e.getMessage());
696                        return new ArrayList<String>();
697                }
698
699                return keyWordsList;
700        }
701
702        /**
703         * The Uniprot mappings to other database identifiers for this sequence
704         * @return
705         */
706        @Override
707        public LinkedHashMap<String, ArrayList<DBReferenceInfo>> getDatabaseReferences()  {
708                LinkedHashMap<String, ArrayList<DBReferenceInfo>> databaseReferencesHashMap = new LinkedHashMap<String, ArrayList<DBReferenceInfo>>();
709                if (uniprotDoc == null) {
710                        return databaseReferencesHashMap;
711                }
712
713                try {
714                        Element uniprotElement = uniprotDoc.getDocumentElement();
715                        Element entryElement = XMLHelper.selectSingleElement(uniprotElement, "entry");
716                        ArrayList<Element> dbreferenceElementList = XMLHelper.selectElements(entryElement, "dbReference");
717                        for (Element element : dbreferenceElementList) {
718                                String type = element.getAttribute("type");
719                                String id = element.getAttribute("id");
720                                ArrayList<DBReferenceInfo> idlist = databaseReferencesHashMap.get(type);
721                                if (idlist == null) {
722                                        idlist = new ArrayList<DBReferenceInfo>();
723                                        databaseReferencesHashMap.put(type, idlist);
724                                }
725                                DBReferenceInfo dbreferenceInfo = new DBReferenceInfo(type, id);
726                                ArrayList<Element> propertyElementList = XMLHelper.selectElements(element, "property");
727                                for (Element propertyElement : propertyElementList) {
728                                        String propertyType = propertyElement.getAttribute("type");
729                                        String propertyValue = propertyElement.getAttribute("value");
730                                        dbreferenceInfo.addProperty(propertyType, propertyValue);
731                                }
732
733                                idlist.add(dbreferenceInfo);
734                        }
735                } catch (XPathExpressionException e) {
736                        logger.error("Problems while parsing db references in UniProt XML: {}. No db references will be available.",e.getMessage());
737                        return new LinkedHashMap<String, ArrayList<DBReferenceInfo>>();
738                }
739
740                return databaseReferencesHashMap;
741        }
742}