package org.biojava.nbio.structure.chem;

import java.util.Locale;

import org.biojava.nbio.core.util.SoftHashMap;
import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.HetatomImpl;
import org.biojava.nbio.structure.NucleotideImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static factory that resolves chemical component record names to
 * {@link ChemComp} definitions (through a pluggable {@link ChemCompProvider}
 * and an in-memory cache) and builds {@link Group} instances from them.
 */
public class ChemCompGroupFactory {

    private static final Logger logger = LoggerFactory.getLogger(ChemCompGroupFactory.class);

    private static ChemCompProvider chemCompProvider = new DownloadChemCompProvider();

    private static final SoftHashMap<String, ChemComp> cache = new SoftHashMap<>(0);

    /**
     * Returns the chemical component definition for a record name.
     * <p>
     * The name is upper-cased (locale-independently) and trimmed, then looked
     * up in the in-memory cache. On a cache miss the current provider is
     * asked, and whatever it returns is put into the cache.
     *
     * @param recordName the chemical component identifier; must not be null
     * @return the chemical component, or whatever the provider returned for
     *         this name (which may be {@code null})
     * @throws NullPointerException if {@code recordName} is null
     */
    public static ChemComp getChemComp(String recordName) {
        // Locale.ROOT keeps the conversion stable regardless of the JVM default
        // locale (e.g. Turkish would map 'i' to a dotted capital I).
        recordName = recordName.toUpperCase(Locale.ROOT).trim();

        // we are using the cache, to avoid hitting the file system too often.
        ChemComp cc = cache.get(recordName);
        if (cc != null) {
            logger.debug("Chem comp {} read from cache", cc.getThreeLetterCode());
            return cc;
        }

        // not cached, get the chem comp from the provider
        logger.debug("Chem comp {} read from provider {}", recordName, chemCompProvider.getClass().getCanonicalName());
        cc = chemCompProvider.getChemComp(recordName);

        // Note that this also caches null or empty responses
        cache.put(recordName, cc);
        return cc;
    }

    /**
     * The new ChemCompProvider will be set in the static variable,
     * so this provider will be used from now on until it is changed
     * again. Note that this change can cause unexpected behavior in
     * code executed afterwards.
     * <p>
     * Changing the provider also resets the cache, so any groups
     * previously accessed will be reread or re-downloaded.
     *
     * @param provider the provider to use for all subsequent lookups; must not be null
     */
    public static void setChemCompProvider(ChemCompProvider provider) {
        logger.debug("Setting new chem comp provider to {}", provider.getClass().getCanonicalName());
        chemCompProvider = provider;
        // clear cache
        cache.clear();
    }

    /**
     * Returns the provider currently used to resolve chemical components.
     *
     * @return the current provider
     */
    public static ChemCompProvider getChemCompProvider() {
        return chemCompProvider;
    }

    /**
     * Force the in-memory cache to be reset.
     *
     * Note that the ChemCompProvider may have additional memory or disk caches that need to be cleared too.
     */
    public static void clearCache() {
        cache.clear();
    }

    /**
     * Creates a new {@link Group} for the given record name, choosing the
     * implementation from the polymer type of its chemical component:
     * an {@link AminoAcidImpl} for types in {@code PolymerType.PROTEIN_ONLY},
     * a {@link NucleotideImpl} for types in
     * {@code PolymerType.POLYNUCLEOTIDE_ONLY}, and a {@link HetatomImpl}
     * otherwise. The chemical component is attached to the returned group.
     * <p>
     * For amino acids the amino type is taken from the component's one-letter
     * code. If that code is missing, empty, {@code "X"} or {@code "?"} and the
     * component names a single three-character parent, the parent's code is
     * used instead. If no usable code is found the amino type is set to
     * {@code 'X'} and a warning is logged.
     *
     * @param recordName the chemical component identifier; must not be null
     * @return the new group, or {@code null} if no chemical component could be
     *         obtained for {@code recordName}
     * @throws NullPointerException if {@code recordName} is null
     */
    public static Group getGroupFromChemCompDictionary(String recordName) {
        // make sure we work with upper case records
        recordName = recordName.toUpperCase(Locale.ROOT).trim();

        ChemComp cc = getChemComp(recordName);
        if (cc == null) {
            return null;
        }

        Group g;
        if (PolymerType.PROTEIN_ONLY.contains(cc.getPolymerType())) {
            AminoAcid aa = new AminoAcidImpl();

            String oneLetter = cc.getOneLetterCode();
            if (oneLetter == null || "X".equals(oneLetter) || "?".equals(oneLetter) || oneLetter.length() == 0) {
                String parentId = cc.getMonNstdParentCompId();
                if (parentId != null && parentId.length() == 3) {
                    ChemComp parentCC = getChemComp(parentId);
                    // The parent may not be resolvable; keep the current code
                    // in that case so the fallback below applies instead of
                    // failing with a NullPointerException.
                    if (parentCC != null) {
                        oneLetter = parentCC.getOneLetterCode();
                    }
                }
            }

            if (oneLetter == null || oneLetter.length() == 0 || "?".equals(oneLetter)) {
                // e.g. problem with PRR, which probably should have a parent of ALA, but as of 20110127 does not.
                logger.warn("Problem with chemical component: {} Did not find one letter code! Setting it to 'X'",
                        recordName);
                aa.setAminoType('X');
            } else {
                aa.setAminoType(oneLetter.charAt(0));
            }

            g = aa;
        } else if (PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())) {
            g = new NucleotideImpl();
        } else {
            g = new HetatomImpl();
        }

        g.setChemComp(cc);
        return g;
    }

    /**
     * Returns the one-letter code of a chemical component, falling back to
     * the code of its parent component when its own code is {@code null},
     * {@code "X"} or {@code "?"}.
     * <p>
     * The fallback is only attempted when the component has a parent id of at
     * most three characters and that parent can be resolved through
     * {@link #getChemComp(String)}; otherwise the component's own code is
     * returned unchanged.
     *
     * @param cc the chemical component; must not be null
     * @return the component's own or its parent's one-letter code; may be
     *         {@code null}
     * @throws NullPointerException if {@code cc} is null
     */
    public static String getOneLetterCode(ChemComp cc) {
        String oneLetter = cc.getOneLetterCode();
        if (oneLetter == null || "X".equals(oneLetter) || "?".equals(oneLetter)) {
            String parentId = cc.getMonNstdParentCompId();
            if (parentId == null) {
                return oneLetter;
            }
            // cases like OIM have multiple parents (comma separated), we shouldn't try grab a chemcomp for those strings
            if (parentId.length() > 3) {
                return oneLetter;
            }
            ChemComp parentCC = ChemCompGroupFactory.getChemComp(parentId);
            if (parentCC == null) {
                return oneLetter;
            }
            oneLetter = parentCC.getOneLetterCode();
        }
        return oneLetter;
    }
}