/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on May 23, 2010
 *
 */
package org.biojava.nbio.structure.io.mmcif;

import java.util.Locale;

import org.biojava.nbio.core.util.SoftHashMap;
import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.HetatomImpl;
import org.biojava.nbio.structure.NucleotideImpl;
import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Static factory that builds {@link Group} instances from chemical component
 * definitions ({@link ChemComp}).
 * <p>
 * Definitions are obtained from the currently configured
 * {@link ChemCompProvider} and kept in an in-memory cache keyed by the
 * normalized (upper-cased, trimmed) record name.
 */
public class ChemCompGroupFactory {

	private static final Logger logger = LoggerFactory.getLogger(ChemCompGroupFactory.class);

	private static ChemCompProvider chemCompProvider = new DownloadChemCompProvider();

	private static final SoftHashMap<String, ChemComp> cache = new SoftHashMap<String, ChemComp>(0);

	/**
	 * Returns the chemical component definition for the given record name.
	 * <p>
	 * The name is upper-cased and trimmed before lookup. The in-memory cache
	 * is consulted first; on a miss the definition is requested from the
	 * current {@link ChemCompProvider} and the answer is stored in the cache.
	 *
	 * @param recordName the chemical component identifier; must not be null
	 * @return the definition from the cache or the provider (whatever the
	 *         provider returned, which may be null)
	 */
	public static ChemComp getChemComp(String recordName){

		recordName = normalizeRecordName(recordName);

		// we are using the cache, to avoid hitting the file system too often.
		ChemComp cc = cache.get(recordName);
		if ( cc != null) {
			logger.debug("Chem comp {} read from cache", cc.getThree_letter_code());
			return cc;
		}

		// not cached, get the chem comp from the provider
		logger.debug("Chem comp {} read from provider {}", recordName, chemCompProvider.getClass().getCanonicalName());
		cc = chemCompProvider.getChemComp(recordName);

		// Note that this also caches null or empty responses
		cache.put(recordName, cc);
		return cc;
	}

	/**
	 * The new ChemCompProvider will be set in the static variable,
	 * so this provider will be used from now on until it is changed
	 * again. Note that this change can cause unexpected behavior in
	 * code executed afterwards.
	 * <p>
	 * Changing the provider also resets the cache, so any groups
	 * previously accessed will be reread or re-downloaded.
	 *
	 * @param provider the provider to use from now on; must not be null
	 */
	public static void setChemCompProvider(ChemCompProvider provider) {
		logger.debug("Setting new chem comp provider to {}", provider.getClass().getCanonicalName());
		chemCompProvider = provider;
		// clear cache
		cache.clear();
	}

	/**
	 * Returns the provider currently used to resolve chemical components.
	 *
	 * @return the current {@link ChemCompProvider}
	 */
	public static ChemCompProvider getChemCompProvider(){
		return chemCompProvider;
	}

	/**
	 * Force the in-memory cache to be reset.
	 *
	 * Note that the ChemCompProvider may have additional memory or disk caches that need to be cleared too.
	 */
	public static void clearCache() {
		cache.clear();
	}

	/**
	 * Creates a new {@link Group} for the given record name, choosing the
	 * implementation from the polymer type of its chemical component:
	 * {@link AminoAcidImpl} for protein types, {@link NucleotideImpl} for
	 * polynucleotide types and {@link HetatomImpl} otherwise. The chemical
	 * component is attached to the returned group.
	 * <p>
	 * For amino acids the one-letter code is taken from the component; when
	 * it is missing, empty, {@code "X"} or {@code "?"} and the component names
	 * a single three-character parent, the parent's code is used instead.
	 * If no usable code is found the amino type is set to {@code 'X'}.
	 *
	 * @param recordName the chemical component identifier; must not be null
	 * @return the new group, or null if no chemical component was found
	 */
	public static Group getGroupFromChemCompDictionary(String recordName) {

		// make sure we work with upper case records
		recordName = normalizeRecordName(recordName);

		ChemComp cc = getChemComp(recordName);

		if ( cc == null) {
			return null;
		}

		Group g;

		if ( PolymerType.PROTEIN_ONLY.contains( cc.getPolymerType() ) ){
			AminoAcid aa = new AminoAcidImpl();

			String one_letter = cc.getOne_letter_code();
			if ( one_letter == null || one_letter.equals("X") || one_letter.equals("?") || one_letter.length()==0){
				String parent = cc.getMon_nstd_parent_comp_id();
				if ( parent != null && parent.length() == 3){
					ChemComp parentCC = getChemComp(parent);
					// the parent may be unknown to the provider; keep the
					// current code in that case instead of failing
					if ( parentCC != null) {
						one_letter = parentCC.getOne_letter_code();
					}
				}
			}

			if ( one_letter == null || one_letter.length()==0 || one_letter.equals("?")) {
				// e.g. problem with PRR, which probably should have a parent of ALA, but as of 20110127 does not.
				logger.warn("Problem with chemical component: {}  Did not find one letter code! Setting it to 'X'", recordName);
				aa.setAminoType('X');

			} else {
				aa.setAminoType(one_letter.charAt(0));
			}

			g = aa;
		} else if ( PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())) {
			g = new NucleotideImpl();
		} else {
			g = new HetatomImpl();
		}

		g.setChemComp(cc);

		return g;
	}

	/**
	 * Returns the one-letter code of a chemical component, falling back to
	 * the code of its parent component when the component's own code is
	 * null, {@code "X"} or {@code "?"}.
	 * <p>
	 * The component's own code is returned unchanged when there is no parent
	 * id, when the parent id is longer than three characters, or when the
	 * parent component cannot be resolved.
	 *
	 * @param cc the chemical component; must not be null
	 * @return the resolved one-letter code, which may be null
	 */
	public static String getOneLetterCode(ChemComp cc){
		String oneLetter = cc.getOne_letter_code();
		if ( oneLetter == null || oneLetter.equals("X") || oneLetter.equals("?")) {
			String parentId = cc.getMon_nstd_parent_comp_id() ;
			if ( parentId == null) {
				return oneLetter;
			}
			// cases like OIM have multiple parents (comma separated), we shouldn't try grab a chemcomp for those strings
			if (parentId.length()>3) {
				return oneLetter;
			}
			ChemComp parentCC = getChemComp(parentId);
			if ( parentCC == null) {
				return oneLetter;
			}
			oneLetter = parentCC.getOne_letter_code();
		}
		return oneLetter;
	}

	/**
	 * Upper-cases and trims a record name. Uses {@link Locale#ROOT} so the
	 * result does not depend on the JVM default locale.
	 */
	private static String normalizeRecordName(String recordName) {
		return recordName.toUpperCase(Locale.ROOT).trim();
	}

}