/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on May 23, 2010
 *
 */
package org.biojava.nbio.structure.io.mmcif;

import java.util.Locale;

import org.biojava.nbio.core.util.SoftHashMap;
import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.HetatomImpl;
import org.biojava.nbio.structure.NucleotideImpl;
import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Static entry point for looking up {@link ChemComp} definitions through the
 * currently configured {@link ChemCompProvider} and for creating empty
 * {@link Group} instances of the matching implementation type.
 * <p>
 * Looked-up definitions are kept in a static {@link SoftHashMap} keyed by the
 * upper-cased, trimmed record name. The provider and the cache are plain
 * static fields; no synchronization is performed by this class.
 */
public class ChemCompGroupFactory {

    private static final Logger logger = LoggerFactory.getLogger(ChemCompGroupFactory.class);

    private static ChemCompProvider chemCompProvider = new DownloadChemCompProvider();

    private static final SoftHashMap<String, ChemComp> cache = new SoftHashMap<String, ChemComp>(0);

    /**
     * Returns the chemical component definition for the given record name,
     * consulting the cache first and the configured provider otherwise.
     *
     * @param recordName the component identifier; it is upper-cased and
     *                   trimmed before use and must not be {@code null}
     * @return whatever the cache or the provider returned for the normalized
     *         name; this may be {@code null} if the provider returns
     *         {@code null}
     */
    public static ChemComp getChemComp(String recordName){

        recordName = normalizeRecordName(recordName);

        // we are using the cache, to avoid hitting the file system too often.
        ChemComp cc = cache.get(recordName);
        if ( cc != null) {
            logger.debug("Chem comp {} read from cache", cc.getThree_letter_code());
            return cc;
        }

        // not cached, get the chem comp from the provider
        logger.debug("Chem comp {} read from provider {}", recordName, chemCompProvider.getClass().getCanonicalName());
        cc = chemCompProvider.getChemComp(recordName);

        // The provider's answer is stored as-is, including a null or empty
        // response. NOTE(review): whether SoftHashMap actually retains a null
        // value cannot be seen from this file - confirm.
        cache.put(recordName, cc);
        return cc;
    }

    /**
     * The new ChemCompProvider will be set in the static variable,
     * so this provider will be used from now on until it is changed
     * again. Note that this change can have unexpected behavior of
     * code executed afterwards.
     * <p>
     * Changing the provider also clears the cache, so Chemical
     * Component definitions obtained from the previous provider
     * are discarded and will be requested again from the new one.
     *
     * @param provider the provider to use from now on; must not be
     *                 {@code null}, since its class name is logged
     */
    public static void setChemCompProvider(ChemCompProvider provider) {
        logger.debug("Setting new chem comp provider to {}", provider.getClass().getCanonicalName());
        chemCompProvider = provider;
        // clear cache
        cache.clear();
    }

    /**
     * Returns the provider currently used to resolve chemical components.
     *
     * @return the current provider
     */
    public static ChemCompProvider getChemCompProvider(){
        return chemCompProvider;
    }

    /**
     * Creates a new, empty {@link Group} whose implementation type is chosen
     * from the polymer type of the chemical component with the given name:
     * an {@link AminoAcidImpl} for members of {@code PolymerType.PROTEIN_ONLY},
     * a {@link NucleotideImpl} for members of
     * {@code PolymerType.POLYNUCLEOTIDE_ONLY}, and a {@link HetatomImpl}
     * otherwise. The chemical component is attached to the returned group.
     * <p>
     * For amino acids the one-letter code is taken from the component itself
     * or, if that is missing, "X", "?" or empty, from its parent component
     * when the parent id is exactly three characters long. If no usable code
     * is found, 'X' is used and a warning is logged.
     *
     * @param recordName the component identifier; it is upper-cased and
     *                   trimmed before use and must not be {@code null}
     * @return the new group, or {@code null} if no chemical component could
     *         be obtained for the name
     */
    public static Group getGroupFromChemCompDictionary(String recordName) {

        // make sure we work with upper case records
        recordName = normalizeRecordName(recordName);

        ChemComp cc = getChemComp(recordName);
        if ( cc == null) {
            return null;
        }

        Group g;

        if ( PolymerType.PROTEIN_ONLY.contains( cc.getPolymerType() ) ){
            AminoAcid aa = new AminoAcidImpl();

            String oneLetter = cc.getOne_letter_code();
            if ( oneLetter == null || oneLetter.equals("X") || oneLetter.equals("?") || oneLetter.length()==0){
                String parentId = cc.getMon_nstd_parent_comp_id();
                if ( parentId != null && parentId.length() == 3){
                    ChemComp parentCC = getChemComp(parentId);
                    // The parent may not be resolvable; in that case keep the
                    // current code and let the fallback below handle it
                    // instead of failing with a NullPointerException.
                    if ( parentCC != null) {
                        oneLetter = parentCC.getOne_letter_code();
                    }
                }
            }

            if ( oneLetter == null || oneLetter.length()==0 || oneLetter.equals("?")) {
                // e.g. problem with PRR, which probably should have a parent of ALA, but as of 20110127 does not.
                logger.warn("Problem with chemical component: {} Did not find one letter code! Setting it to 'X'", recordName);
                aa.setAminoType('X');
            } else {
                aa.setAminoType(oneLetter.charAt(0));
            }

            g = aa;
        } else if ( PolymerType.POLYNUCLEOTIDE_ONLY.contains(cc.getPolymerType())) {
            g = new NucleotideImpl();
        } else {
            g = new HetatomImpl();
        }

        g.setChemComp(cc);

        return g;
    }

    /**
     * Returns the one-letter code of the given chemical component, falling
     * back to the one-letter code of its parent component when the
     * component's own code is {@code null}, "X" or "?".
     * <p>
     * The component's own code is returned unchanged when there is no parent
     * id, when the parent id is longer than three characters, or when the
     * parent component cannot be obtained.
     *
     * @param cc the chemical component; must not be {@code null}
     * @return the one-letter code as described above; may be {@code null}
     */
    public static String getOneLetterCode(ChemComp cc){
        String oneLetter = cc.getOne_letter_code();
        if ( oneLetter == null || oneLetter.equals("X") || oneLetter.equals("?")) {
            String parentId = cc.getMon_nstd_parent_comp_id() ;
            if ( parentId == null) {
                return oneLetter;
            }
            // cases like OIM have multiple parents (comma separated), we shouldn't try grab a chemcomp for those strings
            if (parentId.length()>3) {
                return oneLetter;
            }
            ChemComp parentCC = ChemCompGroupFactory.getChemComp(parentId);
            if ( parentCC == null) {
                return oneLetter;
            }
            oneLetter = parentCC.getOne_letter_code();
        }
        return oneLetter;
    }

    /**
     * Upper-cases and trims a record name. The upper-casing uses
     * {@link Locale#ROOT} so that the result does not depend on the default
     * locale of the JVM (e.g. the dotted capital I of the Turkish locale).
     */
    private static String normalizeRecordName(String recordName) {
        return recordName.toUpperCase(Locale.ROOT).trim();
    }

}