001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.structure.align.xml; 022 023 024import org.biojava.nbio.structure.*; 025import org.biojava.nbio.structure.align.ce.CeCPMain; 026import org.biojava.nbio.structure.align.model.AFP; 027import org.biojava.nbio.structure.align.model.AFPChain; 028import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay; 029import org.biojava.nbio.structure.jama.Matrix; 030import org.slf4j.Logger; 031import org.slf4j.LoggerFactory; 032import org.w3c.dom.Document; 033import org.w3c.dom.NamedNodeMap; 034import org.w3c.dom.Node; 035import org.w3c.dom.NodeList; 036import org.xml.sax.InputSource; 037import org.xml.sax.SAXException; 038 039import javax.xml.parsers.DocumentBuilder; 040import javax.xml.parsers.DocumentBuilderFactory; 041import javax.xml.parsers.ParserConfigurationException; 042import java.io.IOException; 043import java.io.StringReader; 044import java.util.ArrayList; 045import java.util.List; 046 047//http://www.developerfusion.com/code/2064/a-simple-way-to-read-an-xml-file-in-java/ 048 049public class AFPChainXMLParser 050{ 051 052 private static final Logger logger = LoggerFactory.getLogger(AFPChainXMLParser.class); 053 public static final String DEFAULT_ALGORITHM_NAME = "jFatCat_rigid"; 054 055 /** new utility method that checks that the order of the pair in the XML alignment is correct and flips the direction if needed 056 * 057 * @param xml 058 * @param name1 059 * @param name1 060 * @param ca1 061 * @param ca2 062 * @return 063 */ 064 public static AFPChain fromXML(String xml, String name1, String name2, Atom[] ca1, Atom[] ca2) throws IOException, StructureException{ 065 AFPChain[] afps = parseMultiXML( xml); 066 if ( afps.length > 0 ) { 067 068 AFPChain afpChain = afps[0]; 069 070 String n1 = afpChain.getName1(); 071 String n2 = afpChain.getName2(); 072 073 if ( n1 == null ) 074 n1 = ""; 075 if ( n2 == null) 076 n2 = ""; 077 078 //System.out.println("from AFPCHAIN: " + n1 + " " + n2); 079 if ( n1.equals(name2) && n2.equals(name1)){ 080 // flipped order 081 //System.out.println("AfpChain in wrong order, flipping..."); 082 afpChain = AFPChainFlipper.flipChain(afpChain); 083 } 084 rebuildAFPChain(afpChain, ca1, ca2); 085 086 return afpChain; 087 } 088 return null; 089 090 } 091 092 public static AFPChain fromXML(String xml, Atom[] ca1, Atom[] ca2) throws IOException 093 { 094 AFPChain[] afps = parseMultiXML( xml); 095 if ( afps.length > 0 ) { 096 097 AFPChain afpChain = afps[0]; 098 rebuildAFPChain(afpChain, ca1, ca2); 099 100 return afpChain; 101 } 102 return null; 103 } 104 105 /** returns true if the alignment XML contains an error message 106 * 107 * @param xml 108 * @return flag if there was an Error while processing the alignment. 109 */ 110 public static boolean isErrorXML(String xml){ 111 112 if ( xml.contains("error=\"")) 113 return true; 114 115 return false; 116 117 118 } 119 120 /** Takes an XML representation of the alignment and flips the positions of name1 and name2 121 * 122 * @param xml String representing the alignment 123 * @return XML representation of the flipped alignment 124 */ 125 public static String flipAlignment(String xml) throws IOException,StructureException{ 126 AFPChain[] afps = parseMultiXML( xml); 127 if ( afps.length < 1 ) 128 return null; 129 130 if ( afps.length == 1) { 131 AFPChain newChain = AFPChainFlipper.flipChain(afps[0]); 132 if ( newChain.getAlgorithmName() == null) { 133 newChain.setAlgorithmName(DEFAULT_ALGORITHM_NAME); 134 } 135 return AFPChainXMLConverter.toXML(newChain); 136 } 137 throw new StructureException("not Implemented yet!"); 138 } 139 140 141 /** replace the PDB res nums with atom positions: 142 * 143 * @param afpChain 144 * @param ca1 145 * @param ca2 146 */ 147 public static void rebuildAFPChain(AFPChain afpChain, Atom[] ca1, Atom[] ca2){ 148 149 if ( afpChain.getAlgorithmName() == null) { 150 afpChain.setAlgorithmName(DEFAULT_ALGORITHM_NAME); 151 } 152 if ( afpChain.getVersion() == null){ 153 afpChain.setVersion("1.0"); 154 } 155 156 int blockNum = afpChain.getBlockNum(); 157 int ca1Length = afpChain.getCa1Length(); 158 int ca2Length = afpChain.getCa2Length(); 159 160 int minLength = Math.min(ca1Length, ca2Length); 161 int[][][] optAln = new int[blockNum][2][minLength]; 162 163 int[][][] blockResList = afpChain.getBlockResList(); 164 if ( blockResList == null){ 165 blockResList = new int[blockNum][2][minLength]; 166 } 167 int[] optLen = afpChain.getOptLen(); 168 169 String[][][] pdbAln = afpChain.getPdbAln(); 170 int[] verifiedOptLen = null; 171 if ( optLen != null) 172 verifiedOptLen = afpChain.getOptLen().clone(); 173 else { 174 logger.warn("did not find optimal alignment, building up empty alignment."); 175 optLen = new int[1]; 176 optLen[0] = 0; 177 } 178 for (int blockNr = 0 ; blockNr < blockNum ; blockNr++){ 179 180 //System.out.println("got block " + blockNr + " size: " + optLen[blockNr]); 181 int verifiedEQR = -1; 182 for ( int eqrNr = 0 ; eqrNr < optLen[blockNr] ; eqrNr++ ){ 183 String pdbResnum1 = pdbAln[blockNr][0][eqrNr]; 184 String pdbResnum2 = pdbAln[blockNr][1][eqrNr]; 185 186 //System.out.println(blockNr + " " + eqrNr + " got resnum: " + pdbResnum1 + " " + pdbResnum2); 187 String[] spl1 = pdbResnum1.split(":"); 188 String[] spl2 = pdbResnum2.split(":"); 189 190 String chain1 = spl1[0]; 191 String pdbres1 = spl1[1]; 192 193 String chain2 = spl2[0]; 194 String pdbres2 = spl2[1]; 195 196 int pos1 = getPositionForPDBresunm(pdbres1,chain1,ca1); 197 int pos2 = getPositionForPDBresunm(pdbres2,chain2,ca2); 198 199 if ( pos1 == -1 || pos2 == -1 ){ 200 // this can happen when parsing old files that contained Calcium atoms... 201 logger.warn("pos1: {} (residue {}), pos2: {} (residue {}), should never be -1. Probably parsing an old file.", 202 pos1, pdbResnum1, pos2, pdbResnum2); 203 verifiedOptLen[blockNr]-- ; 204 continue; 205 } 206 207 verifiedEQR++; 208 //System.out.println(blockNr + " " + eqrNr + " " + pos1 + " " + pos2); 209 optAln[blockNr][0][verifiedEQR] = pos1; 210 optAln[blockNr][1][verifiedEQR] = pos2; 211 blockResList[blockNr][0][verifiedEQR] = pos1; 212 blockResList[blockNr][1][verifiedEQR] = pos2; 213 } 214 } 215 216 afpChain.setOptLen(verifiedOptLen); 217 afpChain.setOptAln(optAln); 218 afpChain.setBlockResList(blockResList); 219 // build up alignment image: 220 AFPAlignmentDisplay.getAlign(afpChain, ca1, ca2); 221 222 223 } 224 225 public static AFPChain[] parseMultiXML(String xml) throws IOException { 226 List<AFPChain> afpChains = new ArrayList<AFPChain>(); 227 228 try 229 { 230 //Convert string to XML document 231 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 232 DocumentBuilder db = factory.newDocumentBuilder(); 233 InputSource inStream = new InputSource(); 234 inStream.setCharacterStream(new StringReader(xml)); 235 Document doc = db.parse(inStream); 236 237 // normalize text representation 238 doc.getDocumentElement().normalize(); 239 240 241 //Element rootElement = doc.getDocumentElement(); 242 243 NodeList listOfAFPChains = doc.getElementsByTagName("AFPChain"); 244 //int numArrays = listOfArrays.getLength(); 245 // go over the blocks 246 for(int afpPos=0; afpPos<listOfAFPChains.getLength() ; afpPos++) 247 { 248 249 AFPChain a = new AFPChain(DEFAULT_ALGORITHM_NAME); 250 a.setVersion("1.0"); 251 Node rootElement = listOfAFPChains.item(afpPos); 252 253 a.setName1(getAttribute(rootElement,"name1")); 254 a.setName2(getAttribute(rootElement,"name2")); 255 String algoname = getAttribute(rootElement,"method"); 256 if ( algoname != null) { 257 a.setAlgorithmName(algoname); 258 } 259 String version = getAttribute(rootElement,"version"); 260 if ( version != null) 261 a.setVersion(version); 262 263 a.setAlnLength( Integer.parseInt(getAttribute(rootElement,"alnLength"))); 264 a.setBlockNum( Integer.parseInt(getAttribute(rootElement,"blockNum"))); 265 a.setGapLen( Integer.parseInt(getAttribute(rootElement,"gapLen"))); 266 a.setOptLength( Integer.parseInt(getAttribute(rootElement,"optLength"))); 267 a.setTotalLenIni( Integer.parseInt(getAttribute(rootElement,"totalLenIni"))); 268 a.setBlockNum( Integer.parseInt(getAttribute(rootElement,"blockNum"))); 269 270 if ( a.getAlgorithmName().equals(CeCPMain.algorithmName)){ 271 a.setSequentialAlignment(a.getBlockNum() == 1); 272 } 273 274 a.setAlignScore(Double.parseDouble(getAttribute(rootElement,"alignScore"))); 275 a.setChainRmsd(Double.parseDouble(getAttribute(rootElement,"chainRmsd"))); 276 Double identity = Double.parseDouble(getAttribute(rootElement,"identity")); 277 a.setIdentity(identity); 278 279 a.setNormAlignScore(Double.parseDouble(getAttribute(rootElement,"normAlignScore"))); 280 a.setProbability(Double.parseDouble(getAttribute(rootElement,"probability"))); 281 a.setSimilarity(Double.parseDouble(getAttribute(rootElement,"similarity"))); 282 a.setTotalRmsdIni(Double.parseDouble(getAttribute(rootElement,"totalRmsdIni"))); 283 a.setTotalRmsdOpt(Double.parseDouble(getAttribute(rootElement,"totalRmsdOpt"))); 284 a.setAlignScoreUpdate(Double.parseDouble(getAttribute(rootElement,"alignScoreUpdate"))); 285 int ca1Length = Integer.parseInt(getAttribute(rootElement,"ca1Length")); 286 a.setCa1Length(ca1Length); 287 int ca2Length = Integer.parseInt(getAttribute(rootElement,"ca2Length")); 288 a.setCa2Length(ca2Length); 289 290 String tmScoreS = getAttribute(rootElement,"tmScore"); 291 if ( tmScoreS != null) { 292 Double tmScore = null; 293 try { 294 tmScore = Double.parseDouble(tmScoreS); 295 } catch (Exception e){ 296 } 297 a.setTMScore(tmScore); 298 } 299 300 String calcTimeS = getAttribute(rootElement,"time"); 301 Long calcTime = -1L; 302 if ( calcTimeS != null){ 303 304 try { 305 calcTime = Long.parseLong(calcTimeS); 306 307 } catch (Exception e){ 308 e.printStackTrace(); 309 } 310 } 311 a.setCalculationTime(calcTime); 312 313 Matrix[] ms = new Matrix[a.getBlockNum()]; 314 a.setBlockRotationMatrix(ms); 315 Atom[] blockShiftVector = new Atom[a.getBlockNum()]; 316 a.setBlockShiftVector(blockShiftVector); 317 318 int afpNum = Integer.parseInt(getAttribute(rootElement,"afpNum")); 319 List<AFP> afpSet = new ArrayList<AFP>(); 320 for (int afp=0;afp<afpNum;afp++){ 321 afpSet.add( new AFP()); 322 } 323 324 a.setAfpSet(afpSet); 325 326 int minLength = Math.min(ca1Length, ca2Length); 327 a.setFocusRes1(new int[minLength]); 328 a.setFocusRes2(new int[minLength]); 329 330 331 //NodeList listOfBlocks = doc.getElementsByTagName("block"); 332 NodeList listOfBlocks = rootElement.getChildNodes(); 333 334 //int numArrays = listOfArrays.getLength(); 335 336 // go over the blocks 337 for(int i=0; i<listOfBlocks.getLength() ; i++) 338 { 339 Node block = listOfBlocks.item(i); 340 341 // we only look at blocks. 342 if (! block.getNodeName().equals("block")) 343 continue; 344 345 processBlock(block, a, minLength); 346 347 348 } 349 350 afpChains.add(a); 351 } 352 } 353 354 // TODO these 2 exceptions should be thrown forward, it's not a good idea to catch them so early 355 catch (SAXException e) 356 { 357 Exception x = e.getException (); 358 ((x == null) ? e : x).printStackTrace (); 359 } 360 catch (ParserConfigurationException e) { 361 e.printStackTrace(); 362 } 363 364 return afpChains.toArray(new AFPChain[afpChains.size()]); 365 } 366 367 368 private static void processBlock(Node block, AFPChain a, int minLength){ 369 NodeList valList = block.getChildNodes(); 370 int numChildren = valList.getLength(); 371 372 NamedNodeMap map = block.getAttributes(); 373 374 int blockNum = a.getBlockNum(); 375 376 int[] optLen = a.getOptLen(); 377 if ( optLen == null ) 378 optLen = new int[blockNum]; 379 380 String[][][] pdbAln = a.getPdbAln(); 381 if ( pdbAln == null) 382 pdbAln = new String[blockNum][2][minLength]; 383 384 //int[][][] optAln = new int[blockNum][2][minLength]; 385 int[] blockGap = a.getBlockGap(); 386 if ( blockGap == null ) 387 blockGap = new int[blockNum]; 388 int[] blockSize= a.getBlockSize(); 389 if ( blockSize == null) 390 blockSize = new int[blockNum]; 391 392 double[] blockScore = a.getBlockScore(); 393 if ( blockScore == null) 394 blockScore = new double[blockNum]; 395 double[] blockRmsd = a.getBlockRmsd(); 396 if (blockRmsd == null ) 397 blockRmsd = new double[blockNum]; 398 Matrix[] ms = a.getBlockRotationMatrix(); 399 Atom[] shifts = a.getBlockShiftVector(); 400 401 int blockNr = Integer.parseInt( map.getNamedItem("blockNr").getTextContent()); 402 403 int thisBlockGap = Integer.parseInt(map.getNamedItem("blockGap").getTextContent()); 404 blockGap[blockNr] = thisBlockGap; 405 406 int thisBlockSize = Integer.parseInt(map.getNamedItem("blockSize").getTextContent()); 407 blockSize[blockNr] = thisBlockSize; 408 409 double thisBlockScore = Double.parseDouble(map.getNamedItem("blockScore").getTextContent()); 410 blockScore[blockNr] = thisBlockScore; 411 412 double thisBlockRmsd = Double.parseDouble(map.getNamedItem("blockRmsd").getTextContent()); 413 blockRmsd[blockNr] = thisBlockRmsd; 414 415 416 // parse out the equivalent positions from the file 417 int nrEqr = 0; 418 for ( int e =0; e< numChildren ; e++){ 419 Node eqr = valList.item(e); 420 421 if(!eqr.hasAttributes()) continue; 422 423 424 if ( eqr.getNodeName().equals("eqr")) { 425 nrEqr++; 426 NamedNodeMap atts = eqr.getAttributes(); 427 428 int eqrNr = Integer.parseInt(atts.getNamedItem("eqrNr").getTextContent()); 429 430 String pdbres1 = atts.getNamedItem("pdbres1").getTextContent(); 431 String chain1 = atts.getNamedItem("chain1").getTextContent(); 432 String pdbres2 = atts.getNamedItem("pdbres2").getTextContent(); 433 String chain2 = atts.getNamedItem("chain2").getTextContent(); 434 435 //System.out.println(blockNr + " " + eqrNr + " " + chain1+" " + pdbres1 + ":" + chain2 + " " + pdbres2); 436 437 pdbAln[blockNr][0][eqrNr] = chain1+":"+pdbres1; 438 pdbAln[blockNr][1][eqrNr] = chain2+":"+pdbres2; 439 440 // A WORK AROUND FOR THE PROBLEM THAT WE DON:T HAVE PDBs LOADED AT THIS TIME... 441 442 /* int pos1 = getPositionForPDBresunm(pdbres1,chain1,ca1); 443 int pos2 = getPositionForPDBresunm(pdbres2,chain2,ca2); 444 //System.out.println("settion optAln " + blockNr + " " + eqrNr + " " + pos1); 445 optAln[blockNr][0][eqrNr] = pos1; 446 optAln[blockNr][1][eqrNr] = pos2; 447 */ 448 } else if ( eqr.getNodeName().equals("matrix")){ 449 // process Matrix 450 Matrix m = new Matrix(3,3); 451 452 for (int i =1 ; i <= 3 ; i++){ 453 for (int j =1 ; j <= 3 ; j++){ 454 String att = getAttribute(eqr, "mat" +i + j); 455 double val = Double.parseDouble(att); 456 m.set(i-1,j-1,val); 457 458 } 459 } 460 ms[blockNr] = m; 461 462 } else if ( eqr.getNodeName().equals("shift")){ 463 Atom shift = new AtomImpl(); 464 double x = Double.parseDouble(getAttribute(eqr, "x")); 465 double y = Double.parseDouble(getAttribute(eqr, "y")); 466 double z = Double.parseDouble(getAttribute(eqr, "z")); 467 shift.setX(x); 468 shift.setY(y); 469 shift.setZ(z); 470 shifts[blockNr] = shift; 471 472 } 473 474 } 475 //System.out.println("setting block " + blockNr + " eqr: " + nrEqr); 476 optLen[blockNr] = nrEqr; 477 478 479 480 a.setOptLen(optLen); 481 //a.setOptAln(optAln); 482 a.setPdbAln(pdbAln); 483 a.setBlockGap(blockGap); 484 a.setBlockSize(blockSize); 485 486 a.setBlockScore(blockScore); 487 a.setBlockRmsd(blockRmsd); 488 489 490 491 } 492 493 private static String getAttribute(Node node, String attr){ 494 if( ! node.hasAttributes()) 495 return null; 496 497 NamedNodeMap atts = node.getAttributes(); 498 499 if ( atts == null) 500 return null; 501 502 Node att = atts.getNamedItem(attr); 503 if ( att == null) 504 return null; 505 506 String value = att.getTextContent(); 507 508 return value; 509 510 } 511 512 /** get the position of PDB residue nr X in the ato marray 513 * 514 * @param pdbresnum pdbresidue number 515 * @param authId chain name 516 * @param atoms atom array 517 * @return 518 */ 519 private static int getPositionForPDBresunm(String pdbresnum, String authId , Atom[] atoms){ 520 ResidueNumber residueNumber = ResidueNumber.fromString(pdbresnum); 521 residueNumber.setChainName(authId); 522 523 boolean blankChain = authId == null || authId.equalsIgnoreCase("null") || authId.equals("_"); 524 525 for ( int i =0; i< atoms.length ;i++){ 526 Group g = atoms[i].getGroup(); 527 528 // match _ to any chain 529 if( blankChain ) { 530 residueNumber.setChainName(g.getChain().getName()); 531 } 532 533 //System.out.println(g.getResidueNumber() + "< ? >" + residueNumber +"<"); 534 if ( g.getResidueNumber().equals(residueNumber)){ 535 //System.out.println(g + " == " + residueNumber ); 536 Chain c = g.getChain(); 537 if ( blankChain || c.getName().equals(authId)){ 538 return i; 539 } 540 } 541 } 542 return -1; 543 } 544 545 546 547} 548