001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 */
021package org.biojava.nbio.structure.align.xml;
022
023
024import org.biojava.nbio.structure.*;
025import org.biojava.nbio.structure.align.ce.CeCPMain;
026import org.biojava.nbio.structure.align.model.AFP;
027import org.biojava.nbio.structure.align.model.AFPChain;
028import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay;
029import org.biojava.nbio.structure.jama.Matrix;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032import org.w3c.dom.Document;
033import org.w3c.dom.NamedNodeMap;
034import org.w3c.dom.Node;
035import org.w3c.dom.NodeList;
036import org.xml.sax.InputSource;
037import org.xml.sax.SAXException;
038
039import javax.xml.parsers.DocumentBuilder;
040import javax.xml.parsers.DocumentBuilderFactory;
041import javax.xml.parsers.ParserConfigurationException;
042import java.io.IOException;
043import java.io.StringReader;
044import java.util.ArrayList;
045import java.util.List;
046
047//http://www.developerfusion.com/code/2064/a-simple-way-to-read-an-xml-file-in-java/
048
049public class AFPChainXMLParser
050{
051
052        private static final Logger logger = LoggerFactory.getLogger(AFPChainXMLParser.class);
053        public static final String DEFAULT_ALGORITHM_NAME = "jFatCat_rigid";
054
055        /** new utility method that checks that the order of the pair in the XML alignment is correct and flips the direction if needed
056         *
057         * @param xml
058         * @param name1
059         * @param name1
060         * @param ca1
061         * @param ca2
062         * @return
063         */
064         public static AFPChain fromXML(String xml, String name1, String name2, Atom[] ca1, Atom[] ca2) throws IOException, StructureException{
065                        AFPChain[] afps = parseMultiXML( xml);
066                        if ( afps.length > 0 ) {
067
068                                AFPChain afpChain = afps[0];
069
070                                String n1 = afpChain.getName1();
071                                String n2 = afpChain.getName2();
072
073                                if ( n1 == null )
074                                        n1 = "";
075                                if ( n2 == null)
076                                        n2 = "";
077
078                                //System.out.println("from AFPCHAIN: " + n1 + " " + n2);
079                                if ( n1.equals(name2) && n2.equals(name1)){
080                                        // flipped order
081                                        //System.out.println("AfpChain in wrong order, flipping...");
082                                        afpChain  = AFPChainFlipper.flipChain(afpChain);
083                                }
084                                rebuildAFPChain(afpChain, ca1, ca2);
085
086                                return afpChain;
087                        }
088                        return null;
089
090         }
091
092        public static AFPChain fromXML(String xml, Atom[] ca1, Atom[] ca2) throws IOException
093        {
094                AFPChain[] afps = parseMultiXML( xml);
095                if ( afps.length > 0 ) {
096
097                        AFPChain afpChain = afps[0];
098                        rebuildAFPChain(afpChain, ca1, ca2);
099
100                        return afpChain;
101                }
102                return null;
103        }
104
105        /** returns true if the alignment XML contains an error message
106         *
107         * @param xml
108         * @return flag if there was an Error while processing the alignment.
109         */
110        public static boolean isErrorXML(String xml){
111
112                if ( xml.contains("error=\""))
113                        return true;
114
115                return false;
116
117
118        }
119
120        /** Takes an XML representation of the alignment and flips the positions of name1 and name2
121         *
122         * @param xml String representing the alignment
123         * @return XML representation of the flipped alignment
124         */
125        public static String flipAlignment(String xml) throws IOException,StructureException{
126                AFPChain[] afps = parseMultiXML( xml);
127                if ( afps.length < 1 )
128                        return null;
129
130                if ( afps.length == 1) {
131                        AFPChain newChain = AFPChainFlipper.flipChain(afps[0]);
132                        if ( newChain.getAlgorithmName() == null) {
133                                newChain.setAlgorithmName(DEFAULT_ALGORITHM_NAME);
134                        }
135                        return AFPChainXMLConverter.toXML(newChain);
136                }
137                throw new StructureException("not Implemented yet!");
138        }
139
140
141        /**  replace the PDB res nums with atom positions:
142         *
143         * @param afpChain
144         * @param ca1
145         * @param ca2
146         */
147        public static void rebuildAFPChain(AFPChain afpChain, Atom[] ca1, Atom[] ca2){
148
149                if ( afpChain.getAlgorithmName() == null) {
150                        afpChain.setAlgorithmName(DEFAULT_ALGORITHM_NAME);
151                }
152                if ( afpChain.getVersion() == null){
153                        afpChain.setVersion("1.0");
154                }
155
156                int blockNum  = afpChain.getBlockNum();
157                int ca1Length = afpChain.getCa1Length();
158                int ca2Length = afpChain.getCa2Length();
159
160                int minLength = Math.min(ca1Length, ca2Length);
161                int[][][] optAln = new int[blockNum][2][minLength];
162
163                int[][][] blockResList = afpChain.getBlockResList();
164                if ( blockResList == null){
165                        blockResList = new int[blockNum][2][minLength];
166                }
167                int[] optLen = afpChain.getOptLen();
168
169                String[][][] pdbAln = afpChain.getPdbAln();
170                int[] verifiedOptLen = null;
171                if ( optLen != null)
172                  verifiedOptLen = afpChain.getOptLen().clone();
173                else {
174                        logger.warn("did not find optimal alignment, building up empty alignment.");
175                        optLen = new int[1];
176                        optLen[0] = 0;
177                }
178                for (int blockNr = 0 ; blockNr < blockNum ; blockNr++){
179
180                        //System.out.println("got block " + blockNr + " size: " + optLen[blockNr]);
181                        int verifiedEQR = -1;
182                        for ( int eqrNr = 0 ; eqrNr < optLen[blockNr] ; eqrNr++ ){
183                                String pdbResnum1 = pdbAln[blockNr][0][eqrNr];
184                                String pdbResnum2 = pdbAln[blockNr][1][eqrNr];
185
186                                //System.out.println(blockNr + " " + eqrNr + " got resnum: " + pdbResnum1 + " " + pdbResnum2);
187                                String[] spl1 = pdbResnum1.split(":");
188                                String[] spl2 = pdbResnum2.split(":");
189
190                                String chain1 = spl1[0];
191                                String pdbres1 = spl1[1];
192
193                                String chain2 = spl2[0];
194                                String pdbres2 = spl2[1];
195
196                                int pos1 = getPositionForPDBresunm(pdbres1,chain1,ca1);
197                                int pos2 = getPositionForPDBresunm(pdbres2,chain2,ca2);
198
199                                if ( pos1 == -1 || pos2 == -1 ){
200                                        // this can happen when parsing old files that contained Calcium atoms...
201                                        logger.warn("pos1: {} (residue {}), pos2: {} (residue {}), should never be -1. Probably parsing an old file.",
202                                                        pos1, pdbResnum1, pos2, pdbResnum2);
203                                        verifiedOptLen[blockNr]-- ;
204                                        continue;
205                                }
206
207                                verifiedEQR++;
208                                //System.out.println(blockNr + " " + eqrNr + " " + pos1 + " " + pos2);
209                                optAln[blockNr][0][verifiedEQR] = pos1;
210                                optAln[blockNr][1][verifiedEQR] = pos2;
211                                blockResList[blockNr][0][verifiedEQR] = pos1;
212                                blockResList[blockNr][1][verifiedEQR] = pos2;
213                        }
214                }
215
216                afpChain.setOptLen(verifiedOptLen);
217                afpChain.setOptAln(optAln);
218                afpChain.setBlockResList(blockResList);
219                // build up alignment image:
220                AFPAlignmentDisplay.getAlign(afpChain, ca1, ca2);
221
222
223        }
224
225        public static AFPChain[] parseMultiXML(String xml) throws IOException {
226                List<AFPChain> afpChains = new ArrayList<>();
227
228                try
229                {
230                        //Convert string to XML document
231                        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
232                        DocumentBuilder db = factory.newDocumentBuilder();
233                        InputSource inStream = new InputSource();
234                        inStream.setCharacterStream(new StringReader(xml));
235                        Document doc = db.parse(inStream);
236
237                        // normalize text representation
238                        doc.getDocumentElement().normalize();
239
240
241                        //Element rootElement = doc.getDocumentElement();
242
243                        NodeList listOfAFPChains = doc.getElementsByTagName("AFPChain");
244                        //int numArrays = listOfArrays.getLength();
245                        // go over the blocks
246                        for(int afpPos=0; afpPos<listOfAFPChains.getLength() ; afpPos++)
247                        {
248
249                                AFPChain a = new AFPChain(DEFAULT_ALGORITHM_NAME);
250                                a.setVersion("1.0");
251                                Node rootElement       = listOfAFPChains.item(afpPos);
252
253                                a.setName1(getAttribute(rootElement,"name1"));
254                                a.setName2(getAttribute(rootElement,"name2"));
255                                String algoname = getAttribute(rootElement,"method");
256                                if ( algoname != null) {
257                                        a.setAlgorithmName(algoname);
258                                }
259                                String version = getAttribute(rootElement,"version");
260                                if ( version != null)
261                                        a.setVersion(version);
262
263                                a.setAlnLength( Integer.parseInt(getAttribute(rootElement,"alnLength")));
264                                a.setBlockNum(          Integer.parseInt(getAttribute(rootElement,"blockNum")));
265                                a.setGapLen(            Integer.parseInt(getAttribute(rootElement,"gapLen")));
266                                a.setOptLength( Integer.parseInt(getAttribute(rootElement,"optLength")));
267                                a.setTotalLenIni(       Integer.parseInt(getAttribute(rootElement,"totalLenIni")));
268                                a.setBlockNum(          Integer.parseInt(getAttribute(rootElement,"blockNum")));
269
270                                if ( a.getAlgorithmName().equals(CeCPMain.algorithmName)){
271                                                 a.setSequentialAlignment(a.getBlockNum() == 1);
272                                         }
273
274                                a.setAlignScore(Double.parseDouble(getAttribute(rootElement,"alignScore")));
275                                a.setChainRmsd(Double.parseDouble(getAttribute(rootElement,"chainRmsd")));
276                                Double identity = Double.parseDouble(getAttribute(rootElement,"identity"));
277                                a.setIdentity(identity);
278
279                                a.setNormAlignScore(Double.parseDouble(getAttribute(rootElement,"normAlignScore")));
280                                a.setProbability(Double.parseDouble(getAttribute(rootElement,"probability")));
281                                a.setSimilarity(Double.parseDouble(getAttribute(rootElement,"similarity")));
282                                a.setTotalRmsdIni(Double.parseDouble(getAttribute(rootElement,"totalRmsdIni")));
283                                a.setTotalRmsdOpt(Double.parseDouble(getAttribute(rootElement,"totalRmsdOpt")));
284                                a.setAlignScoreUpdate(Double.parseDouble(getAttribute(rootElement,"alignScoreUpdate")));
285                                int ca1Length = Integer.parseInt(getAttribute(rootElement,"ca1Length"));
286                                a.setCa1Length(ca1Length);
287                                int ca2Length = Integer.parseInt(getAttribute(rootElement,"ca2Length"));
288                                a.setCa2Length(ca2Length);
289
290                                String tmScoreS = getAttribute(rootElement,"tmScore");
291                                if ( tmScoreS != null) {
292                                        Double tmScore = null;
293                                        try {
294                                         tmScore = Double.parseDouble(tmScoreS);
295                                        } catch (Exception e){
296                                        }
297                                        a.setTMScore(tmScore);
298                                }
299
300                                String calcTimeS = getAttribute(rootElement,"time");
301                                Long calcTime = -1L;
302                                if ( calcTimeS != null){
303
304                                        try {
305                                                calcTime = Long.parseLong(calcTimeS);
306
307                                        } catch (Exception e){
308                                                e.printStackTrace();
309                                        }
310                                }
311                                a.setCalculationTime(calcTime);
312
313                                Matrix[] ms = new Matrix[a.getBlockNum()];
314                                a.setBlockRotationMatrix(ms);
315                                Atom[] blockShiftVector = new Atom[a.getBlockNum()];
316                                a.setBlockShiftVector(blockShiftVector);
317
318                                int afpNum = Integer.parseInt(getAttribute(rootElement,"afpNum"));
319                                List<AFP> afpSet = new ArrayList<>();
320                                for (int afp=0;afp<afpNum;afp++){
321                                        afpSet.add( new AFP());
322                                }
323
324                                a.setAfpSet(afpSet);
325
326                                int minLength = Math.min(ca1Length, ca2Length);
327                                a.setFocusRes1(new int[minLength]);
328                                a.setFocusRes2(new int[minLength]);
329
330
331                                //NodeList listOfBlocks = doc.getElementsByTagName("block");
332                                NodeList listOfBlocks = rootElement.getChildNodes();
333
334                                //int numArrays = listOfArrays.getLength();
335
336                                // go over the blocks
337                                for(int i=0; i<listOfBlocks.getLength() ; i++)
338                                {
339                                        Node block       = listOfBlocks.item(i);
340
341                                        // we only look at blocks.
342                                        if (! "block".equals(block.getNodeName()))
343                                                continue;
344
345                                        processBlock(block, a, minLength);
346
347
348                                }
349
350                                afpChains.add(a);
351                        }
352                }
353
354                // TODO these 2 exceptions should be thrown forward, it's not a good idea to catch them so early
355                catch (SAXException e)
356                {
357                        Exception x = e.getException ();
358                        ((x == null) ? e : x).printStackTrace ();
359                }
360                catch (ParserConfigurationException e) {
361                        e.printStackTrace();
362                }
363
364                return afpChains.toArray(new AFPChain[afpChains.size()]);
365        }
366
367
368        private static  void processBlock(Node block, AFPChain a, int minLength){
369                NodeList valList = block.getChildNodes();
370                int numChildren  = valList.getLength();
371
372                NamedNodeMap map = block.getAttributes();
373
374                int blockNum = a.getBlockNum();
375
376                int[]     optLen                        = a.getOptLen();
377                if ( optLen == null )
378                        optLen = new int[blockNum];
379
380                String[][][] pdbAln = a.getPdbAln();
381                if ( pdbAln == null)
382                        pdbAln         = new String[blockNum][2][minLength];
383
384                //int[][][] optAln                      = new int[blockNum][2][minLength];
385                int[]     blockGap = a.getBlockGap();
386                if ( blockGap == null )
387                        blockGap = new int[blockNum];
388                int[]     blockSize= a.getBlockSize();
389                if ( blockSize == null)
390                        blockSize = new int[blockNum];
391
392                double[]  blockScore = a.getBlockScore();
393                if ( blockScore == null)
394                        blockScore = new double[blockNum];
395                double[]  blockRmsd = a.getBlockRmsd();
396                if (blockRmsd == null )
397                        blockRmsd = new double[blockNum];
398                Matrix[] ms     = a.getBlockRotationMatrix();
399                Atom[] shifts = a.getBlockShiftVector();
400
401                int blockNr = Integer.parseInt( map.getNamedItem("blockNr").getTextContent());
402
403                int thisBlockGap = Integer.parseInt(map.getNamedItem("blockGap").getTextContent());
404                blockGap[blockNr] = thisBlockGap;
405
406                int thisBlockSize = Integer.parseInt(map.getNamedItem("blockSize").getTextContent());
407                blockSize[blockNr] = thisBlockSize;
408
409                double thisBlockScore = Double.parseDouble(map.getNamedItem("blockScore").getTextContent());
410                blockScore[blockNr] = thisBlockScore;
411
412                double thisBlockRmsd = Double.parseDouble(map.getNamedItem("blockRmsd").getTextContent());
413                blockRmsd[blockNr] = thisBlockRmsd;
414
415
416                // parse out the equivalent positions from the file
417                int nrEqr = 0;
418                for ( int e =0; e< numChildren ; e++){
419                        Node  eqr = valList.item(e);
420
421                        if(!eqr.hasAttributes()) continue;
422
423
424                        if ( "eqr".equals(eqr.getNodeName())) {
425                                nrEqr++;
426                                NamedNodeMap atts = eqr.getAttributes();
427
428                                int eqrNr = Integer.parseInt(atts.getNamedItem("eqrNr").getTextContent());
429
430                                String pdbres1 = atts.getNamedItem("pdbres1").getTextContent();
431                                String chain1 = atts.getNamedItem("chain1").getTextContent();
432                                String pdbres2 = atts.getNamedItem("pdbres2").getTextContent();
433                                String chain2 = atts.getNamedItem("chain2").getTextContent();
434
435                                //System.out.println(blockNr + " " + eqrNr + " " + chain1+" " + pdbres1 + ":" + chain2 + " " + pdbres2);
436
437                                pdbAln[blockNr][0][eqrNr] = chain1+":"+pdbres1;
438                                pdbAln[blockNr][1][eqrNr] = chain2+":"+pdbres2;
439
440                                //  A WORK AROUND FOR THE PROBLEM THAT WE DON:T HAVE PDBs LOADED AT THIS TIME...
441
442                                /* int pos1 = getPositionForPDBresunm(pdbres1,chain1,ca1);
443                                int pos2 = getPositionForPDBresunm(pdbres2,chain2,ca2);
444                                //System.out.println("settion optAln " + blockNr + " " + eqrNr + " " + pos1);
445                                optAln[blockNr][0][eqrNr] = pos1;
446                                optAln[blockNr][1][eqrNr] = pos2;
447                                 */
448                        } else if ( "matrix".equals(eqr.getNodeName())){
449                                // process Matrix
450                                Matrix m = new Matrix(3,3);
451
452                                for (int i =1 ; i <= 3 ; i++){
453                                        for (int j =1 ; j <= 3 ; j++){
454                                                String att = getAttribute(eqr, "mat" +i + j);
455                                                double val = Double.parseDouble(att);
456                                                m.set(i-1,j-1,val);
457
458                                        }
459                                }
460                                ms[blockNr] = m;
461
462                        } else if ( "shift".equals(eqr.getNodeName())){
463                                Atom shift = new AtomImpl();
464                                double x = Double.parseDouble(getAttribute(eqr, "x"));
465                                double y = Double.parseDouble(getAttribute(eqr, "y"));
466                                double z = Double.parseDouble(getAttribute(eqr, "z"));
467                                shift.setX(x);
468                                shift.setY(y);
469                                shift.setZ(z);
470                                shifts[blockNr] = shift;
471
472                        }
473
474                }
475                //System.out.println("setting block " + blockNr + " eqr: " + nrEqr);
476                optLen[blockNr] = nrEqr;
477
478
479
480                a.setOptLen(optLen);
481                //a.setOptAln(optAln);
482                a.setPdbAln(pdbAln);
483                a.setBlockGap(blockGap);
484                a.setBlockSize(blockSize);
485
486                a.setBlockScore(blockScore);
487                a.setBlockRmsd(blockRmsd);
488
489
490
491        }
492
493        private static String getAttribute(Node node, String attr){
494                if( ! node.hasAttributes())
495                        return null;
496
497                NamedNodeMap atts = node.getAttributes();
498
499                if ( atts == null)
500                        return null;
501
502                Node att = atts.getNamedItem(attr);
503                if ( att == null)
504                        return null;
505
506                String value = att.getTextContent();
507
508                return value;
509
510        }
511
512        /** get the position of PDB residue nr X in the ato marray
513         *
514         * @param pdbresnum pdbresidue number
515         * @param authId chain name
516         * @param atoms atom array
517         * @return
518         */
519        private static int getPositionForPDBresunm(String pdbresnum, String authId , Atom[] atoms){
520                ResidueNumber residueNumber =  ResidueNumber.fromString(pdbresnum);
521                residueNumber.setChainName(authId);
522
523                boolean blankChain = authId == null || "null".equalsIgnoreCase(authId) || "_".equals(authId);
524
525                for ( int i =0; i< atoms.length ;i++){
526                        Group g = atoms[i].getGroup();
527
528                        // match _ to any chain
529                        if( blankChain ) {
530                                residueNumber.setChainName(g.getChain().getName());
531                        }
532
533                        //System.out.println(g.getResidueNumber() + "< ? >" + residueNumber +"<");
534                        if ( g.getResidueNumber().equals(residueNumber)){
535                                //System.out.println(g + " == " + residueNumber );
536                                Chain c = g.getChain();
537                                if ( blankChain || c.getName().equals(authId)){
538                                        return i;
539                                }
540                        }
541                }
542                return -1;
543        }
544
545
546
547}
548