001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 */ 021package org.biojava.nbio.phylo; 022 023/** 024 * This class provides static methods for the calculation of the percentage of 025 * identity between two aligned sequences. 026 * <p> 027 * Since 4.1.1 the methods for distance inference in forester are also used in 028 * BioJava, so this implementation of percentage of identity is not needed 029 * anymore. However, the code is maintained as the own BioJava implementation. 030 * 031 * @author Scooter Willis 032 * 033 */ 034public class Comparison { 035 036 private static final int caseShift = 'a' - 'A'; 037 038 /** 039 * this is a gapped PID calculation 040 * 041 * @param s1 042 * SequenceI 043 * @param s2 044 * SequenceI 045 * @return float 046 */ 047 public final static float PID(String seq1, String seq2) { 048 return PID(seq1, seq2, 0, seq1.length()); 049 } 050 051 // Another pid with region specification 052 public final static float PID(String seq1, String seq2, int start, int end) { 053 054 int s1len = seq1.length(); 055 int s2len = seq2.length(); 056 057 int len = Math.min(s1len, s2len); 058 059 if (end < len) { 060 len = end; 061 } 062 063 if (len < start) { 064 start = len - 1; // we just use a single residue for the difference 065 } 066 067 int bad = 0; 068 char chr1; 069 char chr2; 070 071 for (int i = start; i < len; i++) { 072 073 chr1 = seq1.charAt(i); 074 chr2 = seq2.charAt(i); 075 076 if ('a' <= chr1 && chr1 <= 'z') { 077 // TO UPPERCASE !!! 078 // Faster than toUpperCase 079 chr1 -= caseShift; 080 } 081 if ('a' <= chr2 && chr2 <= 'z') { 082 // TO UPPERCASE !!! 083 // Faster than toUpperCase 084 chr2 -= caseShift; 085 } 086 087 if (chr1 != chr2 && !isGap(chr1) && !isGap(chr2)) { 088 bad++; 089 } 090 } 091 092 return ((float) 100 * (len - bad)) / len; 093 } 094 095 /** 096 * Method that determines if a character means a gap in the alignment. 097 * 098 * @param c 099 * gap character is one of the symbols in {' ','-','.'} 100 * 101 * @return true if it is a gap, false otherwise 102 */ 103 public static final boolean isGap(char c) { 104 return (c == '-' || c == '.' || c == ' ') ? true : false; 105 } 106 107}