/* @(#)FastaSequence.java 1.0 September 2009
 *
 * Copyright (c) 2009 Peter Troshin
 *
 *        BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.nbio.data.sequence;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A FASTA formatted sequence. Please note that this class does not make any
 * assumptions as to what sequence it stores, e.g. it could be a nucleotide,
 * protein or even gapped alignment sequence! The only guarantee it makes is
 * that the sequence does not contain white space characters, e.g. spaces, new
 * lines etc.
 * <p>
 * Note: the natural ordering ({@link #compareTo(FastaSequence)}) looks at the
 * id only, whereas {@link #equals(Object)} compares both id and sequence, so
 * the ordering is not consistent with equals.
 *
 * @author pvtroshin
 * @version 1.0
 * @since 3.0.2
 */

@XmlAccessorType(XmlAccessType.FIELD)
public final class FastaSequence implements Comparable<FastaSequence> {

    /**
     * Sequence id. Not final because JAXB populates it through field access.
     */
    private String id;

    // TODO what about gapped sequence here! should be indicated
    /**
     * The string representation of the sequence.
     */
    private String sequence;

    @SuppressWarnings("unused")
    private FastaSequence() {
        // Default constructor for JaxB; leaves both fields null until they
        // are populated via field access.
    }

    /**
     * Creates a sequence record. The id is trimmed and the sequence is passed
     * through {@code SequenceUtil.cleanSequence}, which is expected to remove
     * any whitespace characters from it.
     *
     * @param id
     *            the sequence identifier, must not be null
     * @param sequence
     *            the raw sequence string
     * @throws NullPointerException
     *             if {@code id} is null
     */
    public FastaSequence(final String id, final String sequence) {
        this.id = id.trim();
        this.sequence = SequenceUtil.cleanSequence(sequence);
    }

    /**
     * Gets the value of id
     *
     * @return the value of id
     */
    public String getId() {
        return id;
    }

    /**
     * Gets the value of sequence
     *
     * @return the value of sequence
     */
    public String getSequence() {
        return sequence;
    }

    /**
     * Counts the non-overlapping matches of a regular expression in a string,
     * scanning from left to right.
     *
     * @param theString
     *            the text to search
     * @param theRegExp
     *            the regular expression to look for
     * @return the number of matches found
     * @throws java.util.regex.PatternSyntaxException
     *             if {@code theRegExp} is not a valid regular expression
     */
    public static int countMatchesInSequence(final String theString,
            final String theRegExp) {
        final Pattern p = Pattern.compile(theRegExp);
        final Matcher m = p.matcher(theString);
        int cnt = 0;
        while (m.find()) {
            cnt++;
        }
        return cnt;
    }

    /**
     * Returns the sequence wrapped at 80 characters per line. Note that only
     * the sequence is returned; no {@code >id} header line is included.
     *
     * @return the sequence broken into lines of 80 characters
     */
    public String getFormattedFasta() {
        return getFormatedSequence(80);
    }

    /**
     *
     * @return one line name, next line sequence, no matter what the sequence
     *         length is
     */
    public String getOnelineFasta() {
        String fasta = ">" + getId() + "\n";
        fasta += getSequence() + "\n";
        return fasta;
    }

    /**
     * Formats the sequence into lines of {@code width} letters each, joined
     * by new line characters. No spaces are added and no new line is appended
     * after the last line.
     *
     * @param width
     *            the maximum number of letters per line, must be positive
     * @return multiple line formatted sequence, each line at most
     *         {@code width} letters long; an empty string if the sequence is
     *         null
     * @throws IllegalArgumentException
     *             if {@code width} is zero or negative
     */
    public String getFormatedSequence(final int width) {
        if (sequence == null) {
            return "";
        }
        // An assert is not argument validation: it is a no-op unless the JVM
        // runs with -ea, and a width of 0 would otherwise divide by zero.
        if (width <= 0) {
            throw new IllegalArgumentException(
                    "Wrong width parameter: " + width);
        }

        final int length = sequence.length();
        final StringBuilder sb = new StringBuilder(length + length / width);
        int start = 0;
        // Emit full lines while more than one line's worth of letters
        // remains, so that the final chunk never gets a trailing new line.
        while (length - start > width) {
            sb.append(sequence, start, start + width).append('\n');
            start += width;
        }
        sb.append(sequence, start, length);
        return sb.toString();
    }

    /**
     *
     * @return sequence length, or 0 if the sequence has not been set
     */
    public int getLength() {
        return (sequence == null) ? 0 : sequence.length();
    }

    /**
     * Same as oneLineFasta
     */
    @Override
    public String toString() {
        return this.getOnelineFasta();
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + ((id == null) ? 0 : id.hashCode());
        result = prime * result
                + ((sequence == null) ? 0 : sequence.hashCode());
        return result;
    }

    /**
     * Two sequences are equal when they are both {@code FastaSequence}
     * instances with equal ids and equal sequence strings (nulls match only
     * nulls).
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final FastaSequence other = (FastaSequence) obj;
        final boolean sameId = (id == null) ? other.id == null
                : id.equals(other.id);
        if (!sameId) {
            return false;
        }
        return (sequence == null) ? other.sequence == null
                : sequence.equals(other.sequence);
    }

    /**
     * Orders sequences by id only. A null argument sorts before any instance,
     * a null id sorts before any non-null id, and two null ids compare as
     * equal.
     *
     * @param o
     *            the sequence to compare with
     * @return a negative number, zero or a positive number as this id is less
     *         than, equal to or greater than the id of {@code o}
     */
    @Override
    public int compareTo(FastaSequence o) {
        if (o == null) {
            return 1;
        }
        if (id == null) {
            // Keeps the comparison symmetric when ids are missing on either
            // side instead of failing with a NullPointerException.
            return (o.id == null) ? 0 : -1;
        }
        if (o.id == null) {
            return 1;
        }
        return id.compareTo(o.id);
    }

}