/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.seq.db.emblcd;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;

/**
 * <p><code>EmblCDROMRandomAccess</code> is an abstract class whose
 * concrete subclasses can perform fast lookups in EMBL CD-ROM format
 * index files. As the format of the records varies between file
 * types, subclasses should implement two methods;
 * <code>readRecord()</code>, which should parse the record into an
 * array of objects and <code>getRecordKey()</code> which should
 * retrieve the field from the parsed record on which the records
 * were sorted in the index. This is used during the binary search in
 * the <code>findRecord()</code> method.</p>
 *
 * <p>Implementing <code>readRecord()</code> is easy because it simply
 * means delegating to the supplied <code>RecordParser</code> and
 * calling the appropriate method on it.</p>
 *
 * @author Keith James
 * @since 1.2
 */
public abstract class EmblCDROMRandomAccess
{
    /** The index file wrapped by this object, as passed to the constructor. */
    private final File indexFile;

    /**
     * Read-only handle on the index file. <code>findRecord()</code>
     * positions it at the start of a record immediately before
     * calling <code>readRecord()</code>.
     */
    protected RandomAccessFile raIndexFile;

    /** Offset in bytes of the first record from the start of the file. */
    private final int  headerLength;
    /** Length in bytes of a single record. */
    private final int  recordLength;
    /** Total number of records available to the search. */
    private final long recordCount;

    /**
     * A <code>recParser</code> for implementing
     * <code>readRecord()</code> specific to each concrete subclass.
     */
    protected RecordParser recParser;

    /**
     * A buffer of <code>recordLength</code> bytes allocated by the
     * constructor. It is not used by this class itself; presumably
     * subclasses read the raw bytes of one record into it.
     */
    protected byte [] recBytes;

    /**
     * Creates a new <code>EmblCDROMRandomAccess</code> object.
     *
     * @param indexFile a <code>File</code> to wrap.
     * @param headerLength an <code>int</code> (normally 300 bytes).
     * @param recordLength an <code>int</code> indicating the length
     * of a single record.
     * @param recordCount a <code>long</code> indicating the total
     * number of records.
     *
     * @exception FileNotFoundException if indexFile cannot be found.
     */
    public EmblCDROMRandomAccess(File indexFile,
                                 int  headerLength,
                                 int  recordLength,
                                 long recordCount)
        throws FileNotFoundException
    {
        this.indexFile    = indexFile;
        this.headerLength = headerLength;
        this.recordLength = recordLength;
        this.recordCount  = recordCount;

        // Allocate the buffer and parser before opening the file: if
        // either step throws (e.g. a negative recordLength) no file
        // handle has been opened yet, so none can be leaked.
        recBytes  = new byte [recordLength];
        recParser = new RecordParser();

        raIndexFile = new RandomAccessFile(indexFile, "r");
    }

    /**
     * <code>getFile</code> returns the <code>File</code> wrapped.
     *
     * @return a <code>File</code>.
     */
    public File getFile()
    {
        return indexFile;
    }

    /**
     * <code>findRecord</code> performs a binary search within the
     * file for a record specified by an identifier String. Record
     * keys are trimmed of surrounding whitespace before comparison
     * and the search relies on the records being stored in ascending
     * <code>String.compareTo</code> order of those keys.
     *
     * @param identifier a <code>String</code> identifier (sequence ID
     * or accession number).
     *
     * @return an <code>Object []</code> array containing the
     * record. If there is no such record an empty array is returned.
     *
     * @exception IOException if an error occurs.
     */
    public Object [] findRecord(String identifier)
        throws IOException
    {
        long low  = 0;
        long high = recordCount - 1;

        while (low <= high)
        {
            // Overflow-safe form of (low + high) / 2
            long mid = low + ((high - low) / 2);

            // mid is a long, so the offset is computed in 64-bit
            // arithmetic and cannot wrap for large index files
            raIndexFile.seek(headerLength + (mid * recordLength));

            Object [] record = readRecord();

            // A single comparison both detects a match and gives the
            // direction in which to continue the search
            int order = getRecordKey(record).trim().compareTo(identifier);

            if (order == 0)
                return record;
            else if (order < 0)
                low = mid + 1;
            else
                high = mid - 1;
        }

        // No such record
        return new Object [0];
    }

    /**
     * <code>close</code> closes the underlying
     * <code>RandomAccessFile</code>.
     *
     * @exception IOException if an error occurs.
     */
    public void close() throws IOException
    {
        raIndexFile.close();
    }

    /**
     * <code>readRecord</code> returns an array of objects parsed from
     * a single record. Its content will depend on the type of index
     * file. Concrete subclasses must provide an implementation of
     * this method. When called from <code>findRecord()</code> the
     * file pointer of <code>raIndexFile</code> has just been
     * positioned at the start of the record to be read.
     *
     * @return an <code>Object []</code> array.
     *
     * @exception IOException if an error occurs.
     */
    protected abstract Object [] readRecord() throws IOException;

    /**
     * <code>getRecordKey</code> returns the field from the record on
     * which the records were sorted in the index. (i.e. sequence ID
     * or accession number).
     *
     * @param record an <code>Object []</code> array as returned by
     * <code>readRecord()</code>.
     *
     * @return a <code>String</code>.
     */
    protected abstract String getRecordKey(Object [] record);
}