/** * Distribution License: * JSword is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License, version 2.1 as published by * the Free Software Foundation. This program is distributed in the hope * that it will be useful, but WITHOUT ANY WARRANTY; without even the * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * * The License is available on the internet at: * http://www.gnu.org/copyleft/lgpl.html * or by writing to: * Free Software Foundation, Inc. * 59 Temple Place - Suite 330 * Boston, MA 02111-1307, USA * * Copyright: 2005 * The copyright to this program is held by it's authors. * * ID: $Id: ZVerseBackend.java 2099 2011-03-07 17:13:00Z dmsmith $ */ package org.crosswire.jsword.book.sword; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.RandomAccessFile; import java.net.URI; import org.crosswire.common.activate.Activator; import org.crosswire.common.activate.Lock; import org.crosswire.common.compress.CompressorType; import org.crosswire.common.util.FileUtil; import org.crosswire.common.util.Logger; import org.crosswire.common.util.NetUtil; import org.crosswire.jsword.JSMsg; import org.crosswire.jsword.book.BookCategory; import org.crosswire.jsword.book.BookException; import org.crosswire.jsword.book.DataPolice; import org.crosswire.jsword.passage.Key; import org.crosswire.jsword.passage.KeyUtil; import org.crosswire.jsword.passage.Verse; /** * A backend to read compressed data verse based files. While the text file * contains data compressed with ZIP or LZSS, it cannot be uncompressed using a * stand alone zip utility, such as WinZip or gzip. The reason for this is that * the data file is a concatenation of blocks of compressed data. * * <p> * The blocks can either be "b", book (aka testament); "c", chapter or "v", * verse. The choice is a matter of trade offs. 
 * The program needs to uncompress a block into memory. Having it at the book
 * level is very memory expensive. Having it at the verse level is very disk
 * expensive, but takes the least amount of memory. The most common is chapter.
 * </p>
 *
 * <p>
 * In order to find the data in the text file, we need to find the block. The
 * first index (comp) is used for this. Each verse is indexed to a tuple (block
 * number, verse start, verse size). This data allows us to find the correct
 * block, and to extract the verse from the uncompressed block, but it does not
 * help us uncompress the block.
 * </p>
 *
 * <p>
 * Once the block is known, then the next index (idx) gives the location of the
 * compressed block, its compressed size and its uncompressed size.
 * </p>
 *
 * <p>
 * There are 3 files for each testament, 2 (comp and idx) are indexes into the
 * third (text) which contains the data. The key into each index is the verse
 * index within that testament, which is determined by book, chapter and verse
 * of that key.
 * </p>
 *
 * <p>
 * All numbers are stored in two's complement, little endian.
 * </p>
 * <p>
 * Then proceed as follows, at all times working on the set of files for the
 * testament in question:
 * </p>
 *
 * <pre>
 * in the comp file, seek to the index * 10
 * read 10 bytes.
 * the block-index is the first 4 bytes (32-bit number)
 * the next 4 bytes are the verse offset within the uncompressed block
 * the last 2 bytes are the verse length within the uncompressed block
 * in the idx file seek to block-index * 12
 * read 12 bytes
 * the text-block-index is the first 4 bytes
 * the data-size is the next 4 bytes
 * the uncompressed-size is the next 4 bytes
 * in the text file seek to the text-block-index
 * read data-size bytes
 * decipher them if they are encrypted
 * uncompress them (using the book's compression type) into a byte array of uncompressed-size
 * </pre>
 *
 * TODO(DM): Testament 0 is used to index a README file for the bible. At this
 * time it is ignored.
 *
 * @see gnu.lgpl.License for license details.<br>
 *      The copyright to this program is held by its authors.
* @author Joe Walker [joe at eireneh dot com]
 */
public class ZVerseBackend extends AbstractBackend {
    /** Final character of the per-verse (comp) index file name. */
    private static final String SUFFIX_COMP = "v";

    /** Final character of the per-block (idx) index file name. */
    private static final String SUFFIX_INDEX = "s";

    /** Character that follows the block type indicator in all three file names. */
    private static final String SUFFIX_PART1 = "z";

    /** Final character of the compressed text file name. */
    private static final String SUFFIX_TEXT = "z";

    /**
     * Creates a backend for the given book.
     *
     * @param sbmd
     *            the metadata of the book; passed on to the superclass
     * @param blockType
     *            the block type whose indicator is used to build the data
     *            file names
     */
    public ZVerseBackend(SwordBookMetaData sbmd, BlockType blockType) {
        super(sbmd);
        this.blockType = blockType;

        // Some commentaries like MHC show the same content for several
        // adjacent verses, but merged verses should not be duplicated, so
        // duplicate suppression is only enabled for Bibles.
        this.isPreventDuplicateVerseContent = BookCategory.BIBLE.equals(sbmd.getBookCategory());
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * org.crosswire.common.activate.Activatable#activate(org.crosswire.common
     * .activate.Lock)
     */
    public final void activate(Lock lock) {
        try {
            // The file locations are only computed the first time through.
            if (idxFile[SwordConstants.TESTAMENT_OLD] == null) {
                URI path = getExpandedDataPath();

                String otAllButLast = NetUtil.lengthenURI(path,
                        File.separator + SwordConstants.FILE_OT + '.' + blockType.getIndicator() + SUFFIX_PART1).getPath();
                assignFiles(SwordConstants.TESTAMENT_OLD, otAllButLast);

                String ntAllButLast = NetUtil.lengthenURI(path,
                        File.separator + SwordConstants.FILE_NT + '.' + blockType.getIndicator() + SUFFIX_PART1).getPath();
                assignFiles(SwordConstants.TESTAMENT_NEW, ntAllButLast);
            }
        } catch (BookException e) {
            // Without a data path nothing can be opened. Forget any partially
            // computed locations and stay inactive, so that the next access
            // tries again.
            clearFiles(SwordConstants.TESTAMENT_OLD);
            clearFiles(SwordConstants.TESTAMENT_NEW);
            return;
        }

        openTestament(SwordConstants.TESTAMENT_OLD, "Could not open OT", "failed to close ot files");
        openTestament(SwordConstants.TESTAMENT_NEW, "Could not open NT", "failed to close nt files");

        active = true;
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * org.crosswire.common.activate.Activatable#deactivate(org.crosswire.common
     * .activate.Lock)
     */
    public final void deactivate(Lock lock) {
        closeTestament(SwordConstants.TESTAMENT_NEW, "failed to close nt files");
        closeTestament(SwordConstants.TESTAMENT_OLD, "failed to close ot files");

        // Drop the cached block and the duplicate-detection state so that the
        // uncompressed data is not kept alive while the files are closed.
        lastTestament = -1;
        lastBlockNum = -1;
        lastUncompressed = null;
        lastIndex = -1;
        lastVerseStart = -1;
        lastVerseSize = -1;

        active = false;
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * org.crosswire.jsword.passage.Key#contains(org.crosswire.jsword.passage
     * .Key)
     */
    @Override
    public boolean contains(Key key) {
        checkActive();

        try {
            DataPolice.setKey(key);

            Verse verse = KeyUtil.getVerse(key);

            try {
                int testament = SwordConstants.getTestament(verse);
                long index = SwordConstants.getIndex(verse);

                // No entry means either the testament is not present or the
                // verse does not exist. Some Bibles have different
                // versification, so the requested verse may not exist.
                byte[] entry = readCompEntry(testament, index);
                if (entry == null) {
                    return false;
                }

                // The data is little endian. Only the verse size (the last 2
                // bytes of the entry) is needed here.
                int verseSize = SwordUtil.decodeLittleEndian16(entry, 8);

                return verseSize > 0;
            } catch (IOException e) {
                // An unreadable index is reported as "not contained".
                return false;
            }
        } finally {
            DataPolice.setKey(null);
        }
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * org.crosswire.jsword.book.sword.AbstractBackend#getRawText(org.crosswire
     * .jsword.passage.Key, java.lang.String)
     */
    @Override
    public String getRawText(Key key) throws BookException {
        checkActive();

        try {
            DataPolice.setKey(key);

            SwordBookMetaData sbmd = getBookMetaData();
            String charset = sbmd.getBookCharset();
            String compressType = (String) sbmd.getProperty(ConfigEntryType.COMPRESS_TYPE);

            Verse verse = KeyUtil.getVerse(key);

            try {
                int testament = SwordConstants.getTestament(verse);
                long index = SwordConstants.getIndex(verse);

                // No entry means either the testament is not present or the
                // verse does not exist. Some Bibles have different
                // versification, so the requested verse may not exist.
                byte[] entry = readCompEntry(testament, index);
                if (entry == null) {
                    return "";
                }

                // The data is little endian - extract the blockNum,
                // verseStart and verseSize
                long blockNum = SwordUtil.decodeLittleEndian32(entry, 0);
                int verseStart = SwordUtil.decodeLittleEndian32(entry, 4);
                int verseSize = SwordUtil.decodeLittleEndian16(entry, 8);

                // Block numbers and verse indexes are per testament, so a
                // block only matches the cached one if the testament matches
                // too.
                boolean sameBlock = blockNum == lastBlockNum && testament == lastTestament;

                // MJD start
                // do not return duplicate text for merged verses
                if (isPreventDuplicateVerseContent
                        && sameBlock
                        && index == lastIndex + 1
                        && verseStart == lastVerseStart
                        && verseSize == lastVerseSize)
                {
                    lastIndex = index;
                    return "";
                }
                lastIndex = index;
                lastVerseStart = verseStart;
                lastVerseSize = verseSize;
                // MJD end

                // Can we get the data from the cache
                byte[] uncompressed;
                if (sameBlock) {
                    uncompressed = lastUncompressed;
                } else {
                    // Then seek using this index into the idx file
                    byte[] blockEntry = SwordUtil.readRAF(idxRaf[testament], blockNum * IDX_ENTRY_SIZE, IDX_ENTRY_SIZE);
                    if (blockEntry == null || blockEntry.length < IDX_ENTRY_SIZE) {
                        return "";
                    }

                    int blockStart = SwordUtil.decodeLittleEndian32(blockEntry, 0);
                    int blockSize = SwordUtil.decodeLittleEndian32(blockEntry, 4);
                    int uncompressedSize = SwordUtil.decodeLittleEndian32(blockEntry, 8);

                    // Read from the data file.
                    byte[] data = SwordUtil.readRAF(textRaf[testament], blockStart, blockSize);

                    decipher(data);

                    uncompressed = CompressorType.fromString(compressType).getCompressor(data).uncompress(uncompressedSize).toByteArray();

                    // cache the uncompressed data for next time
                    lastBlockNum = blockNum;
                    lastTestament = testament;
                    lastUncompressed = uncompressed;
                }

                // A verse that does not lie inside its block indicates damaged
                // data. Report it through the same path as any other read
                // failure instead of failing with an unchecked exception.
                if (uncompressed == null
                        || verseStart < 0
                        || verseSize < 0
                        || verseStart > uncompressed.length - verseSize)
                {
                    throw new IOException("Verse data lies outside of its uncompressed block");
                }

                // and cut out the required section.
                byte[] chopped = new byte[verseSize];
                System.arraycopy(uncompressed, verseStart, chopped, 0, verseSize);

                return SwordUtil.decode(key.getName(), chopped, charset);
            } catch (IOException e) {
                // TRANSLATOR: Common error condition: The file could not be read. There can be many reasons.
                // {0} is a placeholder for the file.
                throw new BookException(JSMsg.gettext("Error reading {0}", verse.getName()), e);
            }
        } finally {
            // Clear the key again, exactly as contains(Key) does.
            DataPolice.setKey(null);
        }
    }

    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.sword.AbstractBackend#setAliasKey(org.crosswire.jsword.passage.Key, org.crosswire.jsword.passage.Key)
     */
    @Override
    public void setAliasKey(Key alias, Key source) throws IOException {
        throw new UnsupportedOperationException();
    }

    /* (non-Javadoc)
     * @see org.crosswire.jsword.book.sword.AbstractBackend#setRawText(org.crosswire.jsword.passage.Key, java.lang.String)
     */
    @Override
    public void setRawText(Key key, String text) throws BookException, IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Helper method so we can quickly activate ourselves on access
     */
    protected final void checkActive() {
        if (!active) {
            Activator.activate(this);
        }
    }

    /**
     * Records the locations of the three data files of one testament.
     *
     * @param testament
     *            the slot to fill in the file arrays
     * @param allButLast
     *            the full file name without its final character
     */
    private void assignFiles(int testament, String allButLast) {
        idxFile[testament] = new File(allButLast + SUFFIX_INDEX);
        textFile[testament] = new File(allButLast + SUFFIX_TEXT);
        compFile[testament] = new File(allButLast + SUFFIX_COMP);
    }

    /**
     * Forgets the locations of the three data files of one testament.
     *
     * @param testament
     *            the slot to clear in the file arrays
     */
    private void clearFiles(int testament) {
        idxFile[testament] = null;
        textFile[testament] = null;
        compFile[testament] = null;
    }

    /**
     * Opens the three data files of one testament for reading, provided that
     * its idx file is readable. Either all three end up open or none of them.
     *
     * @param testament
     *            the slot to open
     * @param openFailure
     *            the message to log if a file cannot be opened
     * @param closeFailure
     *            the message to log if cleaning up after a failure fails
     */
    private void openTestament(int testament, String openFailure, String closeFailure) {
        if (!idxFile[testament].canRead()) {
            return;
        }

        try {
            idxRaf[testament] = new RandomAccessFile(idxFile[testament], FileUtil.MODE_READ);
            textRaf[testament] = new RandomAccessFile(textFile[testament], FileUtil.MODE_READ);
            compRaf[testament] = new RandomAccessFile(compFile[testament], FileUtil.MODE_READ);
        } catch (FileNotFoundException ex) {
            log.error(openFailure, ex);

            // Release whatever was opened before the failure rather than just
            // dropping the references.
            closeTestament(testament, closeFailure);

            assert false : ex;
        }
    }

    /**
     * Closes whichever data files of one testament are open and clears their
     * slots. Each file is closed independently, so a failure to close one
     * does not prevent the others from being closed.
     *
     * @param testament
     *            the slot to close
     * @param closeFailure
     *            the message to log if a file cannot be closed
     */
    private void closeTestament(int testament, String closeFailure) {
        RandomAccessFile[] open = {
            idxRaf[testament], textRaf[testament], compRaf[testament],
        };

        idxRaf[testament] = null;
        textRaf[testament] = null;
        compRaf[testament] = null;

        for (int i = 0; i < open.length; i++) {
            if (open[i] == null) {
                continue;
            }

            try {
                open[i].close();
            } catch (IOException ex) {
                log.error(closeFailure, ex);
            }
        }
    }

    /**
     * Reads the comp index entry of a verse.
     *
     * @param testament
     *            the testament holding the verse
     * @param index
     *            the index of the verse within that testament
     * @return the complete entry of {@link #COMP_ENTRY_SIZE} bytes, or
     *         <code>null</code> if the testament is not open or no complete
     *         entry could be read
     * @throws IOException
     *             if the comp file cannot be read
     */
    private byte[] readCompEntry(int testament, long index) throws IOException {
        RandomAccessFile raf = compRaf[testament];

        // If Bible does not contain the desired testament, there is no entry.
        if (raf == null) {
            return null;
        }

        byte[] entry = SwordUtil.readRAF(raf, index * COMP_ENTRY_SIZE, COMP_ENTRY_SIZE);

        // A missing or truncated entry cannot be decoded.
        if (entry == null || entry.length < COMP_ENTRY_SIZE) {
            return null;
        }

        return entry;
    }

    /**
     * Whether the book is blocked by Book, Chapter or Verse.
     */
    private final BlockType blockType;

    // NOTE(review): the cache and duplicate-detection fields below are read
    // and written without any synchronization in this class - confirm that
    // callers serialize access.

    /**
     * The testament of the cached block, or -1 if nothing is cached.
     */
    private int lastTestament = -1;

    /**
     * Whether a verse whose index entry repeats that of the verse read just
     * before it yields an empty string instead of the same text again.
     */
    private final boolean isPreventDuplicateVerseContent;

    /**
     * The index of the verse most recently looked up, or -1 if there is none.
     */
    private long lastIndex = -1;

    /**
     * The start offset of the verse most recently looked up, or -1.
     */
    private int lastVerseStart = -1;

    /**
     * The size of the verse most recently looked up, or -1.
     */
    private int lastVerseSize = -1;

    /**
     * The number of the cached block, or -1 if nothing is cached.
     */
    private long lastBlockNum = -1;

    /**
     * The uncompressed content of the cached block.
     */
    private byte[] lastUncompressed;

    /**
     * Are we active
     */
    private boolean active;

    /**
     * The log stream
     */
    private static final Logger log = Logger.getLogger(ZVerseBackend.class);

    /**
     * The array of index random access files
     */
    private final RandomAccessFile[] idxRaf = new RandomAccessFile[3];

    /**
     * The array of data random access files
     */
    private final RandomAccessFile[] textRaf = new RandomAccessFile[3];

    /**
     * The array of compressed random access files
     */
    private final RandomAccessFile[] compRaf = new RandomAccessFile[3];

    /**
     * The array of index file locations
     */
    private final File[] idxFile = new File[3];

    /**
     * The array of data file locations
     */
    private final File[] textFile = new File[3];

    /**
     * The array of comp file locations
     */
    private final File[] compFile = new File[3];

    /**
     * How many bytes in the comp index?
     */
    private static final int COMP_ENTRY_SIZE = 10;

    /**
     * How many bytes in the idx index?
     */
    private static final int IDX_ENTRY_SIZE = 12;
}