/* * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007. * * Licensed under the Aduna BSD-style license. */ package org.openrdf.sail.nativerdf.datastore; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.zip.CRC32; import info.aduna.io.ByteArrayUtil; /** * Class that provides indexed storage and retrieval of arbitrary length data. * * @author Arjohn Kampman */ public class DataStore { /*-----------* * Variables * *-----------*/ private DataFile dataFile; private IDFile idFile; private HashFile hashFile; /** * The checksum to use for calculating data hashes. */ private CRC32 crc32 = new CRC32(); /*--------------* * Constructors * *--------------*/ public DataStore(File dataDir, String filePrefix) throws IOException { this(dataDir, filePrefix, false); } public DataStore(File dataDir, String filePrefix, boolean forceSync) throws IOException { dataFile = new DataFile(new File(dataDir, filePrefix + ".dat"), forceSync); idFile = new IDFile(new File(dataDir, filePrefix + ".id"), forceSync); hashFile = new HashFile(new File(dataDir, filePrefix + ".hash"), forceSync); } /*---------* * Methods * *---------*/ /** * Gets the value for the specified ID. * * @param id * A value ID, should be larger than 0. * @return The value for the ID, or <tt>null</tt> if no such value could be * found. * @exception IOException * If an I/O error occurred. */ public byte[] getData(int id) throws IOException { assert id > 0 : "id must be larger than 0, is: " + id; // Data not in cache or cache not used, fetch from file long offset = idFile.getOffset(id); if (offset != 0L) { return dataFile.getData(offset); } return null; } /** * Gets the ID for the specified value. * * @param queryData * The value to get the ID for, must not be <tt>null</tt>. * @return The ID for the specified value, or <tt>-1</tt> if no such ID * could be found. * @exception IOException * If an I/O error occurred. */ public int getID(byte[] queryData) throws IOException { assert queryData != null : "queryData must not be null"; int id = -1; // Value not in cache or cache not used, fetch from file int hash = getDataHash(queryData); HashFile.IDIterator iter = hashFile.getIDIterator(hash); while ((id = iter.next()) >= 0) { long offset = idFile.getOffset(id); byte[] data = dataFile.getData(offset); if (Arrays.equals(queryData, data)) { // Matching data found break; } } return id; } /** * Returns the maximum value-ID that is in use. * * @return The largest ID, or <tt>0</tt> if the store does not contain any * values. * @throws IOException * If an I/O error occurs. */ public int getMaxID() throws IOException { return idFile.getMaxID(); } /** * Stores the supplied value and returns the ID that has been assigned to it. * In case the data to store is already present, the ID of this existing data * is returned. * * @param data * The data to store, must not be <tt>null</tt>. * @return The ID that has been assigned to the value. * @exception IOException * If an I/O error occurred. */ public int storeData(byte[] data) throws IOException { assert data != null : "data must not be null"; int id = getID(data); if (id == -1) { // Data not stored yet, store it under a new ID. long offset = dataFile.storeData(data); id = idFile.storeOffset(offset); hashFile.storeID(getDataHash(data), id); } return id; } /** * Synchronizes any recent changes to the data to disk. * * @exception IOException * If an I/O error occurred. */ public void sync() throws IOException { hashFile.sync(); idFile.sync(); dataFile.sync(); } /** * Removes all values from the DataStore. * * @exception IOException * If an I/O error occurred. */ public void clear() throws IOException { hashFile.clear(); idFile.clear(); dataFile.clear(); } /** * Closes the DataStore, releasing any file references, etc. In case a * transaction is currently open, it will be rolled back. Once closed, the * DataStore can no longer be used. * * @exception IOException * If an I/O error occurred. */ public void close() throws IOException { hashFile.close(); idFile.close(); dataFile.close(); } /** * Gets a hash code for the supplied data. * * @param data * The data to calculate the hash code for. * @return A hash code for the supplied data. */ private int getDataHash(byte[] data) { synchronized (crc32) { crc32.update(data); int crc = (int)crc32.getValue(); crc32.reset(); return crc; } } /*--------------------* * Test/debug methods * *--------------------*/ public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: java org.openrdf.sesame.sailimpl.nativerdf.datastore.DataStore <data-dir> <file-prefix>"); return; } System.out.println("Dumping DataStore contents..."); File dataDir = new File(args[0]); DataStore dataStore = new DataStore(dataDir, args[1]); DataFile.DataIterator iter = dataStore.dataFile.iterator(); while (iter.hasNext()) { byte[] data = iter.next(); System.out.println(ByteArrayUtil.toHexString(data)); } } }