/**
* BufferedRandomAccessFile
* Copyright 2015 by Michael Peter Christen
* First released 30.09.2015
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package org.loklak.tools;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.RandomAccessFile;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import junit.framework.TestCase;
import org.junit.After;
import org.junit.Before;
/**
* This class is inspired by https://code.google.com/p/jmzreader/source/browse/tools/braf/trunk/src/main/java/uk/ac/ebi/pride/tools/braf/BufferedRandomAccessFile.java
* which is in turn an optimized version of the RandomAccessFile class as described by Nick Zhang on JavaWorld.com. The article can be found at http://www.javaworld.com/javaworld/javatips/jw-javatip26.html
* The getNextLine method was rewritten so that it returns a byte[] rather than a String; the String-based original was not UTF-8-friendly.
* The whole class was not concurrency-safe. Synchronization has been added to ensure consistency of buffer and seek position.
*/
public class BufferedRandomAccessFile extends RandomAccessFile {
/** Read buffer holding the chunk of the file that was loaded most recently. */
private byte buffer[];
/** Number of valid bytes in the buffer; 0 means that nothing is buffered. */
private int buf_end = 0;
/** Index within the buffer of the next byte to hand out. */
private int buf_pos = 0;
/** Position of the underlying file pointer, i.e. the file offset just behind the buffered bytes. */
private long real_pos = 0;
/** Capacity of the buffer in bytes, taken from the constructor argument. */
private final int BUF_SIZE;
/**
 * Creates a new instance of the BufferedRandomAccessFile.
 *
 * @param filename The path of the file to open.
 * @param mode The access mode ("r", "rw", etc.), passed unchanged to the
 *        {@link RandomAccessFile} constructor; see its documentation for details.
 * @param bufsize The buffer size (in bytes) to use; must be positive.
 * @throws IllegalArgumentException if bufsize is zero or negative
 * @throws IOException if the file cannot be opened or its position cannot be read
 */
public BufferedRandomAccessFile(String filename, String mode, int bufsize) throws IOException {
    super(filename, mode);
    if (bufsize <= 0) {
        // super() must be the first statement, so the file is already open here:
        // release the handle before rejecting the argument, otherwise it leaks
        super.close();
        throw new IllegalArgumentException("bufsize must be positive: " + bufsize);
    }
    BUF_SIZE = bufsize;
    buffer = new byte[bufsize];
    invalidate();
}
/**
 * Creates a new instance for the given file with an explicit buffer size.
 * Delegates to the String-based constructor using the absolute path of the file.
 * @param file The file to open.
 * @param mode The access mode ("r", "rw", etc.), handed on unchanged.
 * @param bufsize The buffer size (in bytes) to use.
 * @throws IOException
 */
public BufferedRandomAccessFile(File file, String mode, int bufsize) throws IOException {
this(file.getAbsolutePath(), mode, bufsize);
}
/**
 * Creates a new instance for the given file with the default buffer size of 1 MiB (1048576 bytes).
 * Delegates to the String-based constructor using the absolute path of the file.
 * @param file The file to open.
 * @param mode The access mode ("r", "rw", etc.), handed on unchanged.
 * @throws IOException
 */
public BufferedRandomAccessFile(File file, String mode) throws IOException {
    this(file.getAbsolutePath(), mode, 1024 * 1024);
}
/**
 * Reads one byte from the current position.
 *
 * @return The byte as an unsigned value in the range 0 to 255, or -1 in case the end was reached.
 * @throws IOException if refilling the buffer fails
 */
@Override
public synchronized final int read() throws IOException {
    if (buf_pos >= buf_end && fillBuffer() <= 0) {
        return -1; // nothing buffered and nothing more to load
    }
    // Mask off the sign extension: returning the raw byte would yield negative
    // values for bytes >= 0x80, and 0xFF would look exactly like the -1 end marker.
    return buffer[buf_pos++] & 0xFF;
}
/**
 * Loads the next chunk of up to BUF_SIZE bytes from the underlying file into the internal buffer.
 * After a successful load the buffer cursor points at the start of the new chunk;
 * when the underlying read reports a negative count the buffer state is left untouched.
 * @return the number of bytes loaded, or the negative value reported by the underlying read
 * @throws IOException
 */
private int fillBuffer() throws IOException {
    final int loaded = super.read(buffer, 0, BUF_SIZE);
    if (loaded < 0) {
        return loaded;
    }
    buf_pos = 0;
    buf_end = loaded;
    real_pos += loaded;
    return loaded;
}
/**
 * Drops all buffered bytes and re-reads the position of the underlying file pointer.
 * @throws IOException
 */
private void invalidate() throws IOException {
    buf_pos = buf_end = 0;
    real_pos = super.getFilePointer();
}
/**
 * Reads up to len bytes into the passed array, refilling the internal buffer as often as needed.
 * Fewer than len bytes are delivered only if the end of the file is reached.
 *
 * @param b The array to read the bytes into.
 * @param off Offset within b at which the first byte is stored.
 * @param len Maximum number of bytes to read.
 * @return Number of bytes read, or -1 if len is positive and the end of the file
 *         was reached before any byte could be read.
 * @throws IndexOutOfBoundsException if off or len is negative, or len exceeds b.length - off
 * @throws IOException if refilling the buffer fails
 */
@Override
public synchronized int read(byte b[], int off, int len) throws IOException {
    if (off < 0 || len < 0 || len > b.length - off) {
        throw new IndexOutOfBoundsException("off=" + off + ", len=" + len + ", b.length=" + b.length);
    }
    int done = 0;
    while (done < len) {
        if (buf_pos >= buf_end && fillBuffer() <= 0) {
            break; // end of file
        }
        // copy whole runs straight out of the buffer; going through read() byte by
        // byte is slow and cannot tell a 0xFF data byte from the end marker
        final int chunk = Math.min(len - done, buf_end - buf_pos);
        System.arraycopy(buffer, buf_pos, b, off + done, chunk);
        buf_pos += chunk;
        done += chunk;
    }
    // The RandomAccessFile contract signals end of file with -1. Returning 0 here
    // would make the inherited readFully() loop forever at the end of the file.
    return (done == 0 && len > 0) ? -1 : done;
}
/**
 * Reports the logical read position, which is the position of the underlying
 * file pointer minus the bytes that are buffered but not yet handed out.
 * @return The byte position of the pointer in the file.
 */
@Override
public synchronized long getFilePointer() throws IOException {
    final int unread = buf_end - buf_pos;
    return real_pos - unread;
}
/**
 * Public seeking is disabled: this override throws on every call because a seek
 * from outside would not work in concurrent environments.
 * All seek operations must be encapsulated in synchronized methods of this class.
 * @throws UnsupportedOperationException always
 */
@Override
public synchronized void seek(long pos) throws IOException {
    final String reason = "seek cannot be called public to avoid synchronization issues";
    throw new UnsupportedOperationException(reason);
}
/**
 * Moves the internal pointer to the passed (byte) position in the file.
 * If the target lies within the bytes that are currently buffered, only the buffer
 * cursor is moved; otherwise the underlying file is repositioned and the buffer dropped.
 *
 * @param pos the byte position to move to.
 * @throws IOException if repositioning the underlying file fails
 */
private void seekPrivate(long pos) throws IOException {
    // Distance from the target back to the end of the buffered window. This must stay
    // a long: narrowing it to int wraps for distances of 2^31 bytes and more, which
    // could make a far-away position look as if it were inside the buffer.
    final long behind = real_pos - pos;
    if (behind >= 0 && behind <= buf_end) {
        buf_pos = buf_end - (int) behind; // safe: behind is at most buf_end here
    } else {
        super.seek(pos);
        invalidate();
    }
}
/**
 * Moves to the given position and reads from there into the passed array,
 * asking for as many bytes as the array can hold.
 * The number of bytes actually read is not reported to the caller.
 * @param b the array to read into
 * @param pos the byte position in the file to start reading from
 * @throws IOException
 */
public synchronized void read(final byte[] b, final long pos) throws IOException {
    this.seekPrivate(pos);
    final int wanted = b.length;
    this.read(b, 0, wanted);
}
/**
 * Appends the given bytes followed by a '\n' at the end of the file.
 * The read buffer is dropped afterwards.
 * @param b the content of the line, without the line terminator
 * @return the seek position where the line started
 * @throws IOException
 */
public synchronized long appendLine(final byte[] b) throws IOException {
    final long lineStart = length();
    seekPrivate(lineStart); // the new line begins at the current end of the file
    write(b);
    writeByte((byte) '\n');
    // dropping the buffer is the simple option; refreshing it with the bytes
    // just written could be better
    invalidate();
    return lineStart;
}
/**
 * Reads the next line together with the file position at which it starts.
 * The line is read as byte[] rather than as a parsed UTF-8 string, so that the
 * exact number of bytes of the line is known.
 * @return an IndexedLine object with the text and the start position, or null if no line could be read
 * @throws IOException
 */
public synchronized IndexedLine readIndexedLine() throws IOException {
    final long lineStart = real_pos - (buf_end - buf_pos);
    final byte[] line = getNextLine();
    if (line == null) {
        return null;
    }
    return new IndexedLine(lineStart, line);
}
/**
 * A line of text as raw bytes, together with the position that was recorded for it.
 * Both values are fixed at construction time; the byte array is stored and
 * returned as-is, without copying.
 */
public static class IndexedLine {
    private final long pos;
    private final byte[] text;

    /**
     * @param pos the position belonging to the line
     * @param text the bytes of the line
     */
    public IndexedLine(long pos, byte[] text) {
        this.pos = pos;
        this.text = text;
    }

    /** @return the position given at construction */
    public long getPos() {
        return pos;
    }

    /** @return the bytes of the line (the same array that was passed in) */
    public byte[] getText() {
        return text;
    }

    /** @return the line converted to a String by the UTF8.String helper */
    @Override
    public String toString() {
        return UTF8.String(this.text);
    }
}
/**
 * Returns the next line from the file as raw bytes, without its line terminator.
 * A line ends at "\n", at "\r\n" or at a single "\r"; the last line of the file may
 * also end without any terminator. In case no data could be loaded (generally as the
 * end of the file was reached) null is returned.
 *
 * This method is not synchronized itself; readIndexedLine calls it while holding
 * the lock of this instance.
 *
 * @return The bytes of the next line, or null in case the end of the file was reached.
 * @throws IOException if refilling the buffer fails
 */
private final byte[] getNextLine() throws IOException {
    if (buf_pos >= buf_end && fillBuffer() <= 0) {
        return null; // nothing left to read
    }
    // Collects the head of a line that spans more than one buffer fill. It stays
    // null on the common path where the whole line is already in the buffer.
    ByteArrayOutputStream spill = null;
    while (true) {
        // scan the buffered window for the first terminator byte
        int end = buf_pos;
        while (end < buf_end && buffer[end] != '\n' && buffer[end] != '\r') {
            end++;
        }
        final int chunk = end - buf_pos;
        if (end == buf_end) {
            // no terminator in the window: keep what we have and load more
            if (spill == null) {
                spill = new ByteArrayOutputStream();
            }
            spill.write(buffer, buf_pos, chunk);
            buf_pos = buf_end;
            if (fillBuffer() <= 0) {
                return spill.toByteArray(); // last line of the file, no terminator
            }
            continue;
        }
        // terminator found at index 'end': assemble the line before touching the buffer again
        final byte[] line;
        if (spill == null) {
            line = new byte[chunk];
            System.arraycopy(buffer, buf_pos, line, 0, chunk);
        } else {
            spill.write(buffer, buf_pos, chunk);
            line = spill.toByteArray();
        }
        final boolean endsWithCr = buffer[end] == '\r';
        buf_pos = end + 1;
        // "\r\n" is a single terminator whose '\n' may only arrive with the next fill.
        // Only a '\n' is consumed here; any other byte stays in place as the first
        // byte of the following line.
        if (endsWithCr && (buf_pos < buf_end || fillBuffer() > 0) && buffer[buf_pos] == '\n') {
            buf_pos++;
        }
        return line;
    }
}
/**
 * Runs the embedded Test class with the JUnit text-mode runner.
 * @param args not used
 */
public static void main(String[] args) {
    final Class<Test> suite = Test.class;
    junit.textui.TestRunner.run(suite);
}
public static class Test extends TestCase {
public static File getTestFile() {
return new File("/tmp/test_" + System.currentTimeMillis());
}
public static String[] getTestLines(int count) {
Random r = new Random(0);
String[] lines = new String[count];
for (int i = 0; i < count; i++) lines[i] = "{\"" + Long.toString(r.nextLong()) + "\":\"X\"}";
return lines;
}
public static void writeLines(File f, String[] l) throws IOException {
if (f.exists()) f.delete();
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f)));
for (String s: l) {writer.write(s); writer.write('\n');}
writer.close();
}
private File testFile;
private String[] testLines;
@Before
public void setUp() throws Exception {
this.testFile = getTestFile();
this.testLines = getTestLines(1000000);
}
@After
public void tearDown() throws Exception {
this.testFile.delete();
}
public void testSimultanousWriteAndRead() throws IOException {
if (this.testFile.exists()) this.testFile.delete();
BufferedRandomAccessFile braf = new BufferedRandomAccessFile(this.testFile, "rw", 5000);
for (int i = 0; i < this.testLines.length; i++) {
long pos = braf.getFilePointer();
braf.appendLine(UTF8.getBytes(this.testLines[i]));
braf.seekPrivate(pos);
byte[] b = braf.getNextLine();
if (!ASCII.String(b).equals(this.testLines[i])) System.out.println(ASCII.String(b) + " != " + this.testLines[i]);
assertTrue(ASCII.String(b).equals(this.testLines[i]));
}
braf.close();
}
public void testSequentialWriteThenRead() throws IOException {
writeLines(this.testFile, this.testLines);
BufferedRandomAccessFile braf = new BufferedRandomAccessFile(this.testFile, "rw", 5000);
Map<Long, String> m = new HashMap<>();
// test if sequential read is identical to original
for (int i = 0; i < this.testLines.length; i++) {
long pos = braf.getFilePointer();
byte[] b = braf.getNextLine();
if (!ASCII.String(b).equals(this.testLines[i])) System.out.println(ASCII.String(b) + " != " + this.testLines[i]);
assertTrue(ASCII.String(b).equals(this.testLines[i]));
m.put(pos, this.testLines[i]);
}
// test if random read is identical to original
for (Map.Entry<Long, String> e: m.entrySet()) {
braf.seekPrivate(e.getKey());
byte[] b = braf.getNextLine();
assertTrue(ASCII.String(b).equals(e.getValue()));
}
braf.close();
}
}
}