/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.io;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
/**
* a disk-backed list in which only the index is stored in memory with
* objects/bytes kept on disk. sparse and delete are punted. when a
* value is updated, a new object is appended and the old data is left
* in place. compaction creates an new backing store and copies over
* data, deleting the old.
*
* @param <K> a codable object type
*/
@Deprecated
public class DiskBackedList<K> implements List<K> {
/** */
public static interface ItemCodec<K> {
public K decode(byte[] row) throws IOException;
public byte[] encode(K row) throws IOException;
}
private static final int headerSize = 64;
private final LinkedList<DiskBackedListEntry> master = new LinkedList<>();
private ItemCodec<K> codec;
private RandomAccessFile access;
private AccessFileHandler accessFileHandler;
private File data;
private long nextOffset;
private long firstElement;
private int cruft;
private int numSeeks = 0;
public DiskBackedList(File data, ItemCodec<K> codec) throws IOException {
this(data, codec, 1000);
}
public DiskBackedList(File data, ItemCodec<K> codec, int maxReadBufferSize) throws IOException {
this.data = data;
this.codec = codec;
boolean create = !data.exists() || data.length() == 0;
access = new RandomAccessFile(data, "rw");
this.accessFileHandler = new AccessFileHandler(access, maxReadBufferSize);
if (create) {
clear();
} else {
readHeader();
System.out.println("importing " + data + " first=" + firstElement + " next=" + nextOffset);
if (firstElement > 0) {
DiskBackedListEntry e1 = getEntry(firstElement);
e1.read();
master.add(e1);
while ((e1 = e1.getNext()) != null) {
master.add(e1);
}
}
}
}
public void setCodec(ItemCodec<K> codec) {
this.codec = codec;
}
@Override
protected void finalize() {
if (access != null) {
System.err.println("finalizing open DiskBackedList rows=" + size() + " size=" + data.length() + " @ " + data);
try {
close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@Override
public boolean add(K element) {
add(master.size(), element);
return true;
}
@Override
public void add(int index, K element) {
try {
DiskBackedListEntry added = allocate(element, index < master.size() ? master.get(index) : null);
master.add(index, added);
if (index == 0) {
setFirst(added);
} else {
DiskBackedListEntry prev = master.get(index - 1);
prev.setNext(added);
prev.update();
}
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
@Override
public boolean addAll(Collection<? extends K> c) {
for (K k : c) {
add(k);
}
return true;
}
@Override
public boolean addAll(int index, Collection<? extends K> c) {
for (K k : c) {
add(index++, k);
}
return true;
}
@Override
public void clear() {
master.clear();
firstElement = 0;
nextOffset = headerSize;
}
@Override
public boolean contains(Object o) {
return indexOf(o) >= 0;
}
@Override
public boolean containsAll(Collection<?> c) {
for (Iterator<?> iter = c.iterator(); iter.hasNext();) {
if (!contains(iter.next())) {
return false;
}
}
return true;
}
@Override
public K get(int index) {
return master.get(index).getObjectSafe();
}
@Override
public int indexOf(Object o) {
int index = 0;
for (DiskBackedListEntry next : master) {
if (next.getObjectSafe().equals(o)) {
return index;
}
index++;
}
return -1;
}
@Override
public boolean isEmpty() {
return master.size() == 0;
}
@Override
public Iterator<K> iterator() {
return listIterator();
}
@Override
public int lastIndexOf(Object o) {
int index = 0;
int found = -1;
for (DiskBackedListEntry next : master) {
if (next.getObjectSafe().equals(o)) {
found = index;
}
index++;
}
return found;
}
@Override
public ListIterator<K> listIterator() {
return listIterator(0);
}
@Override
public ListIterator<K> listIterator(final int index) {
return new ListIterator<K>() {
ListIterator<DiskBackedListEntry> listIter = master.listIterator(index);
@Override
public boolean hasNext() {
return listIter.hasNext();
}
@Override
public K next() {
return listIter.next().getObjectSafe();
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@Override
public void add(K e) {
DiskBackedList.this.add(e);
}
@Override
public boolean hasPrevious() {
return listIter.hasPrevious();
}
@Override
public int nextIndex() {
return listIter.nextIndex();
}
@Override
public K previous() {
return listIter.previous().getObjectSafe();
}
@Override
public int previousIndex() {
return listIter.previousIndex();
}
@Override
public void set(K e) {
throw new UnsupportedOperationException();
}
};
}
@Override
public boolean remove(Object o) {
if (remove(indexOf(o)) != null) {
cruft++;
return true;
}
return false;
}
@Override
public K remove(int index) {
if (index >= 0) {
DiskBackedListEntry e = master.remove(index);
try {
DiskBackedListEntry prev = index > 0 ? master.get(index - 1) : null;
DiskBackedListEntry next = index < master.size() ? master.get(index) : null;
if (prev != null) {
prev.setNext(next);
prev.update();
}
if (index == 0) {
setFirst(next);
}
cruft++;
} catch (Exception ex) {
throw new RuntimeException(ex);
}
return e.getObjectSafe();
}
return null;
}
@Override
public boolean removeAll(Collection<?> c) {
boolean success = true;
for (Iterator<?> iter = c.iterator(); iter.hasNext();) {
if (!remove(iter.next())) {
success = false;
}
}
cruft++;
return success;
}
@Override
public boolean retainAll(Collection<?> c) {
throw new UnsupportedOperationException();
}
@Override
public K set(int index, K element) {
DiskBackedListEntry e = master.get(index);
try {
DiskBackedListEntry prev = index > 0 ? master.get(index - 1) : null;
DiskBackedListEntry next = index + 1 < master.size() ? master.get(index + 1) : null;
DiskBackedListEntry swap = allocate(element, next);
master.set(index, swap);
if (prev != null) {
prev.setNext(swap);
prev.update();
}
if (index == 0) {
setFirst(swap);
}
cruft++;
return e.getObjectSafe();
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
@Override
public int size() {
return master.size();
}
@Override
public List<K> subList(int fromIndex, int toIndex) {
throw new UnsupportedOperationException();
}
@Override
public Object[] toArray() {
throw new UnsupportedOperationException();
}
@Override
public <T> T[] toArray(T[] a) {
throw new UnsupportedOperationException();
}
/**
* write DiskBackedList magic preamble and pointers
*/
private void writeHeader() throws IOException {
access.seek(0);
access.writeLong(0x12345678L);
access.writeLong(firstElement);
access.writeLong(nextOffset);
numSeeks += 1;
}
/**
* read DiskBackedList magic preamble and pointers
*/
private void readHeader() throws IOException {
access.seek(0);
access.readLong();
firstElement = access.readLong();
nextOffset = access.readLong();
numSeeks += 1;
}
/**
* fix pointers in Entries to align with master list. run after a sort()
*/
private void updatePointers() throws IOException {
DiskBackedListEntry prev = null;
for (DiskBackedListEntry next : master) {
if (prev != null) {
prev.setNext(next);
prev.update();
}
prev = next;
}
if (prev != null) {
prev.setNext(null);
prev.update();
}
}
private void setFirst(DiskBackedListEntry e) {
if (e != null) {
firstElement = e.off;
} else {
firstElement = 0;
}
}
/**
* read Entry from disk based on an offset
*/
private DiskBackedListEntry getEntry(long off) throws IOException {
if (off > 0) {
DiskBackedListEntry e = new DiskBackedListEntry(off);
e.read();
return e;
}
return null;
}
/**
* allocate a new Entry based on object data and prev/next pointers
*/
private DiskBackedListEntry allocate(K val, DiskBackedListEntry next) throws Exception {
return allocate(codec.encode(val), next);
}
/**
* allocate a new Entry based on raw data and prev/next pointers
*/
private DiskBackedListEntry allocate(byte[] data, DiskBackedListEntry next) throws Exception {
return accessFileHandler.writeToAccess(data, next);
}
/**
* create new DiskBackedList and copy over data compacting out holes
*/
public void compact() throws Exception {
if (cruft > 0) {
File newdata = new File(data.getParentFile(), data.getName().concat(".new"));
DiskBackedList<K> alt = new DiskBackedList<>(newdata, codec);
alt.addEncodedData(getEncodedData());
clear();
close();
data = alt.data;
nextOffset = alt.nextOffset;
firstElement = alt.firstElement;
access = alt.access;
cruft = 0;
}
}
/**
* clean up and close
*/
public void close() throws IOException {
if (access != null) {
writeHeader();
access.close();
access = null;
}
}
/**
* perform sort based on object comparisons
*/
public void sort(final Comparator<? super K> comp) {
long starttime = System.currentTimeMillis();
dumbSort(comp);
long stoptime = System.currentTimeMillis() - starttime;
System.out.println("dumbSort took " + stoptime + " ms");
starttime = System.currentTimeMillis();
Collections.sort(master, new Comparator<DiskBackedListEntry>() {
@Override
public int compare(DiskBackedListEntry o1, DiskBackedListEntry o2) {
return comp.compare(o1.getObjectSafe(), o2.getObjectSafe());
}
});
try {
updatePointers();
} catch (IOException e) {
throw new RuntimeException(e);
}
stoptime = System.currentTimeMillis() - starttime;
System.out.println("existing sort took " + stoptime + " ms");
}
public <K> void dumbSort(final Comparator<K> comp) {
List<K> memoryList = new ArrayList<>();
Iterator it = this.iterator();
while (it.hasNext()) {
memoryList.add((K) it.next());
}
Collections.sort(memoryList, comp);
StringBuilder buf = new StringBuilder();
for (K thing : memoryList.subList(0, 20)) {
buf.append(thing);
}
System.out.println("Here's dumb sort: " + buf.toString());
}
/**
* consume a list of encoded list elements
*/
public void addEncodedData(Iterator<byte[]> stream) throws Exception {
DiskBackedListEntry prev = master.size() > 0 ? master.getLast() : null;
while (stream.hasNext()) {
DiskBackedListEntry next = allocate(stream.next(), null);
if (prev != null) {
prev.setNext(next);
prev.update();
}
prev = next;
}
if (prev != null) {
prev.update();
}
}
/**
* produce a list of encoded list elements
*/
public Iterator<byte[]> getEncodedData() {
return new Iterator<byte[]>() {
Iterator<DiskBackedListEntry> iter = master.iterator();
@Override
public boolean hasNext() {
return iter.hasNext();
}
@Override
public byte[] next() {
try {
return iter.next().getData();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
public int getSeeks() {
return numSeeks;
}
private class AccessFileHandler {
private RandomAccessFile access;
private int maxReadBufferSize;
private int maxWriteBufferSize;
private LinkedHashMap<Long, byte[]> readBuffer = new LinkedHashMap<>();
public HashMap<Long, byte[]> writeBuffer = new HashMap<>();
public AccessFileHandler(RandomAccessFile access, int maxReadBufferSize) {
this.access = access;
this.maxReadBufferSize = maxReadBufferSize;
this.maxWriteBufferSize = 1000;
}
public byte[] getFromAccess(Long offset) throws IOException {
byte[] result = readBuffer.get(offset);
if (result == null) {
access.seek(offset + 8);
result = new byte[access.readInt()];
access.readFully(result);
numSeeks += 1;
}
putInReadBuffer(offset, result);
return result;
}
public void putInReadBuffer(Long offset, byte[] data) {
if (readBuffer.size() >= maxReadBufferSize) {
readBuffer.clear();
}
readBuffer.put(offset, data);
}
public DiskBackedListEntry writeToAccess(byte[] bytes, DiskBackedListEntry next) throws IOException {
putInReadBuffer(nextOffset, bytes);
DiskBackedListEntry e = new DiskBackedListEntry(nextOffset);
e.setNext(next);
e.write(bytes);
nextOffset += 8 + 4 + bytes.length;
return e;
}
}
/**
* pointer bag with reader/write utilities
*/
private class DiskBackedListEntry {
private long off;
private long next;
private boolean updated;
private DiskBackedListEntry(long off) {
this.off = off;
}
public void read() throws IOException {
access.seek(off);
next = access.readLong();
numSeeks += 1;
}
public void update() throws IOException {
if (updated) {
write(null);
updated = false;
}
}
public void write(byte[] data) throws IOException {
access.seek(off);
access.writeLong(next);
numSeeks += 1;
if (data != null) {
access.writeInt(data.length);
access.write(data);
}
}
public byte[] getData() throws IOException {
return accessFileHandler.getFromAccess(off);
}
@SuppressWarnings("unchecked")
public K getObject() throws Exception {
return codec.decode(getData());
}
public K getObjectSafe() {
try {
return getObject();
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
public DiskBackedListEntry getNext() throws IOException {
return getEntry(next);
}
public void setNext(DiskBackedListEntry next) {
if (next == null || next.off != this.next) {
this.next = next != null ? next.off : 0;
updated = true;
}
}
}
}