package com.milaboratory.util;
import cc.redberry.pipe.CUtils;
import cc.redberry.pipe.OutputPort;
import cc.redberry.pipe.OutputPortCloseable;
import cc.redberry.pipe.util.Chunk;
import gnu.trove.list.array.TLongArrayList;
import org.apache.commons.io.output.CloseShieldOutputStream;
import org.apache.commons.io.output.CountingOutputStream;
import java.io.*;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.PriorityQueue;
/**
 * File-backed merge sorter for a stream of objects.
 *
 * <p>{@link #build()} reads the source port chunk by chunk, sorts every chunk in memory with the supplied
 * comparator and appends it to the temporary file through the supplied {@link ObjectSerializer}, remembering
 * the byte offset at which each chunk starts. {@link #getSorted()} then opens one reader per chunk and merges
 * them through a priority queue.</p>
 *
 * <p>The merge step assumes that every chunk except the last one holds exactly {@code chunkSize} elements.</p>
 *
 * Created by poslavsky on 28/02/2017.
 */
public final class Sorter<T> {
    private final OutputPort<T> initialSource;
    private final Comparator<T> comparator;
    private final int chunkSize;
    private final ObjectSerializer<T> serializer;
    private final File tempFile;
    /** Byte offsets in {@link #tempFile} at which the serialized chunks start, in write order. */
    private final TLongArrayList chunkOffsets = new TLongArrayList();
    /** Number of elements in the most recently written chunk; {@code -1} until a chunk has been written. */
    private int lastChunkSize = -1;

    Sorter(OutputPort<T> initialSource,
           Comparator<T> comparator,
           int chunkSize,
           ObjectSerializer<T> serializer,
           File tempFile) {
        this.initialSource = initialSource;
        this.comparator = comparator;
        this.chunkSize = chunkSize;
        this.serializer = serializer;
        this.tempFile = tempFile;
    }

    /**
     * Sorts the content of {@code initialSource} using {@code tempFile} as intermediate storage.
     *
     * @param initialSource port providing the objects to sort
     * @param comparator    ordering to apply
     * @param chunkSize     chunk size passed to {@code CUtils.chunked}; each chunk is sorted in memory
     * @param serializer    serializer used to write chunks to and read them back from {@code tempFile}
     * @param tempFile      file used to store the sorted chunks; it is deleted when the returned port is closed
     * @param <T>           type of the sorted objects
     * @return port returning the objects in the order defined by {@code comparator}; must be closed by the caller
     * @throws IOException if the temporary file cannot be written or opened for reading
     */
    public static <T> OutputPortCloseable<T> sort(
            OutputPort<T> initialSource,
            Comparator<T> comparator,
            int chunkSize,
            ObjectSerializer<T> serializer,
            File tempFile) throws IOException {
        Sorter<T> sorter = new Sorter<>(initialSource, comparator, chunkSize, serializer, tempFile);
        sorter.build();
        return sorter.getSorted();
    }

    /**
     * Drains the source port, writing each chunk to the temporary file in sorted order and recording
     * the offset of every chunk.
     *
     * @throws IOException if the temporary file cannot be opened or written
     */
    void build() throws IOException {
        try (CountingOutputStream output = new CountingOutputStream(
                new BufferedOutputStream(new FileOutputStream(tempFile), 1024 * 1024))) {
            OutputPort<Chunk<T>> chunked = CUtils.buffered(CUtils.chunked(initialSource, chunkSize), 1);

            // Chunk content is exposed as Object[], so the comparator has to be viewed as Comparator<Object>;
            // every element of the array originates from the source port and therefore is a T.
            @SuppressWarnings("unchecked")
            Comparator<Object> objectComparator = (Comparator<Object>) (Comparator<?>) comparator;

            Chunk<T> chunk;
            while ((chunk = chunked.take()) != null) {
                Object[] data = chunk.toArray();
                Arrays.sort(data, objectComparator);

                // The counter sits in front of the buffer, so its value is the logical offset of the next
                // chunk regardless of how much data is still buffered.
                chunkOffsets.add(output.getByteCount());

                @SuppressWarnings("unchecked")
                Collection<T> sorted = (Collection<T>) (Collection<?>) Arrays.asList(data);
                // The serializer is allowed to close its stream; the shield keeps the file stream open.
                serializer.write(sorted, new CloseShieldOutputStream(output));
                lastChunkSize = data.length;
            }
        }
    }

    /**
     * Opens the chunks written by {@link #build()} and returns a port merging them.
     *
     * @return port returning the merged content of all chunks
     * @throws IOException if a chunk cannot be opened or its first record cannot be read
     */
    OutputPortCloseable<T> getSorted() throws IOException {
        return new MergeSortingPort();
    }

    /**
     * Closes {@code resource}, attaching a failure of the close operation to {@code primary} as a
     * suppressed exception instead of letting it replace {@code primary}.
     *
     * @param resource resource to close, {@code null} is ignored
     * @param primary  exception that is about to be propagated
     */
    private static void closeSuppressing(Closeable resource, Throwable primary) {
        if (resource == null)
            return;
        try {
            resource.close();
        } catch (IOException | RuntimeException e) {
            primary.addSuppressed(e);
        }
    }

    /**
     * Port performing the k-way merge: the queue holds one reader per non-exhausted chunk, ordered by the
     * record each reader currently points to.
     */
    private final class MergeSortingPort implements OutputPortCloseable<T> {
        final PriorityQueue<SortedBlockReader> queue = new PriorityQueue<>();
        private boolean closed = false;

        public MergeSortingPort() throws IOException {
            SortedBlockReader pending = null;
            try {
                int chunks = chunkOffsets.size();
                for (int i = 0; i < chunks; i++) {
                    int size = i == chunks - 1 ? lastChunkSize : chunkSize;
                    pending = new SortedBlockReader(tempFile, chunkOffsets.get(i), size);
                    pending.advance();
                    if (pending.current() != null)
                        queue.add(pending);
                    else
                        // A reader without records must not enter the queue: ordering readers
                        // requires a current record on both sides of the comparison.
                        pending.close();
                    pending = null;
                }
            } catch (IOException | RuntimeException e) {
                // Do not leak the readers opened so far.
                closeSuppressing(pending, e);
                for (SortedBlockReader opened : queue)
                    closeSuppressing(opened, e);
                queue.clear();
                throw e;
            }
        }

        /**
         * Returns the next object in sorted order.
         *
         * @return next object, or {@code null} if all chunks are exhausted or the port was closed
         * @throws RuntimeException wrapping the {@link IOException} raised while reading the temporary file
         */
        @Override
        public synchronized T take() {
            if (closed || queue.isEmpty())
                return null;

            SortedBlockReader head = queue.poll();
            T result = head.current();
            try {
                // Advance the reader
                head.advance();
                if (head.current() != null) // If reader has more records put it back to queue
                    queue.add(head);
                else // If reader was completely drained close it and don't put it back to queue
                    head.close();
            } catch (IOException e) {
                // The reader is no longer tracked by the queue, so it has to be released here.
                closeSuppressing(head, e);
                throw new RuntimeException(e);
            }
            return result;
        }

        /**
         * Closes all remaining chunk readers and deletes the temporary file. Subsequent calls do nothing.
         *
         * @throws RuntimeException wrapping the first {@link IOException} raised while closing the readers;
         *                          the remaining readers are still closed and the file deletion is still attempted
         */
        @Override
        public synchronized void close() {
            if (closed)
                return;
            closed = true;

            IOException failure = null;
            for (SortedBlockReader block : queue)
                try {
                    block.close();
                } catch (IOException e) {
                    if (failure == null)
                        failure = e;
                    else
                        failure.addSuppressed(e);
                }
            queue.clear();

            // Best effort: the result of delete() is intentionally not checked.
            tempFile.delete();

            if (failure != null)
                throw new RuntimeException(failure);
        }
    }

    /**
     * Sequential reader of a single sorted chunk of the temporary file. Readers are ordered by the record
     * they currently point to.
     */
    private final class SortedBlockReader implements Comparable<SortedBlockReader>, Closeable {
        final DataInputStream input;
        /** Number of records in this chunk. */
        final int chunkSize;
        /** Number of records requested from {@link #port} so far. */
        private int position = 0;
        private final OutputPort<T> port;
        private T current = null;

        /**
         * @param file        file holding the serialized chunks
         * @param chunkOffset byte offset of the chunk inside {@code file}
         * @param chunkSize   number of records in the chunk
         * @throws IOException if the file cannot be opened or positioned
         */
        public SortedBlockReader(File file,
                                 long chunkOffset,
                                 int chunkSize) throws IOException {
            this.chunkSize = chunkSize;
            final FileInputStream fo = new FileInputStream(file);
            final DataInputStream in;
            final OutputPort<T> records;
            try {
                // Setting file position to the beginning of the chunk
                fo.getChannel().position(chunkOffset);
                in = new DataInputStream(new BufferedInputStream(fo, 1024));
                records = serializer.read(in);
            } catch (IOException | RuntimeException e) {
                // Release the file handle if the reader cannot be fully constructed.
                closeSuppressing(fo, e);
                throw e;
            }
            this.input = in;
            this.port = records;
        }

        /**
         * Moves to the next record of the chunk; once {@code chunkSize} records have been requested
         * {@link #current()} becomes {@code null} and the underlying port is not queried any more.
         */
        public void advance() throws IOException {
            if (position == chunkSize)
                current = null;
            else {
                ++position;
                current = port.take();
            }
        }

        /**
         * @return record this reader currently points to, or {@code null} before the first
         * {@link #advance()} and after the chunk is exhausted
         */
        public T current() {
            return current;
        }

        @Override
        public void close() throws IOException {
            this.input.close();
        }

        @Override
        public int compareTo(SortedBlockReader o) {
            return comparator.compare(current, o.current);
        }
    }

    /**
     * Serialization strategy used to store chunks in the temporary file and to read them back.
     *
     * @param <O> type of serialized objects
     */
    public interface ObjectSerializer<O> {
        /**
         * Implementation may close stream.
         *
         * @param data   objects
         * @param stream output stream
         */
        void write(Collection<O> data, OutputStream stream);

        /**
         * Creates a port reading objects from the stream. The sorter requests from the port at most as many
         * objects as were passed to the corresponding {@link #write(Collection, OutputStream)} call; the stream
         * itself is positioned at the first byte produced by that call and is not bounded at the end of the
         * chunk.
         *
         * @param stream input stream
         * @return port returning deserialized objects
         */
        OutputPort<O> read(InputStream stream);
    }
}