/* * Copyright 2012 Future Systems * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.krakenapps.logdb.sort; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.Date; import java.util.LinkedList; import java.util.List; import java.util.PriorityQueue; import java.util.Queue; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class ParallelMergeSorter { private final Logger logger = LoggerFactory.getLogger(ParallelMergeSorter.class); private Queue<Run> runs = new LinkedBlockingDeque<Run>(); private Queue<PartitionMergeTask> merges = new LinkedBlockingQueue<PartitionMergeTask>(); private LinkedList<Item> buffer; private Comparator<Item> comparer; private int runLength = 20000; private AtomicInteger runIndexer; private volatile int flushTaskCount; private AtomicInteger cacheCount; private Object flushDoneSignal = new Object(); private ExecutorService executor; private CyclicBarrier mergeBarrier; public ParallelMergeSorter(Comparator<Item> comparer) { this.comparer = comparer; this.buffer = new LinkedList<Item>(); this.runIndexer = new AtomicInteger(); this.executor = new ThreadPoolExecutor(8, 8, 10, TimeUnit.SECONDS, new LimitedQueue<Runnable>(8)); this.cacheCount = new AtomicInteger(10000); } public class LimitedQueue<E> extends ArrayBlockingQueue<E> { private static final long serialVersionUID = 1L; public LimitedQueue(int maxSize) { super(maxSize); } @Override public boolean offer(E e) { // turn offer() and add() into a blocking calls (unless interrupted) try { put(e); return true; } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } return false; } } public void add(Item item) throws IOException { buffer.add(item); if (buffer.size() >= runLength) flushRun(); } public void addAll(List<? extends Item> items) throws IOException { buffer.addAll(items); if (buffer.size() >= runLength) flushRun(); } private void flushRun() throws IOException, FileNotFoundException { LinkedList<Item> buffered = buffer; if (buffered.isEmpty()) return; buffer = new LinkedList<Item>(); synchronized (flushDoneSignal) { flushTaskCount++; } executor.submit(new FlushWorker(buffered)); } public CloseableIterator sort() throws IOException { // flush rest objects flushRun(); buffer = null; // wait flush done while (true) { synchronized (flushDoneSignal) { if (flushTaskCount == 0) break; try { flushDoneSignal.wait(); } catch (InterruptedException e) { } logger.debug("kraken logdb: remaining runs {}, task count: {}", runs.size(), flushTaskCount); } } // partition logger.debug("kraken logdb: start partitioning"); long begin = new Date().getTime(); Partitioner partitioner = new Partitioner(comparer); List<SortedRun> sortedRuns = new LinkedList<SortedRun>(); for (Run run : runs) sortedRuns.add(new SortedRunImpl(run)); runs.clear(); int partitionCount = getProperPartitionCount(); List<Partition> partitions = partitioner.partition(partitionCount, sortedRuns); for (SortedRun r : sortedRuns) ((SortedRunImpl) r).close(); long elapsed = new Date().getTime() - begin; logger.debug("kraken logdb: [{}] partitioning completed in {}ms", partitionCount, elapsed); // n-way merge Run run = mergeAll(partitions); executor.shutdown(); if (run.cached != null) return new CacheRunIterator(run.cached.iterator()); else return new FileRunIterator(run.dataFile); } private static int getProperPartitionCount() { int processors = Runtime.getRuntime().availableProcessors(); int count = 2; while (count < processors) count <<= 1; return count; } private static class SortedRunImpl implements SortedRun { private RunInputRandomAccess ra; public SortedRunImpl(Run run) throws IOException { this.ra = new RunInputRandomAccess(run); } @Override public int length() { return ra.run.length; } @Override public Item get(int offset) { try { return ra.get(offset); } catch (IOException e) { throw new IllegalStateException(e); } } public void close() { ra.close(); } } private Run mergeAll(List<Partition> partitions) throws IOException { // enqueue partition merge int id = 0; List<PartitionMergeTask> tasks = new ArrayList<PartitionMergeTask>(); for (Partition p : partitions) { List<Run> runParts = new LinkedList<Run>(); for (SortedRunRange range : p.getRunRanges()) { SortedRunImpl ri = (SortedRunImpl) range.getRun(); Run run = ri.ra.run; int newId = runIndexer.incrementAndGet(); if (run.cached != null) { List<Item> sublist = run.cached.subList(range.getFrom(), range.getTo() + 1); Run r = new Run(newId, sublist); runParts.add(r); } else { Run r = new Run(newId, range.length(), run.indexFile.share(), run.dataFile.share(), range.getFrom()); runParts.add(r); } } if (runParts.size() > 0) { PartitionMergeTask task = new PartitionMergeTask(id++, runParts); tasks.add(task); } } mergeBarrier = new CyclicBarrier(tasks.size() + 1); for (PartitionMergeTask task : tasks) { merges.add(task); executor.submit(new MergeWorker(task)); } // wait partition merge try { mergeBarrier.await(); } catch (InterruptedException e) { } catch (BrokenBarrierException e) { logger.error("kraken logdb: barrier assumption fail", e); } // final merge ArrayList<PartitionMergeTask> l = new ArrayList<PartitionMergeTask>(); while (true) { PartitionMergeTask t = merges.poll(); if (t == null) break; l.add(t); } Collections.sort(l); ArrayList<Run> finalRuns = new ArrayList<Run>(); for (PartitionMergeTask t : l) { finalRuns.add(t.output); } return concat(finalRuns); } private class FlushWorker implements Runnable { private LinkedList<Item> buffered; public FlushWorker(LinkedList<Item> list) { buffered = list; } @Override public void run() { try { doFlush(); } catch (Throwable t) { logger.error("kraken logdb: failed to flush", t); } finally { synchronized (flushDoneSignal) { flushTaskCount--; flushDoneSignal.notifyAll(); } } } private void doFlush() throws IOException { Collections.sort(buffered, comparer); int id = runIndexer.incrementAndGet(); RunOutput out = new RunOutput(id, buffered.size(), cacheCount); try { for (Item o : buffered) out.write(o); } finally { Run run = out.finish(); runs.add(run); } } } private class MergeWorker implements Runnable { private PartitionMergeTask task; public MergeWorker(PartitionMergeTask task) { this.task = task; } @Override public void run() { try { Run merged = mergeRuns(task.runs); logger.debug("kraken logdb: merged run {}, input runs: {}", merged, task.runs); task.output = merged; } catch (Throwable t) { logger.error("kraken logdb: failed to merge " + task.runs, t); } finally { try { mergeBarrier.await(); } catch (InterruptedException e) { } catch (BrokenBarrierException e) { logger.error("kraken logdb: merge barrier assumption fail", e); } } } private Run mergeRuns(List<Run> runs) throws IOException { if (runs.size() == 1) return runs.get(0); List<Run> phase = new ArrayList<Run>(); for (int i = 0; i < 8; i++) { if (!runs.isEmpty()) phase.add(runs.remove(0)); } runs.add(merge(phase)); return mergeRuns(runs); } } private void writeRestObjects(RunInput in, RunOutput out) throws IOException { int count = 0; while (in.hasNext()) { out.write(in.next()); count++; } logger.debug("kraken logdb: final output writing from run #{}, count={}", in.getId(), count); } private Run concat(List<Run> finalRuns) throws IOException { logger.debug("kraken logdb: concat begins"); RunOutput out = null; List<RunInput> inputs = new LinkedList<RunInput>(); try { int total = 0; for (Run r : finalRuns) { total += r.length; inputs.add(new RunInput(r, cacheCount)); logger.debug("kraken logdb: concat run #{}", r.id); } int id = runIndexer.incrementAndGet(); out = new RunOutput(id, total, cacheCount, true); for (RunInput in : inputs) { writeRestObjects(in, out); if (out.dataBos != null) out.dataBos.flush(); } } catch (Exception e) { logger.error("kraken logdb: failed to concat " + finalRuns, e); } finally { for (RunInput input : inputs) { input.purge(); } if (out != null) return out.finish(); } return null; } private Run merge(List<Run> runs) throws IOException { if (runs.size() == 0) throw new IllegalArgumentException("runs should not be empty"); if (runs.size() == 1) { return runs.get(0); } logger.debug("kraken logdb: begin {}way merge, {}", runs.size(), runs); ArrayList<RunInput> inputs = new ArrayList<RunInput>(); PriorityQueue<RunItem> q = new PriorityQueue<RunItem>(runs.size(), new RunItemComparater()); RunOutput r3 = null; try { int total = 0; for (Run r : runs) { inputs.add(new RunInput(r, cacheCount)); total += r.length; } int id = runIndexer.incrementAndGet(); r3 = new RunOutput(id, total, cacheCount, true); while (true) { // load next inputs for (RunInput input : inputs) { if (input.loaded == null && input.hasNext()) { input.loaded = input.next(); q.add(new RunItem(input, input.loaded)); } } RunItem item = q.poll(); if (item == null) break; r3.write(item.item); item.runInput.loaded = null; } } finally { for (RunInput input : inputs) input.purge(); if (r3 != null) return r3.finish(); } logger.error("kraken logdb: merge cannot reach here, bug check!"); return null; } private class RunItemComparater implements Comparator<RunItem> { @Override public int compare(RunItem o1, RunItem o2) { return comparer.compare(o1.item, o2.item); } } private static class RunItem { private RunInput runInput; private Item item; public RunItem(RunInput runInput, Item item) { this.runInput = runInput; this.item = item; } } private class PartitionMergeTask implements Comparable<PartitionMergeTask> { private int id; private List<Run> runs; private Run output; public PartitionMergeTask(int id, List<Run> runs) { this.id = id; this.runs = runs; } @Override public int compareTo(PartitionMergeTask o) { return id - o.id; } } }