/*
* Copyright 2011 Future Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.araqne.logdb.query.command;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import org.araqne.logdb.ObjectComparator;
import org.araqne.logdb.QueryCommand;
import org.araqne.logdb.QueryStopReason;
import org.araqne.logdb.Row;
import org.araqne.logdb.RowBatch;
import org.araqne.logdb.impl.TopSelector;
import org.araqne.logdb.query.parser.ParseResult;
import org.araqne.logdb.query.parser.QueryTokenizer;
import org.araqne.logdb.sort.CloseableIterator;
import org.araqne.logdb.sort.Item;
import org.araqne.logdb.sort.ParallelMergeSorter;
public class Sort extends QueryCommand {
// Partitioned sorts whose limit is at or below this value keep a bounded
// per-partition buffer (sortBuffer) before handing items to the sorter.
private static final int GROUPBY_LIMIT_THRESHOLD = 100;
// Unpartitioned sorts whose limit is at or below this value use a TopSelector
// instead of a ParallelMergeSorter.
private static final int TOP_OPTIMIZE_THRESHOLD = 10000;
// Once itemCount reaches this value, the whole sortBuffer is flushed into the
// sorter and cleared.
private static final int FLUSH_THRESHOLD = 100000;
// Maximum number of rows to emit (per partition when partition fields are
// given); null means no limit.
private Integer limit;
// Sort fields exactly as passed to the constructor.
private SortField[] fields;
// Names of the partition fields; an empty list means an unpartitioned sort.
private List<String> partitionFields;
// Fields actually used for comparison: the partition fields (ascending)
// followed by the sort fields, or the sort fields alone when unpartitioned.
private SortField[] compareFields;
// Compares individual field values inside the comparators.
private ObjectComparator cmp;
// Created in onStart() when needed; set to null in onClose().
private ParallelMergeSorter sorter;
// Created in onStart() for small unpartitioned limits; set to null in onClose().
private TopSelector<Item> top;
// Per-partition bounded queues, keyed by the list of partition field values.
// Only allocated for partitioned sorts with a small limit.
private Map<List<Object>, PriorityQueue<Item>> sortBuffer;
// Number of items currently held in sortBuffer.
private Integer itemCount;
// Reversed partition comparator used to order the queues in sortBuffer.
private PartitionComparator pComparator;
public Sort(Integer limit, SortField[] fields, List<String> partitionFields) {
this.limit = limit;
this.fields = fields;
this.partitionFields = partitionFields;
this.cmp = new ObjectComparator();
if (partitionFields.size() > 0) {
// merge partition fields + sort fields
List<SortField> l = new ArrayList<SortField>();
for (String partition : partitionFields) {
l.add(new SortField(partition));
}
for (SortField field : fields) {
l.add(field);
}
compareFields = l.toArray(new SortField[0]);
} else
compareFields = fields;
}
/**
 * Returns the name of this command.
 *
 * @return the literal string "sort"
 */
@Override
public String getName() {
    final String commandName = "sort";
    return commandName;
}
/**
 * Chooses and allocates the sorting strategy before any row is pushed.
 * 
 * Unpartitioned sorts use a TopSelector when the limit is at most
 * TOP_OPTIMIZE_THRESHOLD and a ParallelMergeSorter otherwise. Partitioned
 * sorts always use a ParallelMergeSorter and, when the limit is at most
 * GROUPBY_LIMIT_THRESHOLD, additionally allocate the per-partition buffer.
 */
@Override
public void onStart() {
    if (partitionFields.isEmpty()) {
        boolean smallLimit = limit != null && limit <= TOP_OPTIMIZE_THRESHOLD;
        if (smallLimit) {
            this.top = new TopSelector<Item>(limit, new DefaultComparator());
            return;
        }

        this.sorter = new ParallelMergeSorter(new DefaultComparator());
        initSorter();
        return;
    }

    this.sorter = new ParallelMergeSorter(new PartitionComparator(false));
    initSorter();

    if (limit == null || limit > GROUPBY_LIMIT_THRESHOLD)
        return;

    // small per-partition limit: buffer the best rows of each partition
    sortBuffer = new HashMap<List<Object>, PriorityQueue<Item>>();
    itemCount = 0;
    pComparator = new PartitionComparator(true);
}
/**
 * Tags the sorter with "_&lt;query id&gt;_&lt;yyyyMMdd_HHmmss timestamp&gt;_".
 * The query id is 0 when this command is not attached to a query.
 */
private void initSorter() {
    int queryId = getQuery() != null ? getQuery().getId() : 0;
    String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date());

    StringBuilder tag = new StringBuilder();
    tag.append("_").append(queryId).append("_").append(timestamp).append("_");
    sorter.setTag(tag.toString());
}
/**
 * Returns the row limit given at construction.
 *
 * @return the limit, or null when no limit was set
 */
public Integer getLimit() {
    return this.limit;
}
/**
 * Returns the sort fields given at construction.
 *
 * @return the internal sort field array (not a copy)
 */
public SortField[] getFields() {
    return this.fields;
}
/**
 * Accepts a single row and routes it to the active sorting strategy.
 *
 * @param m
 *            the row to sort
 * @throws IllegalStateException
 *             if adding the row fails with an IOException
 */
@Override
public void onPush(Row m) {
    try {
        if (!partitionFields.isEmpty()) {
            sortbyPartitionFields(m);
            return;
        }

        if (top != null) {
            top.add(new Item(m.map(), null));
            return;
        }

        if (sorter == null)
            return;

        // onClose() thread can interfere
        synchronized (sorter) {
            sorter.add(new Item(m.map(), null));
        }
    } catch (IOException e) {
        throw new IllegalStateException("sort failed, query " + query, e);
    }
}
/**
 * Accepts a batch of rows and routes each one to the active sorting
 * strategy, exactly as {@link #onPush(Row)} does for a single row.
 * 
 * When {@code rowBatch.selectedInUse} is set, only the rows addressed by the
 * first {@code rowBatch.size} entries of {@code rowBatch.selected} are
 * consumed; otherwise the first {@code rowBatch.size} rows are consumed.
 *
 * @param rowBatch
 *            the batch to sort
 * @throws IllegalStateException
 *             if adding a row fails with an IOException
 */
@Override
public void onPush(RowBatch rowBatch) {
    try {
        boolean useSelection = rowBatch.selectedInUse;
        for (int i = 0; i < rowBatch.size; i++) {
            int index = useSelection ? rowBatch.selected[i] : i;
            Row row = rowBatch.rows[index];

            if (partitionFields.size() > 0) {
                sortbyPartitionFields(row);
            } else if (top != null) {
                top.add(new Item(row.map(), null));
            } else if (sorter != null) {
                // onClose() thread can interfere; the selected-rows path
                // previously added to the sorter without taking this lock
                synchronized (sorter) {
                    sorter.add(new Item(row.map(), null));
                }
            }
        }
    } catch (IOException e) {
        throw new IllegalStateException("sort failed, query " + query, e);
    }
}
/**
 * Reports whether this command is a reducer.
 *
 * @return always true
 */
@Override
public boolean isReducer() {
    final boolean reducer = true;
    return reducer;
}
/**
 * Emits the sorted rows downstream and releases the sort state.
 * 
 * With a TopSelector, every entry returned by {@code getTopEntries()} is
 * pushed. With a sorter, the sort is cancelled unless the stop reason is
 * End or PartialFetch; otherwise any buffered partition items are flushed
 * into the sorter, the sorter is run, and at most {@code limit} rows (per
 * partition when partitioned) are pushed. For partitioned sorts the compare
 * field values that were stripped from each row on push are put back before
 * the row is emitted.
 *
 * @param reason
 *            why the query is stopping
 * @throws IllegalStateException
 *             if sorting fails while this command is not attached to a
 *             query that could be cancelled with the failure
 */
@SuppressWarnings("unchecked")
@Override
public void onClose(QueryStopReason reason) {
    this.status = Status.Finalizing;

    if (top != null) {
        Iterator<Item> it = top.getTopEntries();
        while (it.hasNext()) {
            Item item = it.next();
            pushPipe(new Row((Map<String, Object>) item.getKey()));
        }

        // support sorter cache GC when query processing is ended
        top = null;
    } else if (sorter != null) {
        // TODO: use LONG instead!
        int count = limit != null ? limit : Integer.MAX_VALUE;
        CloseableIterator it = null;
        try {
            if (reason != QueryStopReason.End && reason != QueryStopReason.PartialFetch) {
                synchronized (sorter) {
                    sorter.cancel();
                }
                return;
            }

            // hand over whatever is still buffered per partition
            if (sortBuffer != null) {
                synchronized (sorter) {
                    for (PriorityQueue<Item> flushItems : sortBuffer.values()) {
                        for (Item flushItem : flushItems) {
                            sorter.add(flushItem);
                        }
                    }
                }
            }

            synchronized (sorter) {
                it = sorter.sort();
            }

            if (partitionFields.size() > 0) {
                Object[] currentPK = null;
                int currentCount = 0;
                while (it.hasNext()) {
                    Object o = it.next();
                    Object[] partitionSortKey = (Object[]) ((Item) o).getKey();

                    // a change in the partition part of the key restarts the
                    // per-partition row counter
                    if (currentPK == null || !compareTwoPartitionKeys(currentPK, partitionSortKey)) {
                        currentCount = 0;
                        currentPK = partitionSortKey;
                    }

                    if (currentCount++ < count) {
                        Map<String, Object> value = (Map<String, Object>) ((Item) o).getValue();

                        // restore the compare fields removed in sortbyPartitionFields()
                        int i = 0;
                        for (SortField field : compareFields) {
                            value.put(field.getName(), partitionSortKey[i++]);
                        }
                        pushPipe(new Row(value));
                    }
                }
            } else {
                while (it.hasNext()) {
                    Object o = it.next();
                    if (--count < 0)
                        break;

                    Map<String, Object> value = (Map<String, Object>) ((Item) o).getKey();
                    pushPipe(new Row(value));
                }
            }
        } catch (Throwable t) {
            // initSorter() allows for a null query; without this guard the
            // original failure would be replaced by a NullPointerException
            if (getQuery() == null)
                throw new IllegalStateException("sort failed, query " + query, t);

            getQuery().cancel(t);
        } finally {
            // close and delete sorted run file
            if (it != null) {
                try {
                    it.close();
                } catch (IOException ignored) {
                    // best effort: a failed close must not mask the outcome
                    // of the sort itself
                }
            }

            // support sorter cache GC when query processing is ended
            sorter = null;
            sortBuffer = null;
        }
    }
}
/**
 * Adds one row to a partitioned sort.
 * 
 * The values of all compare fields are copied into the item key and then
 * removed from the row's own map, which becomes the item value. Note that
 * this modifies the map returned by {@code m.map()}.
 * 
 * When the limit is at most GROUPBY_LIMIT_THRESHOLD, the item goes into a
 * per-partition queue that keeps at most {@code limit} items, evicting the
 * queue head whenever the new item compares greater than it under
 * pComparator. Once FLUSH_THRESHOLD items are buffered, all queues are
 * flushed into the sorter. Otherwise the item goes straight to the sorter.
 *
 * @param m
 *            the row to add
 * @throws IOException
 *             if the sorter fails to accept an item
 */
private synchronized void sortbyPartitionFields(Row m) throws IOException {
    Object[] partitionSortKey = getPartitionSortKey(m);
    Object[] partitionKey = getPartitionKey(m);

    Map<String, Object> vMap = m.map();
    for (SortField field : compareFields) {
        vMap.remove(field.getName());
    }

    Item newItem = new Item(partitionSortKey, vMap);

    if (limit == null || limit > GROUPBY_LIMIT_THRESHOLD) {
        sorter.add(newItem);
        return;
    }

    List<Object> bufferKey = Arrays.asList(partitionKey);
    PriorityQueue<Item> items = sortBuffer.get(bufferKey);
    if (items == null) {
        // PriorityQueue rejects an initial capacity below 1, so a limit of
        // zero or less must not be passed through as the capacity
        items = new PriorityQueue<Item>(Math.max(1, limit), pComparator);
        items.add(newItem);
        sortBuffer.put(bufferKey, items);
        itemCount++;
    } else if (items.size() == limit) {
        // queue is full: replace the head only if the new item beats it
        Item head = items.peek();
        if (pComparator.compare(newItem, head) > 0) {
            items.poll();
            items.add(newItem);
        }
    } else {
        items.add(newItem);
        itemCount++;
    }

    if (itemCount >= FLUSH_THRESHOLD) {
        for (PriorityQueue<Item> flushItems : sortBuffer.values()) {
            for (Item flushItem : flushItems) {
                sorter.add(flushItem);
            }
        }

        sortBuffer.clear();
        itemCount = 0;
    }
}
/**
 * Tells whether two partition sort keys belong to the same partition, i.e.
 * whether their leading {@code partitionFields.size()} values all compare
 * equal under cmp.
 *
 * @param currentPK
 *            key of the partition currently being emitted
 * @param partitionSortKey
 *            key of the next item
 * @return true if every partition column compares equal, false otherwise
 */
private boolean compareTwoPartitionKeys(Object[] currentPK, Object[] partitionSortKey) {
    int column = 0;
    while (column < partitionFields.size()) {
        if (cmp.compare(currentPK[column], partitionSortKey[column]) != 0)
            return false;
        column++;
    }
    return true;
}
/**
 * Orders items whose key is a field map, comparing the compare fields in
 * order. A null value sorts before a non-null value for ascending fields and
 * after it for descending fields; two nulls are treated as equal for that
 * field.
 */
private class DefaultComparator implements Comparator<Item> {
    @SuppressWarnings("unchecked")
    @Override
    public int compare(Item o1, Item o2) {
        Map<String, Object> left = (Map<String, Object>) o1.getKey();
        Map<String, Object> right = (Map<String, Object>) o2.getKey();

        SortField[] order = compareFields;
        for (int idx = 0; idx < order.length; idx++) {
            SortField field = order[idx];
            Object a = left.get(field.name);
            Object b = right.get(field.name);

            if (a == null) {
                if (b == null)
                    continue;
                return field.asc ? -1 : 1;
            }

            if (b == null)
                return field.asc ? 1 : -1;

            int result = cmp.compare(a, b);
            if (result == 0)
                continue;

            // the first differing field decides; descending flips the sign
            return field.asc ? result : -result;
        }

        return 0;
    }
}
/**
 * Orders items whose key is an Object[] holding the values of the compare
 * fields, in compare-field order. A null value sorts before a non-null value
 * for ascending fields and after it for descending fields; two nulls are
 * treated as equal for that field.
 * 
 * When constructed with {@code reverse == true} the comparator yields the
 * exact inverse ordering, including for null values.
 */
private class PartitionComparator implements Comparator<Item> {
    private boolean reverse = false;

    /**
     * @param reverse
     *            true to invert the resulting order
     */
    public PartitionComparator(boolean reverse) {
        this.reverse = reverse;
    }

    @Override
    public int compare(Item o1, Item o2) {
        Object[] m1 = (Object[]) o1.getKey();
        Object[] m2 = (Object[]) o2.getKey();

        // an indexed loop keeps the key column aligned with its field even
        // when a column is skipped because both values are null
        for (int i = 0; i < compareFields.length; i++) {
            SortField field = compareFields[i];
            Object v1 = m1[i];
            Object v2 = m2[i];

            int diff;
            if (v1 == null && v2 == null)
                continue;
            else if (v1 == null)
                diff = -1;
            else if (v2 == null)
                diff = 1;
            else
                diff = cmp.compare(v1, v2);

            if (diff == 0)
                continue;

            if (!field.asc)
                diff = -diff;

            // apply the reversal to every outcome, null ordering included
            return reverse ? -diff : diff;
        }

        return 0;
    }
}
/**
 * A field name together with its sort direction.
 */
public static class SortField {
    private String name;
    private boolean asc;

    /**
     * Parses a comma-separated list of sort fields. A leading '-' marks a
     * field as descending, a leading '+' (or no sign) as ascending.
     *
     * @param line
     *            the text containing the field list
     * @param r
     *            parse result whose {@code next} offset is where parsing
     *            starts in {@code line}
     * @return the parsed fields in order of appearance
     * @throws IllegalArgumentException
     *             if a field token is empty or blank
     */
    public static List<SortField> parseSortFields(String line, ParseResult r) {
        List<SortField> fields = new ArrayList<SortField>();

        int next = r.next;
        while (true) {
            r = QueryTokenizer.nextString(line, next, ',');

            // trim before looking at the sign so that surrounding whitespace
            // cannot hide it, and reject an empty token explicitly instead of
            // failing inside charAt(0)
            String token = ((String) r.value).trim();
            if (token.isEmpty())
                throw new IllegalArgumentException("empty sort field in [" + line + "]");

            boolean asc = true;
            char sign = token.charAt(0);
            if (sign == '-') {
                token = token.substring(1);
                asc = false;
            } else if (sign == '+') {
                token = token.substring(1);
            }

            SortField field = new SortField(token.trim(), asc);
            fields.add(field);
            next = r.next;

            if (line.length() == r.next)
                break;
        }

        return fields;
    }

    /**
     * Renders sort fields as a ", "-separated list, prefixing descending
     * fields with "-".
     *
     * @param sortFields
     *            the fields to render
     * @return the rendered list; empty when no fields are given
     */
    public static String serialize(SortField[] sortFields) {
        StringBuilder sb = new StringBuilder();
        int i = 0;
        for (SortField f : sortFields) {
            if (i++ != 0)
                sb.append(", ");

            if (!f.isAsc())
                sb.append("-");
            sb.append(f.getName());
        }

        return sb.toString();
    }

    /**
     * Creates an ascending sort field.
     *
     * @param name
     *            the field name
     */
    public SortField(String name) {
        this(name, true);
    }

    /**
     * Creates a sort field.
     *
     * @param name
     *            the field name
     * @param asc
     *            true for ascending order, false for descending
     */
    public SortField(String name, boolean asc) {
        this.name = name;
        this.asc = asc;
    }

    /**
     * @return the field name
     */
    public String getName() {
        return name;
    }

    /**
     * @return true if this field sorts ascending
     */
    public boolean isAsc() {
        return asc;
    }

    /**
     * Flips the sort direction of this field in place.
     */
    public void reverseAsc() {
        asc = !asc;
    }

    @Override
    public String toString() {
        return "SortField [name=" + name + ", asc=" + asc + "]";
    }
}
/**
 * Renders this command as text: "sort", an optional " limit=N", the sort
 * fields separated by "," (descending fields prefixed with "-"), and, when
 * partition fields exist, " by" followed by the partition fields separated
 * by ",".
 *
 * @return the textual form, e.g. "sort limit=10 -a, b by p, q"
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder("sort");
    if (limit != null)
        sb.append(" limit=").append(limit);

    boolean first = true;
    for (SortField f : fields) {
        if (!first)
            sb.append(",");
        sb.append(" ");
        if (!f.isAsc())
            sb.append("-");
        sb.append(f.getName());
        first = false;
    }

    // the partition list tracks its own "first" state; sharing the counter
    // with the field list suppressed the "by" keyword whenever sort fields
    // were present
    first = true;
    for (String partition : partitionFields) {
        sb.append(first ? " by" : ",");
        sb.append(" ").append(partition);
        first = false;
    }

    return sb.toString();
}
/**
 * Reads the values of the partition fields from a row.
 *
 * @param m
 *            the row to read
 * @return one value per partition field, in partition-field order
 */
private Object[] getPartitionKey(Row m) {
    Object[] values = new Object[partitionFields.size()];
    int slot = 0;
    while (slot < values.length) {
        String fieldName = partitionFields.get(slot);
        values[slot] = m.get(fieldName);
        slot++;
    }
    return values;
}
/**
 * Reads the values of all compare fields from a row.
 *
 * @param m
 *            the row to read
 * @return one value per compare field, in compare-field order
 */
private Object[] getPartitionSortKey(Row m) {
    int width = compareFields.length;
    Object[] values = new Object[width];
    int slot = 0;
    while (slot < compareFields.length) {
        SortField field = compareFields[slot];
        values[slot] = m.get(field.getName());
        slot++;
    }
    return values;
}
}