/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.core.iterators.user;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.lexicoder.AbstractEncoder;
import org.apache.accumulo.core.client.lexicoder.impl.AbstractLexicoder;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.LongCombiner;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.TypedValueCombiner;
import org.apache.accumulo.core.iterators.ValueFormatException;
import org.apache.hadoop.io.WritableUtils;
/**
* A Combiner that interprets Values as arrays of Longs and returns an array of element-wise sums.
*/
public class SummingArrayCombiner extends TypedValueCombiner<List<Long>> {
public static final Encoder<List<Long>> FIXED_LONG_ARRAY_ENCODER = new FixedLongArrayEncoder();
public static final Encoder<List<Long>> VAR_LONG_ARRAY_ENCODER = new VarLongArrayEncoder();
public static final Encoder<List<Long>> STRING_ARRAY_ENCODER = new StringArrayEncoder();
private static final String TYPE = "type";
private static final String CLASS_PREFIX = "class:";
public static enum Type {
/**
* indicates a variable-length encoding of a list of Longs using {@link SummingArrayCombiner.VarLongArrayEncoder}
*/
VARLEN,
/**
* indicates a fixed-length (8 bytes for each Long) encoding of a list of Longs using {@link SummingArrayCombiner.FixedLongArrayEncoder}
*/
FIXEDLEN,
/**
* indicates a string (comma-separated) representation of a list of Longs using {@link SummingArrayCombiner.StringArrayEncoder}
*/
STRING
}
@Override
public List<Long> typedReduce(Key key, Iterator<List<Long>> iter) {
List<Long> sum = new ArrayList<>();
while (iter.hasNext()) {
sum = arrayAdd(sum, iter.next());
}
return sum;
}
public static List<Long> arrayAdd(List<Long> la, List<Long> lb) {
if (la.size() > lb.size()) {
for (int i = 0; i < lb.size(); i++) {
la.set(i, LongCombiner.safeAdd(la.get(i), lb.get(i)));
}
return la;
} else {
for (int i = 0; i < la.size(); i++) {
lb.set(i, LongCombiner.safeAdd(lb.get(i), la.get(i)));
}
return lb;
}
}
@Override
public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
super.init(source, options, env);
setEncoder(options);
}
private void setEncoder(Map<String,String> options) {
String type = options.get(TYPE);
if (type == null)
throw new IllegalArgumentException("no type specified");
if (type.startsWith(CLASS_PREFIX)) {
setEncoder(type.substring(CLASS_PREFIX.length()));
testEncoder(Arrays.asList(0l, 1l));
} else {
switch (Type.valueOf(options.get(TYPE))) {
case VARLEN:
setEncoder(VAR_LONG_ARRAY_ENCODER);
return;
case FIXEDLEN:
setEncoder(FIXED_LONG_ARRAY_ENCODER);
return;
case STRING:
setEncoder(STRING_ARRAY_ENCODER);
return;
default:
throw new IllegalArgumentException();
}
}
}
@Override
public IteratorOptions describeOptions() {
IteratorOptions io = super.describeOptions();
io.setName("sumarray");
io.setDescription("SummingArrayCombiner can interpret Values as arrays of Longs using a variety of encodings "
+ "(arrays of variable length longs or fixed length longs, or comma-separated strings) before summing element-wise.");
io.addNamedOption(TYPE, "<VARLEN|FIXEDLEN|STRING|fullClassName>");
return io;
}
@Override
public boolean validateOptions(Map<String,String> options) {
if (super.validateOptions(options) == false)
return false;
try {
setEncoder(options);
} catch (Exception e) {
throw new IllegalArgumentException("bad encoder option", e);
}
return true;
}
public abstract static class DOSArrayEncoder<V> extends AbstractLexicoder<List<V>> {
public abstract void write(DataOutputStream dos, V v) throws IOException;
public abstract V read(DataInputStream dis) throws IOException;
@Override
public byte[] encode(List<V> vl) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
try {
WritableUtils.writeVInt(dos, vl.size());
for (V v : vl) {
write(dos, v);
}
} catch (IOException e) {
throw new NumberFormatException(e.getMessage());
}
return baos.toByteArray();
}
@Override
public List<V> decode(byte[] b) {
// This concrete implementation is provided for binary compatibility with 1.6; it can be removed in 2.0. See ACCUMULO-3789.
return super.decode(b);
}
@Override
protected List<V> decodeUnchecked(byte[] b, int offset, int origLen) {
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(b, offset, origLen));
try {
int len = WritableUtils.readVInt(dis);
List<V> vl = new ArrayList<>(len);
for (int i = 0; i < len; i++) {
vl.add(read(dis));
}
return vl;
} catch (IOException e) {
throw new ValueFormatException(e);
}
}
}
public static class VarLongArrayEncoder extends DOSArrayEncoder<Long> {
@Override
public void write(DataOutputStream dos, Long v) throws IOException {
WritableUtils.writeVLong(dos, v);
}
@Override
public Long read(DataInputStream dis) throws IOException {
return WritableUtils.readVLong(dis);
}
}
public static class FixedLongArrayEncoder extends DOSArrayEncoder<Long> {
@Override
public void write(DataOutputStream dos, Long v) throws IOException {
dos.writeLong(v);
}
@Override
public Long read(DataInputStream dis) throws IOException {
return dis.readLong();
}
}
public static class StringArrayEncoder extends AbstractEncoder<List<Long>> {
@Override
public byte[] encode(List<Long> la) {
if (la.size() == 0)
return new byte[] {};
StringBuilder sb = new StringBuilder(Long.toString(la.get(0)));
for (int i = 1; i < la.size(); i++) {
sb.append(",");
sb.append(Long.toString(la.get(i)));
}
return sb.toString().getBytes(UTF_8);
}
@Override
public List<Long> decode(byte[] b) {
// This concrete implementation is provided for binary compatibility with 1.6; it can be removed in 2.0. See ACCUMULO-3789.
return super.decode(b);
}
@Override
protected List<Long> decodeUnchecked(byte[] b, int offset, int len) {
String[] longstrs = new String(b, offset, len, UTF_8).split(",");
List<Long> la = new ArrayList<>(longstrs.length);
for (String s : longstrs) {
if (s.length() == 0)
la.add(0l);
else
try {
la.add(Long.parseLong(s));
} catch (NumberFormatException nfe) {
throw new ValueFormatException(nfe);
}
}
return la;
}
}
/**
* A convenience method for setting the encoding type.
*
* @param is
* IteratorSetting object to configure.
* @param type
* SummingArrayCombiner.Type specifying the encoding type.
*/
public static void setEncodingType(IteratorSetting is, Type type) {
is.addOption(TYPE, type.toString());
}
/**
* A convenience method for setting the encoding type.
*
* @param is
* IteratorSetting object to configure.
* @param encoderClass
* {@code Class<? extends Encoder<List<Long>>>} specifying the encoding type.
*/
public static void setEncodingType(IteratorSetting is, Class<? extends Encoder<List<Long>>> encoderClass) {
is.addOption(TYPE, CLASS_PREFIX + encoderClass.getName());
}
/**
* A convenience method for setting the encoding type.
*
* @param is
* IteratorSetting object to configure.
* @param encoderClassName
* name of a class specifying the encoding type.
*/
public static void setEncodingType(IteratorSetting is, String encoderClassName) {
is.addOption(TYPE, CLASS_PREFIX + encoderClassName);
}
}