/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
import org.apache.jena.hadoop.rdf.types.CharacteristicWritable;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Abstract reducer which takes in tuples grouped by some node and generates
 * the initial characteristic sets.
 * <p>
 * The characteristic sets are emitted as the output key (with a
 * {@link NullWritable} value) so that in a subsequent job the characteristic
 * sets may be further combined together to total up the usage counts
 * appropriately.
 * </p>
 * <p>
 * It is important to note that the output from this reducer can be very large:
 * for a key with {@code n} distinct predicates every non-empty combination of
 * those predicates is emitted, i.e. {@code 2^n - 1} sets. Since the output
 * typically needs to be written to HDFS before being processed by further jobs
 * it is strongly recommended that you use appropriate output compression.
 * </p>
 *
 * @param <TValue>
 *            Tuple type
 * @param <T>
 *            Writable tuple type
 */
public abstract class AbstractCharacteristicSetGeneratingReducer<TValue, T extends AbstractNodeTupleWritable<TValue>> extends
        Reducer<NodeWritable, T, CharacteristicSetWritable, NullWritable> {

    private static final Logger LOG = LoggerFactory.getLogger(AbstractCharacteristicSetGeneratingReducer.class);

    /** Cached in {@link #setup(Context)} so the log level is only queried once per task. */
    private boolean tracing = false;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        this.tracing = LOG.isTraceEnabled();
    }

    /**
     * Collects one characteristic per distinct predicate seen in the values for
     * the key and then emits every non-empty combination of those
     * characteristics as a characteristic set.
     *
     * @param key
     *            Node the tuples were grouped by (not otherwise used here)
     * @param values
     *            Tuples grouped under the key
     * @param context
     *            Context to output sets to
     * @throws IOException
     *             May be thrown when writing to the context
     * @throws InterruptedException
     *             May be thrown when writing to the context
     */
    @Override
    protected void reduce(NodeWritable key, Iterable<T> values, Context context) throws IOException, InterruptedException {
        // A sorted map keeps the characteristics in key order so the sets are
        // generated in a deterministic order
        Map<NodeWritable, CharacteristicWritable> characteristics = new TreeMap<NodeWritable, CharacteristicWritable>();

        // Firstly need to find individual characteristics
        Iterator<T> iter = values.iterator();
        while (iter.hasNext()) {
            NodeWritable predicate = this.getPredicate(iter.next());
            // Single lookup: the map never holds null values so a null result
            // means the predicate has not been seen yet
            CharacteristicWritable existing = characteristics.get(predicate);
            if (existing != null) {
                existing.increment();
            } else {
                characteristics.put(predicate, new CharacteristicWritable(predicate.get()));
            }
        }

        // Then we need to produce all the possible characteristic sets based on
        // this information
        List<CharacteristicWritable> cs = new ArrayList<CharacteristicWritable>(characteristics.values());
        if (cs.isEmpty()) {
            return;
        }
        for (int size = 1; size <= cs.size(); size++) {
            this.outputSets(cs, size, context);
        }
    }

    /**
     * Output all sets of a given size
     * <p>
     * Sizes of {@code 1} and {@code cs.size()} are handled directly, any other
     * size is delegated to
     * {@link #combinations(List, int, int, CharacteristicWritable[], Context)}.
     * Within this class the method is only called with sizes from {@code 1} to
     * {@code cs.size()} inclusive.
     * </p>
     *
     * @param cs
     *            Characteristics
     * @param perSet
     *            Set size
     * @param context
     *            Context to output sets to
     * @throws IOException
     *             May be thrown when writing to the context
     * @throws InterruptedException
     *             May be thrown when writing to the context
     */
    protected void outputSets(List<CharacteristicWritable> cs, int perSet, Context context) throws IOException,
            InterruptedException {
        if (perSet == 1) {
            // Each characteristic forms a set on its own
            for (CharacteristicWritable c : cs) {
                this.writeSet(new CharacteristicSetWritable(c), context);
            }
        } else if (perSet == cs.size()) {
            // Only one possible set, the one containing every characteristic
            CharacteristicSetWritable set = new CharacteristicSetWritable();
            for (CharacteristicWritable c : cs) {
                set.add(c);
            }
            this.writeSet(set, context);
        } else {
            CharacteristicWritable[] members = new CharacteristicWritable[perSet];
            this.combinations(cs, perSet, 0, members, context);
        }
    }

    /**
     * Calculate all available combinations of N elements from the given
     * characteristics
     * <p>
     * Works recursively: each call fills the slot {@code result.length - len}
     * with every candidate from {@code startPosition} onwards that still leaves
     * enough later elements to fill the remaining slots, and a set is written
     * once no slots remain to be filled.
     * </p>
     *
     * @param cs
     *            Characteristics
     * @param len
     *            Number of elements still to be chosen
     * @param startPosition
     *            Index in {@code cs} of the first candidate for the next slot
     * @param result
     *            Result array to fill, its length is the size of each
     *            combination
     * @param context
     *            Context to write completed combinations to
     * @throws IOException
     *             May be thrown when writing to the context
     * @throws InterruptedException
     *             May be thrown when writing to the context
     */
    protected final void combinations(List<CharacteristicWritable> cs, int len, int startPosition,
            CharacteristicWritable[] result, Context context) throws IOException, InterruptedException {
        if (len == 0) {
            // Every slot is filled so the combination is complete
            this.writeSet(new CharacteristicSetWritable(result), context);
            return;
        }
        // Stop early enough that len - 1 further elements remain after i
        for (int i = startPosition; i <= cs.size() - len; i++) {
            result[result.length - len] = cs.get(i);
            combinations(cs, len - 1, i + 1, result, context);
        }
    }

    /**
     * Writes a characteristic set as an output key and traces it when trace
     * logging was enabled at setup time
     *
     * @param set
     *            Characteristic set to write
     * @param context
     *            Context to write to
     * @throws IOException
     *             May be thrown when writing to the context
     * @throws InterruptedException
     *             May be thrown when writing to the context
     */
    private void writeSet(CharacteristicSetWritable set, Context context) throws IOException, InterruptedException {
        context.write(set, NullWritable.get());
        if (this.tracing) {
            LOG.trace("Key = {}", set);
        }
    }

    /**
     * Gets the predicate for the tuple
     * <p>
     * The returned instance is used as a map key for the duration of a single
     * reduce call. NOTE(review): Hadoop may reuse value objects while iterating,
     * so implementations should presumably return an instance that is not
     * mutated by later calls - confirm against the concrete subclasses.
     * </p>
     *
     * @param tuple
     *            Tuple
     * @return Predicate of the tuple
     */
    protected abstract NodeWritable getPredicate(T tuple);
}