/*
* chombo: Hadoop Map Reduce utility
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.chombo.transformer;
import java.util.HashMap;
import java.util.Map;
import org.chombo.util.BaseAttribute;
import org.chombo.util.BinaryCategoryCreator;
import org.chombo.util.ProcessorAttribute;
import org.chombo.util.Utility;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigValue;
import groovy.lang.Binding;
import groovy.lang.GroovyShell;
/**
* @author pranab
*
*/
public class NumericTransformer {
/**
* polynomoial expression
* @author pranab
*
*/
public static class LongPolynomial extends AttributeTransformer {
private long a;
private long b;
private long c;
public LongPolynomial(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
this.a = config.getInt("a");
this.b = config.getInt("b");
this.c = config.getInt("c");
}
public LongPolynomial(long a, long b, long c) {
super(1);
this.a = a;
this.b = b;
this.c = c;
}
@Override
public String[] tranform(String value) {
long in = Long.parseLong(value);
long out = a * in * in + b * in + c;
transformed[0] = "" + out;
return transformed;
}
}
/**
* polynomoial expression
* @author pranab
*
*/
public static class DoublePolynomial extends AttributeTransformer {
private double a;
private double b;
private double c;
public DoublePolynomial(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
this.a = config.getInt("a");
this.b = config.getInt("b");
this.c = config.getInt("c");
}
public DoublePolynomial(double a, double b, double c) {
super(1);
this.a = a;
this.b = b;
this.c = c;
}
@Override
public String[] tranform(String value) {
double in = Double.parseDouble(value);
double out = a * in * in + b * in + c;
transformed[0] = "" + out;
return transformed;
}
}
/**
* Custom groovy script
* @author pranab
*
*/
public static abstract class Custom extends AttributeTransformer {
private String script;
private Map<String, Object> params = new HashMap<String, Object>();
private Binding binding = new Binding();
public Custom(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
this.script = config.getString("script");
for (Map.Entry<String, ConfigValue> entry : config.entrySet()) {
Object value = entry.getValue().unwrapped();
binding.setVariable(entry.getKey(), value);
}
}
public Custom(String script, Map<String, Object> params) {
super(1);
this.script = script;
this.params = params;
for (String name : params.keySet()) {
binding.setVariable(name, params.get(name));
}
}
@Override
public String[] tranform(String value) {
Object in = getFieldValue(value);
binding.setVariable("field", in);
GroovyShell shell = new GroovyShell(binding);
Object out = shell.evaluate(script);
transformed[0] = getOutput(out);
return transformed;
}
protected abstract Object getFieldValue(String value);
protected abstract String getOutput(Object out);
}
/**
* @author pranab
*
*/
public static class LongCustom extends Custom {
public LongCustom(ProcessorAttribute prAttr, Config config) {
super(prAttr, config);
}
public LongCustom(String script, Map<String, Object> params) {
super(script, params);
}
protected Object getFieldValue(String value) {
Long in = null;
try {
in = Long.parseLong(value);
} catch (Exception ex) {
throw new IllegalArgumentException("long input expected");
}
return in;
}
protected String getOutput(Object out) {
String ret = null;
if (out instanceof Long || out instanceof Integer) {
ret = "" + out;
} else {
throw new IllegalArgumentException("int or long output expected");
}
return ret;
}
}
/**
* @author pranab
*
*/
public static class DoubleCustom extends Custom {
public DoubleCustom(ProcessorAttribute prAttr, Config config) {
super(prAttr, config);
}
public DoubleCustom(String script, Map<String, Object> params) {
super(script, params);
}
protected Object getFieldValue(String value) {
Double in = null;
try {
in = Double.parseDouble(value);
} catch (Exception ex) {
throw new IllegalArgumentException("double input expected");
}
return in;
}
protected String getOutput(Object out) {
String ret = null;
if (out instanceof Double || out instanceof Float) {
ret = "" + out;
} else {
throw new IllegalArgumentException("double or float output expected");
}
return ret;
}
}
/**
* @author pranab
*
*/
public static class Discretizer extends AttributeTransformer {
private double bucketWidth;
private String dataType;
public Discretizer(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
bucketWidth = prAttr.getBuckeWidth();
dataType = prAttr.getDataType();
}
@Override
public String[] tranform(String value) {
int bucket = 0;
if (dataType.equals(BaseAttribute.DATA_TYPE_INT)) {
int iVal = Integer.parseInt(value);
bucket = (int)(iVal / bucketWidth);
} else if (dataType.equals(BaseAttribute.DATA_TYPE_LONG)) {
long lVal = Long.parseLong(value);
bucket = (int)(lVal / bucketWidth);
} else if (dataType.equals(BaseAttribute.DATA_TYPE_DOUBLE)) {
double dVal = Double.parseDouble(value);
bucket = (int)(dVal / bucketWidth);
} else {
throw new IllegalArgumentException("only numeric data can be discretized");
}
transformed[0] = "" + bucket;;
return transformed;
}
}
/**
* @author pranab
*
*/
public static class BinaryCreator extends AttributeTransformer {
private BinaryCategoryCreator binaryCategoryCreator;
private String dataType;
public BinaryCreator(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
dataType = prAttr.getDataType();
long threshold = config.getLong("threshold");
String lowerToken = config.getString("lowerToken");
String upperToken = config.getString("upperToken");
binaryCategoryCreator = new BinaryCategoryCreator(threshold, lowerToken, upperToken);
}
@Override
public String[] tranform(String value) {
String token = null;
if (dataType.equals(BaseAttribute.DATA_TYPE_INT) || dataType.equals(BaseAttribute.DATA_TYPE_LONG)) {
long lVal = Long.parseLong(value);
token = binaryCategoryCreator.findToken(lVal);
} else {
throw new IllegalArgumentException("only numeric integer data can be discretized");
}
transformed[0] = token;
return transformed;
}
}
/**
* @author pranab
*
*/
public static abstract class Operator extends AttributeTransformer {
private boolean isInt;
protected int iOperand;
protected double dOperand;
private int precision;
public Operator(ProcessorAttribute prAttr, Config config) {
super(prAttr.getTargetFieldOrdinals().length);
isInt = prAttr.isInteger();
if (isInt) {
iOperand = config.getInt("intOperand");
} else {
dOperand = config.getDouble("dblOperand");
precision = config.getInt("precision");
}
}
@Override
public String[] tranform(String value) {
if (isInt) {
int iValue = Integer.parseInt(value);
iValue = operate(iValue);
transformed[0] = "" + iValue;
} else {
double dValue = Double.parseDouble(value);
dValue = operate(dValue);
transformed[0] = Utility.formatDouble(dValue, precision);
}
return transformed;
}
protected abstract int operate(int value);
protected abstract double operate(double value);
}
/**
* @author pranab
*
*/
public static class Adder extends Operator {
public Adder(ProcessorAttribute prAttr, Config config) {
super(prAttr, config);
}
@Override
protected int operate(int value) {
return value + iOperand;
}
@Override
protected double operate(double value) {
return value + dOperand;
}
}
/**
* @author pranab
*
*/
public static class Subtracter extends Operator {
public Subtracter(ProcessorAttribute prAttr, Config config) {
super(prAttr, config);
}
@Override
protected int operate(int value) {
return value - iOperand;
}
@Override
protected double operate(double value) {
return value - dOperand;
}
}
/**
* @author pranab
*
*/
public static class Multiplier extends Operator {
public Multiplier(ProcessorAttribute prAttr, Config config) {
super(prAttr, config);
}
@Override
protected int operate(int value) {
return value * iOperand;
}
@Override
protected double operate(double value) {
return value * dOperand;
}
}
/**
* @author pranab
*
*/
public static class Divider extends Operator {
public Divider(ProcessorAttribute prAttr, Config config) {
super(prAttr, config);
}
@Override
protected int operate(int value) {
return value / iOperand;
}
@Override
protected double operate(double value) {
return value / dOperand;
}
}
}