package org.maltparser.core.feature.map;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.FeatureException;
import org.maltparser.core.feature.function.FeatureFunction;
import org.maltparser.core.feature.function.FeatureMapFunction;
import org.maltparser.core.feature.value.FeatureValue;
import org.maltparser.core.feature.value.FunctionValue;
import org.maltparser.core.feature.value.MultipleFeatureValue;
import org.maltparser.core.feature.value.SingleFeatureValue;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;
/**
*
*
* @author Johan Hall
*/
public class SplitFeature implements FeatureMapFunction {
protected FeatureFunction parentFeature;
protected MultipleFeatureValue multipleFeatureValue;
protected DataFormatInstance dataFormatInstance;
protected ColumnDescription column;
protected SymbolTable table;
protected String separators;
protected Pattern separatorsPattern;
public SplitFeature(DataFormatInstance dataFormatInstance) throws MaltChainedException {
super();
setDataFormatInstance(dataFormatInstance);
multipleFeatureValue = new MultipleFeatureValue(this);
}
public void initialize(Object[] arguments) throws MaltChainedException {
if (arguments.length != 2) {
throw new FeatureException("Could not initialize SplitFeature: number of arguments are not correct. ");
}
if (!(arguments[0] instanceof FeatureFunction)) {
throw new FeatureException("Could not initialize SplitFeature: the first argument is not a feature. ");
}
if (!(arguments[1] instanceof String)) {
throw new FeatureException("Could not initialize SplitFeature: the second argument is not a string. ");
}
setParentFeature((FeatureFunction) arguments[0]);
setSeparators((String) arguments[1]);
ColumnDescription parentColumn = dataFormatInstance.getColumnDescriptionByName(parentFeature.getSymbolTable().getName());
if (parentColumn.getType() != ColumnDescription.STRING) {
throw new FeatureException("Could not initialize SplitFeature: the first argument must be a string. ");
}
setColumn(dataFormatInstance.addInternalColumnDescription("SPLIT_" + parentFeature.getSymbolTable().getName(), parentColumn));
setSymbolTable(column.getSymbolTable());
// setSymbolTable(tableHandler.addSymbolTable("SPLIT_"+parentFeature.getSymbolTable().getName(), parentFeature.getSymbolTable()));
}
public Class<?>[] getParameterTypes() {
Class<?>[] paramTypes = {org.maltparser.core.feature.function.FeatureFunction.class, java.lang.String.class};
return paramTypes;
}
public FeatureValue getFeatureValue() {
return multipleFeatureValue;
}
public String getSymbol(int code) throws MaltChainedException {
return table.getSymbolCodeToString(code);
}
public int getCode(String symbol) throws MaltChainedException {
return table.getSymbolStringToCode(symbol);
}
public void update() throws MaltChainedException {
multipleFeatureValue.reset();
parentFeature.update();
FunctionValue value = parentFeature.getFeatureValue();
if (value instanceof SingleFeatureValue) {
String symbol = ((SingleFeatureValue) value).getSymbol();
if (((FeatureValue) value).isNullValue()) {
multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(symbol), symbol);
multipleFeatureValue.setNullValue(true);
} else {
String items[];
try {
items = separatorsPattern.split(symbol);
} catch (PatternSyntaxException e) {
throw new FeatureException("The split feature '" + this.toString() + "' could not split the value using the following separators '" + separators + "'", e);
}
for (int i = 0; i < items.length; i++) {
if (items[i].length() > 0) {
multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]);
}
}
multipleFeatureValue.setNullValue(false);
}
} else if (value instanceof MultipleFeatureValue) {
if (((MultipleFeatureValue) value).isNullValue()) {
multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(((MultipleFeatureValue) value).getFirstSymbol()), ((MultipleFeatureValue) value).getFirstSymbol());
multipleFeatureValue.setNullValue(true);
} else {
for (String symbol : ((MultipleFeatureValue) value).getSymbols()) {
String items[];
try {
items = separatorsPattern.split(symbol);
} catch (PatternSyntaxException e) {
throw new FeatureException("The split feature '" + this.toString() + "' could not split the value using the following separators '" + separators + "'", e);
}
for (int i = 0; i < items.length; i++) {
multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]);
}
multipleFeatureValue.setNullValue(false);
}
}
}
}
public void updateCardinality() throws MaltChainedException {
// parentFeature.updateCardinality();
// multipleFeatureValue.setCardinality(table.getValueCounter());
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
return obj.toString().equals(this.toString());
}
public FeatureFunction getParentFeature() {
return parentFeature;
}
public void setParentFeature(FeatureFunction parentFeature) {
this.parentFeature = parentFeature;
}
public String getSeparators() {
return separators;
}
public void setSeparators(String separators) {
this.separators = separators;
separatorsPattern = Pattern.compile(separators);
}
public SymbolTable getSymbolTable() {
return table;
}
public void setSymbolTable(SymbolTable table) {
this.table = table;
}
public SymbolTableHandler getTableHandler() {
return dataFormatInstance.getSymbolTables();
}
public DataFormatInstance getDataFormatInstance() {
return dataFormatInstance;
}
public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
this.dataFormatInstance = dataFormatInstance;
}
public ColumnDescription getColumn() {
return column;
}
protected void setColumn(ColumnDescription column) {
this.column = column;
}
public int getType() {
return column.getType();
}
public String getMapIdentifier() {
return getSymbolTable().getName();
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append("Split(");
sb.append(parentFeature.toString());
sb.append(", ");
sb.append(separators);
sb.append(')');
return sb.toString();
}
}