/*
* Sifarish: Recommendation Engine
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.sifarish.util;
import java.util.HashSet;
import java.util.Set;
import org.chombo.util.Pair;
import org.sifarish.feature.SingleTypeSchema;
/**
 * Matching profile built from one delimited record. Holds one parsed value per
 * field ordinal: an ID attribute is kept as the raw field string, a numerical
 * (int or double) attribute as a {@link Pair} range, a categorical attribute as
 * a {@link Set} of values and a text attribute as the raw field text.
 * @author pranab
 *
 */
public class MatchingProfile {
    /**
     * Parsed values indexed by field ordinal. Slots whose ordinal is not
     * covered by a schema field stay null.
     */
    private final Object[] profile;

    /**
     * Parses a delimited record into per field profile values, driven by the
     * fields of the schema entity.
     * @param line record to parse
     * @param fieldDelim regular expression separating the fields of the record
     * @param subFieldDelim regular expression separating the items inside a field
     * @param schema schema whose entity fields supply ordinal and data type
     * @throws IllegalStateException if a numerical field does not consist of exactly
     * two items or a field has an unsupported data type
     * @throws NumberFormatException if an item of a numerical field can not be parsed
     */
    public MatchingProfile(String line, String fieldDelim, String subFieldDelim, SingleTypeSchema schema) {
        String[] record = line.split(fieldDelim);
        // a newly allocated Object array is already filled with null
        profile = new Object[record.length];

        //build profile
        for (Field field : schema.getEntity().getFields()) {
            String dataType = field.getDataType();
            int ord = field.getOrdinal();
            String fieldValue = record[ord];
            String[] items = fieldValue.split(subFieldDelim);

            if (field.isId()) {
                profile[ord] = fieldValue;
            } else if (dataType.equals(Field.DATA_TYPE_INT)) {
                checkRangeItems(items);
                Pair<Integer, Integer> range = new Pair<Integer, Integer>(Integer.parseInt(items[0]),
                        Integer.parseInt(items[1]));
                profile[ord] = range;
            } else if (dataType.equals(Field.DATA_TYPE_DOUBLE)) {
                checkRangeItems(items);
                Pair<Double, Double> range = new Pair<Double, Double>(Double.parseDouble(items[0]),
                        Double.parseDouble(items[1]));
                profile[ord] = range;
            } else if (dataType.equals(Field.DATA_TYPE_CATEGORICAL)) {
                Set<String> values = new HashSet<String>();
                for (String value : items) {
                    values.add(value);
                }
                profile[ord] = values;
            } else if (dataType.equals(Field.DATA_TYPE_TEXT)) {
                // keep only this field's text, not the whole record
                profile[ord] = fieldValue;
            } else {
                throw new IllegalStateException("unsupported data type");
            }
        }
    }

    /**
     * Verifies that a numerical field was split into exactly two items.
     * @param items sub field items of a numerical field
     * @throws IllegalStateException if the number of items is not two
     */
    private static void checkRangeItems(String[] items) {
        if (2 != items.length) {
            throw new IllegalStateException("numerical profile attribute has only range");
        }
    }

    /**
     * Returns the value stored for an ID field.
     * @param fieldOrd ordinal of the field
     * @return raw field string stored at the ordinal
     * @throws ClassCastException if the value stored at the ordinal is not a string
     */
    public String getId(int fieldOrd) {
        return (String)profile[fieldOrd];
    }

    /**
     * Returns the range stored for an int field.
     * @param fieldOrd ordinal of the field
     * @return pair made of the two parsed items of the field
     */
    // the constructor stores a Pair<Integer, Integer> for every int field
    @SuppressWarnings("unchecked")
    public Pair<Integer, Integer> getIntRange(int fieldOrd) {
        return (Pair<Integer, Integer>)profile[fieldOrd];
    }

    /**
     * Returns the range stored for a double field.
     * @param fieldOrd ordinal of the field
     * @return pair made of the two parsed items of the field
     */
    // the constructor stores a Pair<Double, Double> for every double field
    @SuppressWarnings("unchecked")
    public Pair<Double, Double> getDoubleRange(int fieldOrd) {
        return (Pair<Double, Double>)profile[fieldOrd];
    }

    /**
     * Returns the value set stored for a categorical field.
     * @param fieldOrd ordinal of the field
     * @return set of the items of the field
     */
    // the constructor stores a Set<String> for every categorical field
    @SuppressWarnings("unchecked")
    public Set<String> getCategoricalSet(int fieldOrd) {
        return (Set<String>)profile[fieldOrd];
    }

    /**
     * Returns the text stored for a text field.
     * @param fieldOrd ordinal of the field
     * @return text of the field
     * @throws ClassCastException if the value stored at the ordinal is not a string
     */
    public String getText(int fieldOrd) {
        return (String)profile[fieldOrd];
    }
}