/*
* chombo: Hadoop Map Reduce utility
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.chombo.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.chombo.mr.FeatureField;
import org.chombo.mr.FeatureFieldCollection;
/**
* Metadata based on schema JSON file. Enriched by stats data
* @author pranab
*
*/
public class FeatureSchema {
    /**
     * Value expected in the second column of a stats row for the row to be applied
     * to the schema. NOTE(review): the meaning of "0" is defined by whatever job
     * writes the stats file - confirm against that producer.
     */
    private static final String STATS_ROW_MARKER = "0";

    /** Columns 0, 1, 4, 5 and 6 of a stats row are read, so at least 7 columns are needed. */
    private static final int MIN_STATS_COLUMNS = 7;

    private List<FeatureField> fields;

    // No setter is visible in this class; presumably populated by the JSON binding
    // that creates the schema - TODO confirm.
    private FeatureFieldCollection fieldCollection;

    /**
     * @return the field list, or null if it has neither been set nor initialized
     */
    public List<FeatureField> getFields() {
        return fields;
    }

    /**
     * @param fields field list to use for this schema
     */
    public void setFields(List<FeatureField> fields) {
        this.fields = fields;
    }

    /**
     * Processes the field collection element: makes sure the field list exists and
     * appends one field per ordinal of the field collection, when there is one.
     * Every call appends again, so this is meant to be called once after loading.
     */
    public void initialize() {
        if (null == fields) {
            fields = new ArrayList<FeatureField>();
        }

        //create fields from collection field element
        if (null != fieldCollection) {
            for (int thisOrdinal : fieldCollection.getOrdinals()) {
                fields.add(fieldCollection.createFeatureField(thisOrdinal));
            }
        }
    }

    /**
     * Enhances the schema with stats data. Each line of the stats file is split with
     * {@code delim}; rows whose second column is "0" carry the field ordinal in
     * column 0 and mean, variance and standard deviation in columns 4, 5 and 6.
     * Blank lines are skipped. The reader is closed even when processing fails.
     *
     * @param config Hadoop configuration used to open the stats file
     * @param statsFilePath path of the stats file
     * @param delim column delimiter; passed to {@link String#split(String)}, so it
     *        is interpreted as a regular expression
     * @throws IOException if the file cannot be read or a row has too few columns
     * @throws NumberFormatException if an ordinal or a statistic is not numeric
     * @throws IllegalStateException if a row refers to an ordinal missing from the schema
     */
    public void processStats(Configuration config, String statsFilePath, String delim) throws IOException {
        InputStream fs = Utility.getFileStream(config, statsFilePath);
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs));
        try {
            String line = null;
            int lineNum = 0;
            while ((line = reader.readLine()) != null) {
                ++lineNum;
                if (line.trim().length() == 0) {
                    // tolerate blank lines, e.g. a trailing newline at end of file
                    continue;
                }

                String[] items = line.split(delim);
                if (items.length < 2) {
                    throw new IOException("Malformed stats row at " + statsFilePath + ":" + lineNum
                            + ", expected at least 2 columns but found " + items.length);
                }
                if (!STATS_ROW_MARKER.equals(items[1])) {
                    continue;
                }
                if (items.length < MIN_STATS_COLUMNS) {
                    throw new IOException("Malformed stats row at " + statsFilePath + ":" + lineNum
                            + ", expected at least " + MIN_STATS_COLUMNS + " columns but found " + items.length);
                }

                int ordinal = Integer.parseInt(items[0]);
                double mean = Double.parseDouble(items[4]);
                double variance = Double.parseDouble(items[5]);
                double stdDev = Double.parseDouble(items[6]);

                FeatureField field = findFieldByOrdinal(ordinal);
                if (null == field) {
                    throw new IllegalStateException("Stats row at " + statsFilePath + ":" + lineNum
                            + " refers to ordinal " + ordinal + " which is not in the schema");
                }
                field.setMean(mean);
                field.setVariance(variance);
                field.setStdDev(stdDev);
            }
        } finally {
            // release the stream on failure as well as on success
            reader.close();
        }
    }

    /**
     * Gets the field with the given ordinal.
     *
     * @param ordinal field ordinal to look for
     * @return the first field with that ordinal, or null if there is none
     */
    public FeatureField findFieldByOrdinal(int ordinal) {
        for (FeatureField field : fields) {
            if (field.getOrdinal() == ordinal) {
                return field;
            }
        }
        return null;
    }

    /**
     * Finds the class attribute field, i.e. the first field that is neither an id
     * nor a feature.
     *
     * @return the class attribute field, or null if there is none
     */
    public FeatureField findClassAttrField() {
        for (FeatureField field : fields) {
            if (!field.isId() && !field.isFeature()) {
                return field;
            }
        }
        return null;
    }

    /**
     * Returns the ordinals of all feature fields.
     *
     * @return feature field ordinals in ascending order; empty if there are none
     */
    public int[] getFeatureFieldOrdinals() {
        List<Integer> ordinalList = new ArrayList<Integer>();
        for (FeatureField field : fields) {
            if (field.isFeature()) {
                ordinalList.add(field.getOrdinal());
            }
        }
        Collections.sort(ordinalList);

        int[] ordinals = new int[ordinalList.size()];
        for (int i = 0; i < ordinals.length; ++i) {
            ordinals[i] = ordinalList.get(i);
        }
        return ordinals;
    }

    /**
     * Gets all feature fields.
     *
     * @return a new list of the feature fields, sorted by the natural ordering of
     *         {@code FeatureField} (by ordinal, per the original author's note)
     */
    public List<FeatureField> getFeatureAttrFields() {
        List<FeatureField> featureFields = new ArrayList<FeatureField>();
        for (FeatureField field : fields) {
            if (field.isFeature()) {
                featureFields.add(field);
            }
        }

        //sort by ordinal
        Collections.sort(featureFields);
        return featureFields;
    }

    /**
     * Gets the number of cardinality values of a field.
     *
     * @param attrOrd ordinal of the field
     * @return size of the field's cardinality collection
     * @throws IllegalArgumentException if the schema has no field with that ordinal
     */
    public int getCardinalitySize(int attrOrd) {
        return requireField(attrOrd).getCardinality().size();
    }

    /**
     * Gets the cardinality index of a value of a field, as computed by the field.
     *
     * @param attrOrd ordinal of the field
     * @param attrVal attribute value to look up
     * @return the value returned by the field's {@code cardinalityIndex} for attrVal
     * @throws IllegalArgumentException if the schema has no field with that ordinal
     */
    public int getCardinalityIndex(int attrOrd, String attrVal) {
        return requireField(attrOrd).cardinalityIndex(attrVal);
    }

    /**
     * Looks up a field by ordinal, failing with a descriptive error instead of
     * letting the caller hit a NullPointerException.
     */
    private FeatureField requireField(int ordinal) {
        FeatureField field = findFieldByOrdinal(ordinal);
        if (null == field) {
            throw new IllegalArgumentException("No field with ordinal " + ordinal + " in schema");
        }
        return field;
    }
}