/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package edu.harvard.iq.dvn.ingest.thedata.helpers;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// R-specific
import org.rosuda.REngine.REXP;
import org.rosuda.REngine.REXPMismatchException;
import org.rosuda.REngine.RList;

/**
 * Variable Meta Data
 * An object storing meta-data for a collection of values of a DataTable. This
 * is used to properly restore file from the TAB format to other formats.
 * Notably, this helps characterize data in the Rdata format.
 * Additionally, this is a good place to start formalizing the TAB -> Other format
 * out-gest (opposite of ingest) procedure.
 * @author Matt Owen
 * @since 2013-04-10
 * @version 1.0
 */
public class VariableMetaData {

  /**
   * Numeric type codes accepted by the constructors. Any code that is not in
   * this list is stored as -1.
   */
  public static ArrayList<Integer> VARIABLE_TYPES = new ArrayList<Integer>();

  /**
   * Maps a numeric type code to its type name. The code -1 maps to
   * {@code null}.
   */
  public static Map<Integer, String> VARIABLE_TYPE_STRINGS = new HashMap<Integer, String>();

  static {
    VARIABLE_TYPES.add(0);
    VARIABLE_TYPES.add(1);
    // Code 2 has a name in VARIABLE_TYPE_STRINGS and is what isContinuous()
    // tests for, so the constructors must accept it as well.
    VARIABLE_TYPES.add(2);

    VARIABLE_TYPE_STRINGS.put(-1, null);
    VARIABLE_TYPE_STRINGS.put(0, "character");
    VARIABLE_TYPE_STRINGS.put(1, "integer");
    VARIABLE_TYPE_STRINGS.put(2, "numeric");
  }

  // Types of "strings" that have special meta-data associated with them
  public static String[] VARIABLE_CHARACTER_SUBCLASSES = {
    "Date", "Time", "DateTime", "Factor"
  };

  Integer mType, mSubType;
  String mTypeString, mDateTimeFormat;
  String[] mClass, mFactorLevels;

  private int[] intFactorValues;
  private boolean isFactor = false;
  private boolean isBoolean = false;
  private boolean isOrderedFactor = false;

  /**
   * Construct a Meta-data-less Object.
   * The type code is -1, the type name is {@code null}, and both the factor
   * levels and the integer factor values are empty arrays.
   * @since 2013-04-10
   */
  public VariableMetaData () {
    mType = -1;
    mTypeString = null;
    mFactorLevels = new String[0];
    intFactorValues = new int[0];
  }

  /**
   * Construct a Variable Meta Data Object from its Numeric Type.
   * @param type numeric type code; a code not listed in
   * {@link #VARIABLE_TYPES} is stored as -1
   */
  public VariableMetaData (int type) {
    this(type, new String[0], new int[0]);
  }

  /**
   * Construct a Variable Meta Data Object with Levels.
   * @param type numeric type code; a code not listed in
   * {@link #VARIABLE_TYPES} is stored as -1
   * @param factorLevels factor levels; kept only when the resulting type code
   * is 0, otherwise replaced by an empty array
   */
  public VariableMetaData (int type, String [] factorLevels) {
    this(type, factorLevels, new int[0]);
  }

  /**
   * Construct a Variable Meta Data Object with Levels and Integer Values.
   * @param type numeric type code; a code not listed in
   * {@link #VARIABLE_TYPES} is stored as -1
   * @param factorLevels factor levels; kept only when the resulting type code
   * is 0, otherwise replaced by an empty array
   * @param factorValues integer factor values; kept only when the resulting
   * type code is 0, otherwise replaced by an empty array
   */
  public VariableMetaData (int type, String [] factorLevels, int [] factorValues) {
    mType = VARIABLE_TYPES.contains(type) ? type : -1;
    mTypeString = VARIABLE_TYPE_STRINGS.get(mType);

    boolean characterType = mType == 0;
    mFactorLevels = characterType ? factorLevels : new String[0];
    intFactorValues = characterType ? factorValues : new int[0];
  }

  /**
   * Whether Column is a Character
   * @return true if variable stores meta-data for a character-variable. Note
   * that meta-data could also represent date, time, datetime, or factor values.
   */
  public boolean isCharacter () {
    return mType == 0;
  }

  /**
   * Whether Column is an Integer
   * @return true if variable stores meta-data for an integer-variable
   */
  public boolean isInteger () {
    return mType == 1;
  }

  /**
   * Whether Column is a Continuous Number
   * @return true if variable stores meta-data for a continuous-type
   * variable - double, float, etc.
   */
  public boolean isContinuous () {
    return mType == 2;
  }

  /**
   * Whether Column is a Date Value
   * @return currently always false; this class does not yet detect Date
   * variables
   */
  public boolean isDate () {
    return false;
  }

  /**
   * Whether Column is a Time Value
   * @return currently always false; this class does not yet detect Time
   * variables
   */
  public boolean isTime () {
    return false;
  }

  /**
   * Whether Column is a Date-Time Value
   * @return true if the type code is 0 and a non-empty date-time format has
   * been set
   */
  public boolean isDateTime () {
    return mType == 0 && mDateTimeFormat != null && ! mDateTimeFormat.equals("");
  }

  /**
   * Whether Column is a Factor Value
   * @return true if variable stores meta-data for a Factor variable. Note this
   * data-type is only in the Rdata format
   */
  public boolean isFactor () {
    return isFactor;
  }

  /**
   * Mark the Column as a Factor (or not)
   * @param isf the new value of the factor flag
   */
  public void setFactor (boolean isf) {
    isFactor = isf;
  }

  /**
   * Whether Column is flagged as Boolean
   * @return the value of the boolean flag
   */
  public boolean isBoolean () {
    return isBoolean;
  }

  /**
   * Mark the Column as Boolean (or not)
   * @param isb the new value of the boolean flag
   */
  public void setBoolean (boolean isb) {
    isBoolean = isb;
  }

  /**
   * Whether Column is flagged as an Ordered Factor
   * @return the value of the ordered-factor flag
   */
  public boolean isOrderedFactor() {
    return isOrderedFactor;
  }

  /**
   * Mark the Column as an Ordered Factor (or not)
   * @param iso the new value of the ordered-factor flag
   */
  public void setOrderedFactor (boolean iso) {
    isOrderedFactor = iso;
  }

  /**
   * Express this Meta-data as an RList
   *
   * This List has a Special Format:
   *   list(
   *     type = NUMBER,
   *     class = CHARACTER-VECTOR,
   *     levels = CHARACTER-VECTOR
   *   )
   *
   * @return a new RList holding the type code under "type", {@code null}
   * under "class", and the factor levels under "levels"
   */
  public RList asRList () {
    // Built as a plain RList rather than an anonymous subclass, so the
    // returned list does not keep a hidden reference to this object.
    RList list = new RList();
    list.put("type", mType);
    list.put("class", null);
    list.put("levels", mFactorLevels);
    return list;
  }

  /**
   * Set the Date-Time Format for a Column of Data
   * @param format a string specifying the date-time format of the column. This
   * isn't always reliable.
   */
  public void setDateTimeFormat (String format) {
    mDateTimeFormat = format;
  }

  /**
   * Set the Factor Levels
   * @param levels the factor levels; stored as given
   */
  public void setFactorLevels (String [] levels) {
    mFactorLevels = levels;
  }

  /**
   * Get the Factor Levels
   * @return the stored factor levels
   */
  public String [] getFactorLevels () {
    return mFactorLevels;
  }

  /**
   * Set the Integer Factor Values
   * @param values the integer factor values; stored as given
   */
  public void setIntFactorValues (int [] values) {
    intFactorValues = values;
  }

  /**
   * Get the Integer Factor Values
   * @return the stored integer factor values
   */
  public int [] getIntFactorValues () {
    return intFactorValues;
  }

  /**
   * Position of a Level among the Factor Levels of an Ordered Factor
   * @param level the level to look up
   * @return the zero-based index of the first factor level equal to
   * {@code level}; -1 if the column is not flagged as an ordered factor, if no
   * factor levels are stored, or if no level matches
   */
  public int getFactorLevelOrder (String level) {
    // For ordered categorical values ("ordinals"), the order in which
    // the levels are listed corresponds to their ordered values:
    if (!isOrderedFactor || mFactorLevels == null) {
      return -1;
    }

    for (int i = 0; i < mFactorLevels.length; i++) {
      String candidate = mFactorLevels[i];
      // A null entry can never match; skip it instead of dereferencing it.
      if (candidate != null && candidate.equals(level)) {
        return i;
      }
    }

    return -1;
  }
}