package org.datavec.transform.debugging; import org.datavec.api.transform.TransformProcess; import org.datavec.api.transform.condition.ConditionOp; import org.datavec.api.transform.condition.column.CategoricalColumnCondition; import org.datavec.api.transform.condition.column.DoubleColumnCondition; import org.datavec.api.transform.filter.ConditionFilter; import org.datavec.api.transform.schema.Schema; import org.datavec.api.transform.transform.time.DeriveColumnsFromTimeTransform; import org.datavec.api.writable.DoubleWritable; import org.joda.time.DateTimeFieldType; import org.joda.time.DateTimeZone; import java.util.Arrays; import java.util.HashSet; /** * This is a simple example for the DataVec transformation functionality (building on BasicDataVecExample) * It is designed to simply demonstrate that it is possible to obtain the schema after each step of a transform process. * This can be useful for debugging your TransformProcess scripts. * * @author Alex Black */ public class PrintSchemasAtEachStep { public static void main(String[] args){ //Define the Schema and TransformProcess as per BasicDataVecExample Schema inputDataSchema = new Schema.Builder() .addColumnsString("DateTimeString", "CustomerID", "MerchantID") .addColumnInteger("NumItemsInTransaction") .addColumnCategorical("MerchantCountryCode", Arrays.asList("USA","CAN","FR","MX")) .addColumnDouble("TransactionAmountUSD",0.0,null,false,false) //$0.0 or more, no maximum limit, no NaN and no Infinite values .addColumnCategorical("FraudLabel", Arrays.asList("Fraud","Legit")) .build(); TransformProcess tp = new TransformProcess.Builder(inputDataSchema) .removeColumns("CustomerID","MerchantID") .filter(new ConditionFilter(new CategoricalColumnCondition("MerchantCountryCode", ConditionOp.NotInSet, new HashSet<>(Arrays.asList("USA","CAN"))))) .conditionalReplaceValueTransform( "TransactionAmountUSD", //Column to operate on new DoubleWritable(0.0), //New value to use, when the condition is satisfied new DoubleColumnCondition("TransactionAmountUSD",ConditionOp.LessThan, 0.0)) //Condition: amount < 0.0 .stringToTimeTransform("DateTimeString","YYYY-MM-DD HH:mm:ss.SSS", DateTimeZone.UTC) .renameColumn("DateTimeString", "DateTime") .transform(new DeriveColumnsFromTimeTransform.Builder("DateTime").addIntegerDerivedColumn("HourOfDay", DateTimeFieldType.hourOfDay()).build()) .removeColumns("DateTime") .build(); //Now, print the schema after each time step: int numActions = tp.getActionList().size(); for(int i=0; i<numActions; i++ ){ System.out.println("\n\n=================================================="); System.out.println("-- Schema after step " + i + " (" + tp.getActionList().get(i) + ") --"); System.out.println(tp.getSchemaAfterStep(i)); } System.out.println("DONE."); } }