CascadingStatsTest.java example

Explorer
cascading-master
- src
/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading.  If not, see <http://www.gnu.org/licenses/>.
 */

package cascading.stats;

import java.io.File;

import cascading.ClusterTestCase;
import cascading.cascade.Cascade;
import cascading.cascade.CascadeConnector;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.flow.hadoop.HadoopStepStats;
import cascading.operation.regex.RegexParser;
import cascading.operation.state.Counter;
import cascading.pipe.Each;
import cascading.pipe.GroupBy;
import cascading.pipe.Pipe;
import cascading.scheme.TextLine;
import cascading.tap.Hfs;
import cascading.tap.Tap;
import cascading.tuple.Fields;

/**
 * Verifies the counter and step-level statistics exposed for a {@link Cascade}
 * made of two flows that are built from the same pipe assembly and read the
 * same source.
 * <p/>
 * The expected values asserted below are tied to the input data file; the
 * cascade-level counter totals asserted here equal the sum of the two
 * flow-level values.
 */
public class CascadingStatsTest extends ClusterTestCase
  {
  /** Path of the input data file; it must exist locally before the test runs. */
  String inputFileApache = "build/test/data/apache.10.txt";
  /** Path prefix under which each flow's sink is created. */
  String outputPath = "build/test/output/flowstats/";

  /** Counter keys handed to the {@link Counter} operations in the test assembly. */
  enum TestEnum
    {
      FIRST, SECOND
    }

  /**
   * Names the test case and passes {@code true} to the parent constructor
   * (presumably the cluster-enable flag; confirm against ClusterTestCase).
   */
  public CascadingStatsTest()
    {
    super( "flow stats tests", true );
    }

  /**
   * Runs two flows over the same source inside one cascade, then checks the
   * counters reported at cascade and flow level, followed by the details of
   * the first step of each flow.
   *
   * @throws Exception if copying the input or running the cascade fails
   */
  public void testStatsCounters() throws Exception
    {
    File localInput = new File( inputFileApache );

    if( !localInput.exists() )
      {
      fail( "data file not found" );
      }

    copyFromLocal( inputFileApache );

    Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache );

    Pipe assembly = createCountingAssembly();

    Tap firstSink = new Hfs( new TextLine(), outputPath + "flowstats1", true );
    Tap secondSink = new Hfs( new TextLine(), outputPath + "flowstats2", true );

    Flow firstFlow = new FlowConnector( getProperties() ).connect( "stats1 test", source, firstSink, assembly );
    Flow secondFlow = new FlowConnector( getProperties() ).connect( "stats2 test", source, secondSink, assembly );

    Cascade cascade = new CascadeConnector().connect( firstFlow, secondFlow );

    cascade.complete();

    // cascade-wide view
    CascadeStats cascadeStats = cascade.getCascadeStats();

    assertNotNull( cascadeStats.getID() );

    // one more counter group is expected when running against the cluster
    int expectedGroups;

    if( isEnableCluster() )
      {
      expectedGroups = 5;
      }
    else
      {
      expectedGroups = 4;
      }

    assertEquals( expectedGroups, cascadeStats.getCounterGroups().size() );
    assertEquals( 1, cascadeStats.getCounterGroupsMatching( "cascading\\.stats\\..*" ).size() );
    assertEquals( 2, cascadeStats.getCountersFor( TestEnum.class.getName() ).size() );
    assertEquals( 2, cascadeStats.getCountersFor( TestEnum.class ).size() );
    assertEquals( 40, cascadeStats.getCounterValue( TestEnum.FIRST ) );
    assertEquals( 20, cascadeStats.getCounterValue( TestEnum.SECOND ) );

    // per-flow view
    FlowStats firstFlowStats = firstFlow.getFlowStats();
    assertFlowCounters( firstFlowStats );

    FlowStats secondFlowStats = secondFlow.getFlowStats();
    assertFlowCounters( secondFlowStats );

    // step details are only inspected after an explicit capture
    cascadeStats.captureDetail();

    assertEquals( 2, firstFlowStats.getStepsCount() );
    assertEquals( 2, secondFlowStats.getStepsCount() );

    HadoopStepStats firstFlowStep = assertFirstStep( firstFlowStats );

    if( isEnableCluster() )
      {
      assertEquals( 7, firstFlowStep.getTaskStats().size() );
      assertNotNull( firstFlowStep.getTaskStats().get( 5 ) );
      assertTrue( firstFlowStep.getTaskStats().get( 5 ).getCounterValue( TestEnum.FIRST ) > 0 ); // in reducer
      }

    HadoopStepStats secondFlowStep = assertFirstStep( secondFlowStats );

    if( isEnableCluster() )
      {
      assertEquals( 7, secondFlowStep.getTaskStats().size() );
      assertNotNull( secondFlowStep.getTaskStats().get( 0 ) );
      }
    }

  /**
   * Builds the assembly shared by both flows: parse the "ip" field out of
   * "line", group by "ip" twice, and attach {@link Counter} operations, with
   * {@link TestEnum#FIRST} attached twice and {@link TestEnum#SECOND} once.
   *
   * @return the tail of the assembly
   */
  private Pipe createCountingAssembly()
    {
    Pipe tail = new Pipe( "first" );

    tail = new Each( tail, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
    tail = new GroupBy( tail, new Fields( "ip" ) );
    tail = new Each( tail, new Counter( TestEnum.FIRST ) );
    tail = new GroupBy( tail, new Fields( "ip" ) );
    tail = new Each( tail, new Counter( TestEnum.FIRST ) );
    tail = new Each( tail, new Counter( TestEnum.SECOND ) );

    return tail;
    }

  /**
   * Asserts that the given flow statistics carry an id and the counter values
   * expected for a single flow.
   *
   * @param flowStats statistics of one completed flow
   */
  private void assertFlowCounters( FlowStats flowStats )
    {
    assertNotNull( flowStats.getID() );

    assertEquals( 20, flowStats.getCounterValue( TestEnum.FIRST ) );
    assertEquals( 10, flowStats.getCounterValue( TestEnum.SECOND ) );
    }

  /**
   * Fetches the first step of the given flow and asserts the properties both
   * flows have in common: an id, a job id, two map tasks and one reducer task.
   *
   * @param flowStats statistics of one completed flow
   * @return the first step's statistics, for further mode-specific checks
   */
  private HadoopStepStats assertFirstStep( FlowStats flowStats )
    {
    HadoopStepStats stepStats = (HadoopStepStats) flowStats.getStepStats().get( 0 );

    assertNotNull( stepStats.getID() );
    assertNotNull( stepStats.getJobID() );

    assertEquals( 2, stepStats.getNumMapTasks() );
    assertEquals( 1, stepStats.getNumReducerTasks() );

    return stepStats;
    }
  }