/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.examples.webanalytics;
import co.cask.cdap.api.annotation.ProcessInput;
import co.cask.cdap.api.annotation.UseDataSet;
import co.cask.cdap.api.flow.flowlet.AbstractFlowlet;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import java.nio.charset.Charset;
/**
 * UniqueVisitor, a data processing node that processes collected data.
 *
 * <p>Each incoming stream event is decoded as a UTF-8 log line. The text before the first
 * space is taken to be the visitor's IP address, and the counter stored for that IP in the
 * {@code UniqueVisitCount} Dataset is incremented by one. If the IP has been recorded
 * before, the same increment call simply raises its existing count.
 *
 * <p>A line that contains no space is treated as consisting of the first element only.
 * Events whose first element is empty (an empty body, or a line starting with a space)
 * are skipped, because they carry no IP address to count.
 */
public class UniqueVisitor extends AbstractFlowlet {

  // Looked up once instead of on every event.
  private static final Charset UTF_8 = Charset.forName("UTF-8");

  // Request an instance of UniqueVisitCount Dataset
  @UseDataSet("UniqueVisitCount")
  private UniqueVisitCount table;

  /**
   * Counts one visit for the IP address found at the start of the given log event.
   *
   * @param streamEvent the stream event whose body holds one UTF-8 encoded log line
   */
  @ProcessInput
  public void process(StreamEvent streamEvent) {
    // Decode the log line as String
    String event = UTF_8.decode(streamEvent.getBody()).toString();

    String ip = extractFirstToken(event);
    if (ip.isEmpty()) {
      // Nothing that could be an IP address; do not record an empty key.
      return;
    }

    // Increments the visit count of a given IP by 1
    table.increment(ip, 1L);
  }

  /**
   * Returns the part of {@code line} that precedes the first space, or the whole line when
   * it contains no space at all. Never throws for empty or single-token input.
   */
  private static String extractFirstToken(String line) {
    int firstSpace = line.indexOf(' ');
    // indexOf returns -1 when no space exists; substring(0, -1) would throw.
    return firstSpace < 0 ? line : line.substring(0, firstSpace);
  }
}