/*
* Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
*
* Cloudera, Inc. licenses this file to you under the Apache License,
* Version 2.0 (the "License"). You may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for
* the specific language governing permissions and limitations under the
* License.
*/
package com.cloudera.oryx.contrib.flume;
import java.util.List;
import org.apache.flume.Event;
/**
 * Contract for turning a Flume {@link Event} into records that can be sent to Oryx.
 */
public interface OryxEventParser {

  /**
   * Reads the requested {@code fields} from {@code event} and appends the result to
   * {@code batch}, formatted the way Oryx requires.
   *
   * <p>
   * An Oryx record is either {@code user,item} or {@code user,item,strength}. The {@code user}
   * and {@code item} identifiers may be numeric or non-numeric and may stand for any kind of
   * entity. The trailing {@code strength} is an optional numeric value carrying simple rating
   * information.
   * </p>
   *
   * @param event the Flume {@link Event} to read values from
   * @param fields the keys that locate the {@code user} and {@code item} values inside the
   *        {@link Event}. When the optional {@code strength} entry is present it is not looked
   *        up; it is copied into the Oryx record literally (as-is). Examples:
   *        <ul>
   *        <li>username,product_id,2.0</li>
   *        <li>username,search_term</li>
   *        </ul>
   *        NOTE(review): the declared type is a list of lists; presumably each inner list is
   *        one such key specification (user key, item key, optional literal strength) --
   *        confirm against the implementations.
   * @param batch the batch of records destined for Oryx; extracted records are added to it
   */
  void parseEvent(Event event, List<List<String>> fields, List<String> batch);

}