/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.converter.filter;
import gobblin.converter.AvroToAvroConverterBase;
import gobblin.converter.Converter;
import gobblin.converter.SchemaConversionException;
import gobblin.converter.SingleRecordIterable;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.WorkUnitState;
import gobblin.converter.DataConversionException;
import gobblin.converter.EmptyIterable;
import gobblin.util.AvroUtils;
/**
 * A filtering converter for Avro data: a record is passed through only when one of its fields holds an expected
 * value. Both the field to inspect and the value to match come from the job configuration. Equality is the only
 * comparison supported, and it is evaluated against the string form of the field's value.
 */
public class AvroFilterConverter extends AvroToAvroConverterBase {

  /** Name of the record field to inspect; assigned in {@link #init(WorkUnitState)}. */
  private String fieldName;

  /** String the inspected field must be equal to; assigned in {@link #init(WorkUnitState)}. */
  private String fieldValue;

  /**
   * Loads the filter settings from the configuration before delegating to the parent initialization.
   * {@link ConfigurationKeys#CONVERTER_FILTER_FIELD_NAME} names the field to read from each Avro record and
   * {@link ConfigurationKeys#CONVERTER_FILTER_FIELD_VALUE} gives the value that field is expected to have; both
   * properties are mandatory.
   * {@inheritDoc}
   *
   * @throws IllegalArgumentException if either of the two properties is not set on the work unit
   * @see gobblin.converter.Converter#init(gobblin.configuration.WorkUnitState)
   */
  @Override
  public Converter<Schema, Schema, GenericRecord, GenericRecord> init(WorkUnitState workUnit) {
    // Validate both properties up front so that neither field is assigned when the configuration is incomplete.
    requireProperty(workUnit, ConfigurationKeys.CONVERTER_FILTER_FIELD_NAME,
        "Missing required property converter.filter.field for the AvroFilterConverter class.");
    requireProperty(workUnit, ConfigurationKeys.CONVERTER_FILTER_FIELD_VALUE,
        "Missing required property converter.filter.value for the AvroFilterConverter class.");

    this.fieldName = workUnit.getProp(ConfigurationKeys.CONVERTER_FILTER_FIELD_NAME);
    this.fieldValue = workUnit.getProp(ConfigurationKeys.CONVERTER_FILTER_FIELD_VALUE);

    return super.init(workUnit);
  }

  /**
   * Fails with an {@link IllegalArgumentException} carrying {@code message} when {@code key} is not contained in
   * {@code state}.
   */
  private static void requireProperty(WorkUnitState state, String key, String message) {
    Preconditions.checkArgument(state.contains(key), message);
  }

  /**
   * Filtering never alters the shape of the data, so the input schema is handed back as is.
   * {@inheritDoc}
   * @see gobblin.converter.AvroToAvroConverterBase#convertSchema(org.apache.avro.Schema, gobblin.configuration.WorkUnitState)
   */
  @Override
  public Schema convertSchema(Schema inputSchema, WorkUnitState workUnit) throws SchemaConversionException {
    return inputSchema;
  }

  /**
   * Looks up the configured field on the inputRecord and compares the string form of its value with the expected
   * value {@link #fieldValue}. A matching record is returned wrapped in a
   * {@link gobblin.converter.SingleRecordIterable}; a record whose field value is absent or different produces an
   * {@link EmptyIterable}, i.e. it is filtered out.
   * {@inheritDoc}
   * @see gobblin.converter.AvroToAvroConverterBase#convertRecord(org.apache.avro.Schema, org.apache.avro.generic.GenericRecord, gobblin.configuration.WorkUnitState)
   */
  @Override
  public Iterable<GenericRecord> convertRecord(Schema outputSchema, GenericRecord inputRecord, WorkUnitState workUnit)
      throws DataConversionException {
    Optional<Object> actual = AvroUtils.getFieldValue(inputRecord, this.fieldName);

    // No value could be retrieved for the field: nothing to compare, so the record is dropped.
    if (!actual.isPresent()) {
      return new EmptyIterable<>();
    }

    // The comparison is done on the string representation, with the record's value as the receiver.
    String actualAsString = actual.get().toString();
    if (!actualAsString.equals(this.fieldValue)) {
      return new EmptyIterable<>();
    }

    return new SingleRecordIterable<>(inputRecord);
  }
}