/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.filter.value;
import com.addthis.basis.util.LessBytes;
import com.addthis.bundle.util.ValueUtil;
import com.addthis.bundle.value.ValueFactory;
import com.addthis.bundle.value.ValueObject;
import com.addthis.codec.annotations.FieldConfig;
import com.addthis.codec.json.CodecJSON;
import com.addthis.hydra.store.util.Raw;
import com.addthis.hydra.store.util.SeenFilterBasic;
/**
* This {@link AbstractValueFilter ValueFilter} <span class="hydra-summary">filters to elements seen or not seen by a basic bloom filter</span>.
* <p/>
* <p>{@link ValueFilterBloom ValueFilterBloom} is similar and uses a better bloom implementation,
* but requires a lot of work to make jobs 360 with respect to the generation and use of blooms.
* <p/>
* <p>Example:</p>
* <pre>
* </pre>
*
* @user-reference
*/
public class ValueFilterSeen extends AbstractValueFilter {

    /**
     * If true, then return elements detected in the Bloom filter. Otherwise return elements
     * not detected in the Bloom filter. Default is true.
     */
    @FieldConfig(codable = true)
    private boolean seen = true;

    /**
     * Retrieve the Bloom filter from a URL. Only consulted while {@link #bloom} is unset.
     * Reset to null when the fetch yields no content, so the fetch is not attempted again.
     */
    @FieldConfig(codable = true)
    private String url;

    /**
     * Optional filter applied to the text retrieved from the URL before that text is
     * decoded into the Bloom filter.
     */
    @FieldConfig(codable = true)
    private ValueFilter filter;

    /**
     * The bloom filter. When already populated, the URL is never fetched.
     */
    @FieldConfig(codable = true)
    protected SeenFilterBasic<Raw> bloom;

    /**
     * Tests the string form of {@code value} against the Bloom filter.
     *
     * @param value the value to test; may be null
     * @return {@code value} unchanged when it is null or when no Bloom filter is available;
     *         otherwise {@code value} when its membership in the Bloom filter equals
     *         {@link #seen}, and null when it does not
     * @throws RuntimeException if the Bloom filter could not be fetched or decoded
     */
    @Override
    public ValueObject filterValue(ValueObject value) {
        if (value == null) {
            return value;
        }
        if (!initialize()) {
            // No bloom filter to consult: pass the value through untouched.
            return value;
        }
        boolean match = bloom.getSeen(Raw.get(ValueUtil.asNativeString(value)));
        // seen == true keeps matches; seen == false keeps non-matches.
        return (match == seen) ? value : null;
    }

    /**
     * Lazily loads the Bloom filter from {@link #url} if it has not been set yet.
     *
     * @return true if a Bloom filter is available after this call, false otherwise
     * @throws RuntimeException wrapping any failure raised while fetching, filtering or
     *         decoding the remote content
     */
    private boolean initialize() {
        if (bloom != null) {
            return true;
        }
        if (url == null) {
            return false;
        }
        SeenFilterBasic<Raw> newbloom = new SeenFilterBasic<>();
        try {
            // NOTE(review): 30000 is presumably a timeout in milliseconds -- confirm
            // against ValueFilterHttpGet.httpGet.
            byte[] bytes = ValueFilterHttpGet.httpGet(url, null, null, 30000, false);
            if (bytes == null) {
                System.err.println("url " + url + " empty. killing bloom filter");
                // Drop the url so later calls skip the fetch and pass values through.
                url = null;
                return false;
            }
            String raw = LessBytes.toString(bytes);
            if (filter != null) {
                raw = ValueUtil.asNativeString(filter.filter(ValueFactory.create(raw)));
            }
            // Decode into a fresh instance and publish it only once decoding succeeded.
            CodecJSON.decodeString(newbloom, raw);
            bloom = newbloom;
            return true;
        } catch (Exception e) {
            throw new RuntimeException("failed to load bloom filter from url " + url, e);
        }
    }
}