/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.task.map;
import java.util.HashMap;
import java.util.Map;
import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.core.BundleField;
import com.addthis.bundle.core.list.ListBundle;
import com.addthis.bundle.core.list.ListBundleFormat;
import com.addthis.bundle.value.ValueObject;
import com.addthis.codec.annotations.FieldConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Joins two data sets that share a key by merging, per key value, the configured
 * {@code fields} of every row seen for that key into one accumulated bundle.
 * <p>
 * Emission rules implemented by this class:
 * <ul>
 * <li>{@link #process} emits (and forgets) the accumulated bundle for a key when a
 * row arrives for a key that is already being tracked and the bundle then holds
 * the expected number of fields. The first row seen for a key is never emitted
 * immediately.</li>
 * <li>When the value of the {@code hash} field differs from the one seen on the
 * previous row, all tracked bundles holding the expected number of fields are
 * emitted and the remaining ones are discarded.</li>
 * <li>{@link #streamComplete} emits the tracked bundles that hold the expected
 * number of fields.</li>
 * </ul>
 * This is intended to run on a single thread and hence is not thread safe. A few
 * private helpers are declared {@code synchronized}, but other accesses to the
 * key map are not, so those modifiers do not make the class thread safe.
 */
public class StreamJoin extends StreamBuilder {

    /** Initial capacity used for every key map instance. */
    private static final int KEY_MAP_INITIAL_CAPACITY = 10000;

    /** Load factor used for every key map instance. */
    private static final float KEY_MAP_LOAD_FACTOR = 0.75f;

    private static final Logger logger = LoggerFactory.getLogger(StreamJoin.class);

    /** Name of the field whose value identifies rows that belong together. */
    @FieldConfig(codable = true, required = true)
    private String key;

    /** Names of the fields copied from incoming rows into the joined bundle. */
    @FieldConfig(codable = true, required = true)
    private String[] fields;

    /** Name of the field whose change of value triggers a release of the key map. */
    @FieldConfig(codable = true, required = true)
    private String hash;

    /** Bundles accumulated so far, indexed by key value. */
    private Map<String, ListBundle> keyMap;

    /** Hash value of the most recently processed row that had a key value. */
    private String currentHash;

    /** Number of fields a bundle must hold to be considered complete. */
    private int numExpectedFields;

    /**
     * Prepares the join state: records how many fields a complete bundle holds and
     * creates an empty key map.
     */
    @Override
    public void init() {
        numExpectedFields = fields.length;
        keyMap = newKeyMap();
    }

    /**
     * Merges one row into the bundle tracked for its key value.
     * <p>
     * Rows without a key value are ignored. If both the previous and the current
     * hash value are known and differ, the key map is released before the row is
     * merged.
     *
     * @param row     the incoming row
     * @param emitter receiver of joined bundles
     */
    @Override
    public void process(Bundle row, StreamEmitter emitter) {
        String keyValue = stringValue(row, key);
        String hashValue = stringValue(row, hash);
        if (keyValue == null) {
            return;
        }
        if (currentHash != null && hashValue != null && !currentHash.equals(hashValue)) {
            releaseMap(emitter);
        }
        currentHash = hashValue;
        joinAndEmit(keyValue, row, emitter);
    }

    /**
     * Emits the tracked bundles that hold the expected number of fields.
     *
     * @param streamEmitter receiver of joined bundles
     */
    @Override
    public void streamComplete(StreamEmitter streamEmitter) {
        if (keyMap.isEmpty()) {
            return;
        }
        // emitBundle removes the entry from keyMap, so iterate over a snapshot of
        // the keys; iterating the live key set while removing entries fails with
        // a ConcurrentModificationException.
        String[] trackedKeys = keyMap.keySet().toArray(new String[0]);
        for (String trackedKey : trackedKeys) {
            ListBundle bundle = keyMap.get(trackedKey);
            if (isComplete(bundle)) {
                emitBundle(trackedKey, bundle, streamEmitter);
            }
        }
    }

    /**
     * Emits every complete bundle and replaces the key map with an empty one, so
     * incomplete bundles are discarded.
     */
    private synchronized void releaseMap(StreamEmitter emitter) {
        for (ListBundle bundle : keyMap.values()) {
            if (isComplete(bundle)) {
                emitter.emit(bundle);
            }
        }
        keyMap = newKeyMap();
    }

    /**
     * Copies the configured fields of {@code row} into the bundle tracked for
     * {@code keyValue}, creating and registering that bundle if there is none.
     * The bundle is emitted only when it already existed before this row and is
     * complete after the merge.
     */
    private void joinAndEmit(String keyValue, Bundle row, StreamEmitter emitter) {
        ListBundle bundle = keyMap.get(keyValue);
        boolean newBundle = (bundle == null);
        if (newBundle) {
            bundle = new ListBundle();
            keyMapPut(keyValue, bundle);
        }
        for (String field : fields) {
            BundleField sourceField = row.getFormat().getField(field);
            // Looked up afresh for every field so that a field without a handle in
            // this row never inherits the value read for the previous field.
            ValueObject fieldValue = (sourceField == null) ? null : row.getValue(sourceField);
            if (fieldValue != null) {
                // Resolve the target handle against the bundle's own format, the
                // same format isComplete() inspects.
                // NOTE(review): assumes a format's field count tracks the handles
                // requested from it - confirm against the bundle library.
                BundleField targetField = bundle.getFormat().getField(field);
                bundle.setValue(targetField, fieldValue);
            }
        }
        if (!newBundle && isComplete(bundle)) {
            emitBundle(keyValue, bundle, emitter);
        }
    }

    /** Emits {@code bundle} and stops tracking {@code keyValue}; a null bundle is ignored. */
    private void emitBundle(String keyValue, ListBundle bundle, StreamEmitter emitter) {
        if (bundle == null) {
            return;
        }
        emitter.emit(bundle);
        keyMapRemove(keyValue);
    }

    /** Registers {@code bundle} as the accumulated bundle for {@code keyValue}. */
    private synchronized void keyMapPut(String keyValue, ListBundle bundle) {
        keyMap.put(keyValue, bundle);
    }

    /** Stops tracking {@code keyValue}. */
    private synchronized void keyMapRemove(String keyValue) {
        keyMap.remove(keyValue);
    }

    /** Returns true when the format of {@code bundle} reports the expected number of fields. */
    private boolean isComplete(ListBundle bundle) {
        return bundle.getFormat().getFieldCount() == numExpectedFields;
    }

    /**
     * Reads the named field of {@code row} as a string.
     *
     * @return the string value, or null when the row has no handle or no value for
     *         the field
     */
    private static String stringValue(Bundle row, String fieldName) {
        BundleField field = row.getFormat().getField(fieldName);
        if (field == null) {
            return null;
        }
        ValueObject value = row.getValue(field);
        if (value == null) {
            return null;
        }
        String text = value.asString().asNative();
        return text;
    }

    /** Creates an empty key map with the capacity and load factor used by this class. */
    private static Map<String, ListBundle> newKeyMap() {
        return new HashMap<>(KEY_MAP_INITIAL_CAPACITY, KEY_MAP_LOAD_FACTOR);
    }
}