package eu.dnetlib.iis.common.java.jsonworkflownodes;

import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.specific.SpecificRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.google.common.collect.Maps;
import eu.dnetlib.iis.common.java.PortBindings;
import eu.dnetlib.iis.common.java.Process;
import eu.dnetlib.iis.common.java.io.DataStore;
import eu.dnetlib.iis.common.java.io.FileSystemPath;
import eu.dnetlib.iis.common.java.jsonworkflownodes.StringPortSpecificationExtractor.PortSpecification;
import eu.dnetlib.iis.common.java.porttype.AvroPortType;
import eu.dnetlib.iis.common.java.porttype.PortType;
import eu.dnetlib.iis.common.utils.AvroUtils;

/**
 * Consumer of avro data stores.
 * It checks whether the number of records in each provided data store falls within a provided range.
 *
 * @author madryk
 */
public class RecordCountTestConsumer implements Process {

    private final Map<String, RecordCountPortSpecification> inputPortsSpecification = Maps.newHashMap();

//------------------------ CONSTRUCTORS --------------------------
    /**
     * @param inputSpecifications specifications of the input ports. Each element of
     * the array corresponds to a single specification. A single specification
     * conforms to the following template:
     * "{input port name, schema reference, minimum number of records in the input avro data store,
     * maximum number of records in the input avro data store}",
     * e.g. "{person, eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person, 10, 100}"
     */
    public RecordCountTestConsumer(String[] inputSpecifications) {
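        // each specification carries three properties: schema reference, minimum record count, maximum record count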
StringPortSpecificationExtractor specificationExtractor = new StringPortSpecificationExtractor(new String[]{"[\\w\\.]+", "[\\d]+", "[\\d]+"});
for (String inputSpecification : inputSpecifications) {
PortSpecification portSpec = specificationExtractor.getSpecification(inputSpecification);
Schema schema = AvroUtils.toSchema(portSpec.getProperties()[0]);
        int minRecords = Integer.parseInt(portSpec.getProperties()[1]);
        int maxRecords = Integer.parseInt(portSpec.getProperties()[2]);
inputPortsSpecification.put(portSpec.getName(), new RecordCountPortSpecification(portSpec.getName(), schema, minRecords, maxRecords));
}
}
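
    /*
     * Usage sketch (illustrative only; it reuses the example specification from the
     * constructor javadoc above, and the record count bounds of 10 and 100 are arbitrary):
     *
     *   Process consumer = new RecordCountTestConsumer(new String[] {
     *           "{person, eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person, 10, 100}"});
     */
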
//------------------------ GETTERS --------------------------
@Override
public Map<String, PortType> getInputPorts() {
Map<String, PortType> inputPorts = Maps.newHashMap();
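        // expose each configured input port as an avro port typed with its declared schema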
for (Map.Entry<String, RecordCountPortSpecification> inputPortSpecification : inputPortsSpecification.entrySet()) {
inputPorts.put(inputPortSpecification.getKey(), new AvroPortType(inputPortSpecification.getValue().getSchema()));
}
return inputPorts;
}
@Override
public Map<String, PortType> getOutputPorts() {
return new HashMap<String, PortType>();
}
//------------------------ LOGIC --------------------------
@Override
public void run(PortBindings portBindings, Configuration configuration,
Map<String, String> parameters) throws Exception {
Map<String, Path> input = portBindings.getInput();
FileSystem fs = FileSystem.get(configuration);
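        // validate each bound input data store against its record count specification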
        for (Map.Entry<String, Path> e : input.entrySet()) {
RecordCountPortSpecification specs = inputPortsSpecification.get(e.getKey());
check(new FileSystemPath(fs, e.getValue()), specs);
}
}
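
    /**
     * Reads the avro data store located under the given path and asserts that its
     * record count falls within the [minimum, maximum] range defined by the specification.
     */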
    private static void check(FileSystemPath actualPath, RecordCountPortSpecification specs) throws IOException {
List<SpecificRecord> actual = DataStore.read(actualPath, specs.getSchema());
assertTrue("Expected at least " + specs.getMinimumRecordCount() + " records in " + specs.getName() + " avro data store, but was " + actual.size(),
actual.size() >= specs.getMinimumRecordCount());
assertTrue("Expected at most " + specs.getMaximumRecordCount() + " records in " + specs.getName() + " avro data store, but was " + actual.size(),
actual.size() <= specs.getMaximumRecordCount());
}
}