/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.components.remote;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.metamodel.schema.ColumnTypeImpl;
import org.apache.metamodel.util.EqualsBuilder;
import org.datacleaner.Version;
import org.datacleaner.api.Close;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Validate;
import org.datacleaner.configuration.RemoteServerData;
import org.datacleaner.job.concurrent.PreviousErrorsExistException;
import org.datacleaner.restclient.ComponentConfiguration;
import org.datacleaner.restclient.ComponentRESTClient;
import org.datacleaner.restclient.ComponentsRestClientUtils;
import org.datacleaner.restclient.CreateInput;
import org.datacleaner.restclient.ProcessStatelessInput;
import org.datacleaner.restclient.ProcessStatelessOutput;
import org.datacleaner.restclient.RESTClientException;
import org.datacleaner.restclient.Serializator;
import org.datacleaner.util.batch.BatchRowCollectingTransformer;
import org.datacleaner.util.batch.BatchSink;
import org.datacleaner.util.batch.BatchSource;
import org.datacleaner.util.convert.StringConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.module.jsonSchema.JsonSchema;
import com.fasterxml.jackson.module.jsonSchema.types.ArraySchema;
import com.fasterxml.jackson.module.jsonSchema.types.ValueTypeSchema;
/**
* Transformer that is actually a proxy to a remote transformer sitting at DataCleaner Monitor server.
* Instances of this transformer can be created only by
* {@link org.datacleaner.descriptors.RemoteTransformerDescriptorImpl} component descriptors.
*
* @Since 9/1/15
*/
public class RemoteTransformer extends BatchRowCollectingTransformer {
private static final Logger logger = LoggerFactory.getLogger(RemoteTransformer.class);
private static final ObjectMapper mapper = Serializator.getJacksonObjectMapper();
private final RemoteServerData serverData;
private final AtomicBoolean failed = new AtomicBoolean(false);
private String componentDisplayName;
private ComponentRESTClient client;
private final SingleValueErrorAwareCache<CreateInput, OutputColumns> cachedOutputColumns =
new SingleValueErrorAwareCache<CreateInput, OutputColumns>() {
@Override
protected OutputColumns fetch(final CreateInput input) throws Exception {
return getOutputColumnsInternal(input);
}
};
private Map<String, Object> configuredProperties = new TreeMap<>();
public RemoteTransformer(final RemoteServerData serverData, final String componentDisplayName) {
this.serverData = serverData;
this.componentDisplayName = componentDisplayName;
}
@Initialize
public void initClient() throws RemoteComponentException {
try {
logger.debug("Initializing '{}' @{}", componentDisplayName, this.hashCode());
client = new ComponentRESTClient(serverData.getUrl(), serverData.getUsername(), serverData.getPassword(),
Version.getVersion());
} catch (final Exception e) {
throw new RemoteComponentException(
"Remote component '" + componentDisplayName + "' is temporarily unavailable. \n" + e.getMessage());
}
}
@Close
public void closeClient() {
logger.debug("closing '{}' @{}", componentDisplayName, this.hashCode());
client = null;
}
@Validate
public void validate() throws Exception {
final CreateInput createInput = new CreateInput();
createInput.configuration = getConfiguration(getUsedInputColumns());
try {
cachedOutputColumns.getCachedValue(createInput);
} catch (final RESTClientException e) {
if (e.getCode() == 422) {
// Validation failed - simplify the error message
throw new RuntimeException(e.getReason());
}
}
}
@Override
public OutputColumns getOutputColumns() {
final CreateInput createInput = new CreateInput();
createInput.configuration = getConfiguration(getUsedInputColumns());
try {
return cachedOutputColumns.getCachedValue(createInput);
} catch (final Exception e) {
logger.debug("Error retrieving columns of transformer '" + componentDisplayName + "': " + e.toString());
return OutputColumns.NO_OUTPUT_COLUMNS;
}
}
private boolean isOutputColumnEnumeration(final JsonSchema schema) {
if (schema == null) {
return false;
}
final boolean isArray = schema.isArraySchema();
final JsonSchema baseSchema;
if (isArray) {
baseSchema = ((ArraySchema) schema).getItems().asSingleItems().getSchema();
} else {
baseSchema = schema;
}
if (baseSchema instanceof ValueTypeSchema) {
final Set<String> enums = ((ValueTypeSchema) baseSchema).getEnums();
if (enums != null && !enums.isEmpty()) {
return true;
}
}
return false;
}
private ComponentConfiguration getConfiguration(final List<InputColumn<?>> inputColumns) {
final ComponentConfiguration configuration = new ComponentConfiguration();
for (final Map.Entry<String, Object> propertyE : configuredProperties.entrySet()) {
configuration.getProperties().put(propertyE.getKey(), mapper.valueToTree(propertyE.getValue()));
}
for (final InputColumn<?> col : inputColumns) {
configuration.getColumns().add(ComponentsRestClientUtils
.createInputColumnSpecification(col.getName(), col.getDataType(),
ColumnTypeImpl.convertColumnType(col.getDataType()).getName(), mapper.getNodeFactory()));
}
return configuration;
}
private List<InputColumn<?>> getUsedInputColumns() {
final ArrayList<InputColumn<?>> columns = new ArrayList<>();
for (final Object propValue : configuredProperties.values()) {
if (propValue instanceof InputColumn) {
columns.add((InputColumn<?>) propValue);
} else if (propValue instanceof InputColumn[]) {
for (final InputColumn<?> col : ((InputColumn[]) propValue)) {
columns.add(col);
}
} else if (propValue instanceof Collection) {
for (final Object value : ((Collection<?>) propValue)) {
if (value instanceof InputColumn) {
columns.add((InputColumn<?>) value);
} else {
// don't iterate the rest if the first item is not an input column.
break;
}
}
}
// TODO: are maps possible?
}
return columns;
}
private void convertOutputRows(final JsonNode rowSets, final BatchSink<Collection<Object[]>> sink,
final int sinkSize) {
final OutputColumns outCols = getOutputColumns();
if (rowSets == null || rowSets.size() < 1) {
throw new RuntimeException("Expected exactly 1 row in response");
}
int rowI = 0;
for (final JsonNode rowSet : rowSets) {
if (rowI >= sinkSize) {
throw new RuntimeException("Expected " + sinkSize + " rows, but got more");
}
final List<Object[]> outRowSet = new ArrayList<>();
for (final JsonNode row : rowSet) {
final List<Object> values = new ArrayList<>();
int i = 0;
for (final JsonNode value : row) {
// TODO: should JsonNode be the default?
Class<?> cl = String.class;
if (i < outCols.getColumnCount()) {
cl = outCols.getColumnType(i);
}
values.add(convertOutputValue(value, cl));
i++;
}
outRowSet.add(values.toArray(new Object[values.size()]));
}
sink.setOutput(rowI, outRowSet);
rowI++;
}
if (rowI < sinkSize) {
throw new RuntimeException("Expected " + sinkSize + " rows, but got only " + rowI);
}
}
private Object convertOutputValue(final JsonNode value, final Class<?> cl) {
try {
if (cl == JsonNode.class) {
return value;
}
if (cl == File.class) {
return StringConverter.simpleInstance().deserialize(value.asText(), cl);
}
return mapper.readValue(value.traverse(), cl);
} catch (final Exception e) {
throw new RuntimeException("Cannot convert table value of type '" + cl + "': " + value.toString(), e);
}
}
public void setPropertyValue(final String propertyName, final Object value) {
if (EqualsBuilder.equals(value, configuredProperties.get(propertyName))) {
return;
}
logger.debug("Setting '{}'.'{}' = {}", componentDisplayName, propertyName, value);
if (value == null) {
configuredProperties.remove(propertyName);
} else {
configuredProperties.put(propertyName, value);
}
}
public Object getPropertyValue(final String propertyName) {
return configuredProperties.get(propertyName);
}
@Override
public void map(final BatchSource<InputRow> source, final BatchSink<Collection<Object[]>> sink) {
final List<InputColumn<?>> cols = getUsedInputColumns();
final int size = source.size();
final Object[] rows = new Object[size];
for (int i = 0; i < size; i++) {
final InputRow inputRow = source.getInput(i);
final Object[] values = new Object[cols.size()];
for (int j = 0; j < cols.size(); j++) {
values[j] = inputRow.getValue(cols.get(j));
}
rows[i] = values;
}
final ProcessStatelessInput input = new ProcessStatelessInput();
input.configuration = getConfiguration(cols);
input.data = mapper.valueToTree(rows);
logger.debug("Processing remotely {} rows", size);
if (client == null) {
if (failed.get()) {
throw new PreviousErrorsExistException();
}
throw new RuntimeException("Remote transformer's connection has already been closed. ");
}
final ProcessStatelessOutput out;
try {
out = client.processStateless(componentDisplayName, input);
} catch (final RuntimeException e) {
final boolean alreadyFailed = failed.getAndSet(true);
if (!alreadyFailed) {
throw new RuntimeException("Remote transformer failed: " + e.getMessage(), e);
} else {
throw new PreviousErrorsExistException();
}
}
convertOutputRows(out.rows, sink, size);
}
private OutputColumns getOutputColumnsInternal(final CreateInput createInput) throws Exception {
logger.debug("Getting output columns from server");
boolean wasInit = false;
if (client == null) {
wasInit = true;
initClient();
}
try {
final org.datacleaner.restclient.OutputColumns columnsSpec =
client.getOutputColumns(componentDisplayName, createInput);
final OutputColumns outCols = new OutputColumns(columnsSpec.getColumns().size(), Object.class);
int i = 0;
for (final org.datacleaner.restclient.OutputColumns.OutputColumn colSpec : columnsSpec.getColumns()) {
outCols.setColumnName(i, colSpec.name);
try {
outCols.setColumnType(i, Class.forName(colSpec.type));
} catch (final ClassNotFoundException e) {
final Class<?> type;
if (isOutputColumnEnumeration(colSpec.schema)) {
type = String.class;
} else {
// For unknown types we specify "Object" as a class
// This causes that Jackson will deserialize it not as a JsonNode,
// but simple Java Maps, Lists, Strings etc.
// We NEED it, because we need the values to be Serializable.
// This is because some analyzer results contain the values
// and analyzer results must be serializable (e.g. to save it in DC monitor, or
// to send it over wire when doing ditributed computing etc.)
type = Object.class;
}
outCols.setColumnType(i, type);
}
i++;
}
return outCols;
} finally {
if (wasInit) {
closeClient();
}
}
}
}