/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.yandex;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.MultivaluedMap;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.Validator;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.processors.yandex.model.Translation;
import org.apache.nifi.processors.yandex.util.Languages;
import org.apache.nifi.processors.yandex.util.ObjectMapperResolver;
import org.apache.nifi.stream.io.StreamUtils;
import org.apache.nifi.util.StopWatch;
import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.ClientResponse.Status;
import com.sun.jersey.api.client.WebResource;
import com.sun.jersey.api.client.config.ClientConfig;
import com.sun.jersey.api.client.config.DefaultClientConfig;
import com.sun.jersey.api.json.JSONConfiguration;
import com.sun.jersey.core.util.MultivaluedMapImpl;
@SupportsBatching
@InputRequirement(Requirement.INPUT_REQUIRED)
@Tags({"yandex", "translate", "translation", "language"})
@CapabilityDescription("Translates content and attributes from one language to another")
@WritesAttributes({
@WritesAttribute(attribute = "yandex.translate.failure.reason", description = "If the text cannot be translated, this attribute will be set indicating the reason for the failure"),
@WritesAttribute(attribute = "language", description = "When the translation succeeds, if the content was translated, this attribute will be set indicating the new language of the content")
})
@DynamicProperty(name = "The name of an attribute to set that will contain the translated text of the value",
value = "The value to translate",
supportsExpressionLanguage = true,
description = "User-defined properties are used to translate arbitrary text based on attributes.")
public class YandexTranslate extends AbstractProcessor {
public static final PropertyDescriptor KEY = new PropertyDescriptor.Builder()
.name("Yandex API Key")
.description("The API Key that is registered with Yandex")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.required(true)
.build();
public static final PropertyDescriptor SOURCE_LANGUAGE = new PropertyDescriptor.Builder()
.name("Input Language")
.description("The language of incoming data")
.required(true)
.defaultValue("es")
.expressionLanguageSupported(true)
.addValidator(new LanguageNameValidator())
.build();
public static final PropertyDescriptor TARGET_LANGUAGE = new PropertyDescriptor.Builder()
.name("Target Language")
.description("The language to translate the text into")
.required(true)
.defaultValue("en")
.expressionLanguageSupported(true)
.addValidator(new LanguageNameValidator())
.build();
public static final PropertyDescriptor TRANSLATE_CONTENT = new PropertyDescriptor.Builder()
.name("Translate Content")
.description("Specifies whether or not the content should be translated. If false, only the text specified by user-defined properties will be translated.")
.required(true)
.allowableValues("true", "false")
.defaultValue("false")
.build();
public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder()
.name("Character Set")
.description("Specifies the character set of the data to be translated")
.required(true)
.defaultValue("UTF-8")
.expressionLanguageSupported(true)
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.build();
public static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("This relationship is used when the translation is successful")
.build();
public static final Relationship REL_COMMS_FAILURE = new Relationship.Builder()
.name("comms.failure")
.description("This relationship is used when the translation fails due to a problem such as a network failure, and for which the translation should be attempted again")
.build();
public static final Relationship REL_TRANSLATION_FAILED = new Relationship.Builder()
.name("translation.failure")
.description("This relationship is used if the translation cannot be performed for some reason other than communications failure")
.build();
private List<PropertyDescriptor> descriptors;
private Set<Relationship> relationships;
private volatile Client client;
private static final String URL = "https://translate.yandex.net/api/v1.5/tr.json/translate";
@Override
protected void init(final ProcessorInitializationContext context) {
final List<PropertyDescriptor> descriptors = new ArrayList<>();
descriptors.add(KEY);
descriptors.add(SOURCE_LANGUAGE);
descriptors.add(TARGET_LANGUAGE);
descriptors.add(TRANSLATE_CONTENT);
descriptors.add(CHARACTER_SET);
this.descriptors = Collections.unmodifiableList(descriptors);
final Set<Relationship> relationships = new HashSet<>();
relationships.add(REL_SUCCESS);
relationships.add(REL_COMMS_FAILURE);
relationships.add(REL_TRANSLATION_FAILED);
this.relationships = Collections.unmodifiableSet(relationships);
}
@Override
public Set<Relationship> getRelationships() {
return this.relationships;
}
@Override
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return descriptors;
}
@Override
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
return new PropertyDescriptor.Builder()
.name(propertyDescriptorName)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(true)
.dynamic(true)
.build();
}
@Override
protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
final List<ValidationResult> results = new ArrayList<>();
if (validationContext.getProperty(TRANSLATE_CONTENT).asBoolean().equals(Boolean.FALSE)) {
boolean foundDynamic = false;
for (final PropertyDescriptor descriptor : validationContext.getProperties().keySet()) {
if (descriptor.isDynamic()) {
foundDynamic = true;
break;
}
}
if (!foundDynamic) {
results.add(new ValidationResult.Builder().subject("Text to translate").input("<none>").valid(false)
.explanation("Must either set 'Translate Content' to true or add at least one user-defined property").build());
}
}
return results;
}
@OnScheduled
public void onScheduled(final ProcessContext context) {
final ClientConfig config = new DefaultClientConfig();
config.getFeatures().put(JSONConfiguration.FEATURE_POJO_MAPPING, Boolean.TRUE);
config.getClasses().add(ObjectMapperResolver.class);
client = Client.create(config);
}
@OnStopped
public void destroyClient() {
if (client != null) {
client.destroy();
}
}
protected WebResource.Builder prepareResource(final String key, final List<String> text, final String sourceLanguage, final String destLanguage) {
WebResource webResource = client.resource(URL);
final MultivaluedMap<String, String> paramMap = new MultivaluedMapImpl();
paramMap.put("text", text);
paramMap.add("key", key);
paramMap.add("lang", sourceLanguage + "-" + destLanguage);
WebResource.Builder builder = webResource
.accept(MediaType.APPLICATION_JSON)
.type(MediaType.APPLICATION_FORM_URLENCODED);
builder = builder.entity(paramMap);
return builder;
}
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final StopWatch stopWatch = new StopWatch(true);
final String key = context.getProperty(KEY).getValue();
final String sourceLanguage = context.getProperty(SOURCE_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
final String targetLanguage = context.getProperty(TARGET_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
final String encoding = context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue();
final List<String> attributeNames = new ArrayList<>();
final List<String> textValues = new ArrayList<>();
for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
if (descriptor.isDynamic()) {
attributeNames.add(descriptor.getName()); // add to list so that we know the order when the translations come back.
textValues.add(context.getProperty(descriptor).evaluateAttributeExpressions(flowFile).getValue());
}
}
if (context.getProperty(TRANSLATE_CONTENT).asBoolean()) {
final byte[] buff = new byte[(int) flowFile.getSize()];
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream in) throws IOException {
StreamUtils.fillBuffer(in, buff);
}
});
final String content = new String(buff, Charset.forName(encoding));
textValues.add(content);
}
final WebResource.Builder builder = prepareResource(key, textValues, sourceLanguage, targetLanguage);
final ClientResponse response;
try {
response = builder.post(ClientResponse.class);
} catch (final Exception e) {
getLogger().error("Failed to make request to Yandex to transate text for {} due to {}; routing to comms.failure", new Object[]{flowFile, e});
session.transfer(flowFile, REL_COMMS_FAILURE);
return;
}
if (response.getStatus() != Status.OK.getStatusCode()) {
getLogger().error("Failed to translate text using Yandex for {}; response was {}: {}; routing to {}", new Object[]{
flowFile, response.getStatus(), response.getStatusInfo().getReasonPhrase(), REL_TRANSLATION_FAILED.getName()});
flowFile = session.putAttribute(flowFile, "yandex.translate.failure.reason", response.getStatusInfo().getReasonPhrase());
session.transfer(flowFile, REL_TRANSLATION_FAILED);
return;
}
final Map<String, String> newAttributes = new HashMap<>();
final Translation translation = response.getEntity(Translation.class);
final List<String> texts = translation.getText();
for (int i = 0; i < texts.size(); i++) {
final String text = texts.get(i);
if (i < attributeNames.size()) {
final String attributeName = attributeNames.get(i);
newAttributes.put(attributeName, text);
} else {
flowFile = session.write(flowFile, new OutputStreamCallback() {
@Override
public void process(final OutputStream out) throws IOException {
out.write(text.getBytes(encoding));
}
});
newAttributes.put("language", targetLanguage);
}
}
if (!newAttributes.isEmpty()) {
flowFile = session.putAllAttributes(flowFile, newAttributes);
}
stopWatch.stop();
session.transfer(flowFile, REL_SUCCESS);
getLogger().info("Successfully translated {} items for {} from {} to {} in {}; routing to success",
new Object[]{texts.size(), flowFile, sourceLanguage, targetLanguage, stopWatch.getDuration()});
}
private static class LanguageNameValidator implements Validator {
@Override
public ValidationResult validate(final String subject, final String input, final ValidationContext context) {
if (context.isExpressionLanguagePresent(input)) {
return new ValidationResult.Builder().subject(subject).input(input).valid(true).explanation("Expression Language Present").build();
}
if (Languages.getLanguageMap().keySet().contains(input.toLowerCase())) {
return new ValidationResult.Builder().subject(subject).input(input).valid(true).build();
}
return new ValidationResult.Builder().subject(subject).input(input).valid(false).explanation(input + " is not a language that is supported by Yandex").build();
}
}
}