//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.contentmappers;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.List;
import java.util.Set;

import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import uk.gov.dstl.baleen.contentmanipulators.ParagraphMarkedClassification;
import uk.gov.dstl.baleen.contentmanipulators.helpers.ContentManipulator;
import uk.gov.dstl.baleen.contentmanipulators.helpers.MarkupUtils;
import uk.gov.dstl.baleen.contentmappers.helpers.AnnotationCollector;
import uk.gov.dstl.baleen.contentmappers.helpers.ContentMapper;

/**
 * Create Baleen types based on the data attributes of tags.
 *
 * <p>This will create the annotations and set any values on each annotation based on the
 * data-baleen- attributes of the element.
 *
 * <p>Use MarkupUtils.additionallyAnnotateAsType and MarkupUtils.add/setAttribute within a
 * {@link ContentManipulator} in order to pass information to this mapper.
 *
 * <p>Note the full Java type should be passed to additionallyAnnotateAsType. The attributes should
 * have the same key as the Java bean fields would be (eg if the UIMA value setter is
 * 'setSomething', the attribute key is 'something'). Only the first value is considered, and only
 * simple types (boolean, int, double, string) are used.
 *
 * <p>See {@link ParagraphMarkedClassification} for an example.
 *
 * <p>This is a useful helper tool for simple cases, but consider writing a full contentmapper to
 * deal with important tags, and depending on the complexity of the annotation introducing new
 * information carrying elements to the HTML DOM.
 */
public class DataAttributeMapper implements ContentMapper {

  private static final Logger LOGGER = LoggerFactory.getLogger(DataAttributeMapper.class);

  private static final String SETTER_PREFIX = "set";

  /**
   * Creates one annotation per type declared on the element and populates it from the element's
   * attributes.
   *
   * <p>Each type name is loaded reflectively through this class's class loader. A type that is not
   * an {@link Annotation} is logged and skipped without being instantiated. Any failure while
   * creating a given type is logged and does not prevent the remaining types being processed.
   *
   * @param jCas the JCas passed to each annotation's constructor
   * @param element the element whose types and attributes are read
   * @param collector receives each successfully created annotation
   */
  @Override
  public void map(JCas jCas, Element element, AnnotationCollector collector) {
    Set<String> types = MarkupUtils.getTypes(element);

    for (String type : types) {
      try {
        // Look for the annotation type by reflection
        Class<?> clazz = getClass().getClassLoader().loadClass(type);

        // Check the type before constructing it, so that a non-annotation class never has its
        // constructor run against the JCas only to be thrown away.
        if (!Annotation.class.isAssignableFrom(clazz)) {
          LOGGER.error("Type is not an annotation {}, ignoring", type);
          continue;
        }

        Annotation annotation =
            clazz.asSubclass(Annotation.class).getConstructor(JCas.class).newInstance(jCas);

        // For each setter, look to see if we have an attribute defined on the element
        Arrays.stream(clazz.getMethods())
            .filter(DataAttributeMapper::isSingleArgumentSetter)
            .forEach(m -> setMethodValue(type, element, annotation, m));

        // Add the annotation to the collector
        collector.add(annotation);
      } catch (Exception e) {
        LOGGER.error("Unable to create annotation of type {}", type, e);
      }
    }
  }

  /**
   * Tests whether a method looks like a bean setter: named {@code setXxx} (with at least one
   * character after the prefix) and taking exactly one parameter.
   *
   * @param m the method to test
   * @return true if the method should be considered for attribute assignment
   */
  private static boolean isSingleArgumentSetter(Method m) {
    String name = m.getName();
    return name.startsWith(SETTER_PREFIX)
        && name.length() > SETTER_PREFIX.length()
        && m.getParameterCount() == 1;
  }

  /**
   * Sets the value of a field by calling its setter, if the element carries a matching attribute.
   *
   * <p>The attribute key is the setter name without its 'set' prefix and with the first character
   * lower-cased. Failures to convert or assign the value are logged as warnings and otherwise
   * ignored.
   *
   * @param type the type name (used for logging only)
   * @param element the element to read the attribute from
   * @param annotation the annotation to set the value on
   * @param m the setter method
   */
  private void setMethodValue(String type, Element element, Object annotation, Method m) {
    String fieldName = m.getName().substring(SETTER_PREFIX.length());
    fieldName = Character.toLowerCase(fieldName.charAt(0)) + fieldName.substring(1);

    List<String> attributes = MarkupUtils.getAttributes(element, fieldName);
    if (attributes == null || attributes.isEmpty()) {
      return;
    }

    // We only try the first value... (even if it doesn't work)
    try {
      assignAttribute(annotation, m, attributes.get(0));
    } catch (Exception e) {
      LOGGER.warn(
          "Unable to set annotation values of type {} from attribute {}", type, fieldName, e);
    }
  }

  /**
   * Converts an attribute value to the setter's parameter type and invokes the setter.
   *
   * <p>Only int, double, boolean and String parameters are handled; for any other parameter type
   * the setter is not invoked.
   *
   * @param annotation the annotation to invoke the setter on
   * @param m the setter method to call
   * @param string the raw attribute value
   * @throws IllegalAccessException if the setter is not accessible
   * @throws InvocationTargetException if the setter itself throws
   */
  private void assignAttribute(Object annotation, Method m, String string)
      throws IllegalAccessException, InvocationTargetException {
    Class<?> parameterType = m.getParameterTypes()[0];

    // We only deal with the simple UIMA types
    // int, string, double, boolean
    if (parameterType.equals(int.class)) {
      int value = Integer.parseInt(string);
      m.invoke(annotation, value);
    } else if (parameterType.equals(double.class)) {
      double value = Double.parseDouble(string);
      m.invoke(annotation, value);
    } else if (parameterType.equals(boolean.class)) {
      boolean value = Boolean.parseBoolean(string);
      m.invoke(annotation, value);
    } else if (parameterType.equals(String.class)) {
      m.invoke(annotation, string);
    } else {
      // Not an error: many setters take non-simple types. Logged so a skipped attribute can be
      // diagnosed.
      LOGGER.debug(
          "Ignoring attribute for {} as parameter type {} is not supported",
          m.getName(),
          parameterType.getName());
    }
  }
}