//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.contentmanipulators.helpers;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.jsoup.nodes.Element;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
/**
* Helper functions for dealing with HTML markup, in content manipulators.
*
*/
public class MarkupUtils {

  /** Prefix that places every attribute handled by this class in the data-baleen namespace. */
  private static final String BALEEN_ATTRIBUTE_PREFIX = "data-baleen-";

  /** Un-prefixed key of the attribute that holds the additional annotation types. */
  private static final String ANNOTATION_TYPE_ATTRIBUTE = "types";

  /** Separator written between the individual values of a multi-valued attribute. */
  public static final String ATTRIBUTE_VALUE_SEPARATOR = ";";

  /** Joins values with {@link #ATTRIBUTE_VALUE_SEPARATOR}, skipping nulls. */
  private static final Joiner ATTRIBUTE_VALUE_JOINER =
      Joiner.on(ATTRIBUTE_VALUE_SEPARATOR).skipNulls();

  /**
   * Splits on {@link #ATTRIBUTE_VALUE_SEPARATOR}, trimming each value and dropping empty ones.
   */
  private static final Splitter ATTRIBUTE_VALUE_SPLITTER =
      Splitter.on(ATTRIBUTE_VALUE_SEPARATOR).trimResults().omitEmptyStrings();

  private MarkupUtils() {
    // Utility class
  }

  /**
   * Inform the DataAttributeMapper to create a Baleen type for this element.
   *
   * <p>The type is appended to the element's (prefixed) "types" attribute via {@link
   * #addAttribute(Element, String, String)}.
   *
   * @param e the element
   * @param type the baleen type; a null or empty type is ignored
   */
  public static void additionallyAnnotateAsType(Element e, String type) {
    addAttribute(e, ANNOTATION_TYPE_ATTRIBUTE, type);
  }

  /**
   * Set an attribute on an element (in the data-baleen namespace), replacing any existing value.
   *
   * @param e the element to modify
   * @param key the attribute key, without the data-baleen prefix
   * @param value the value to store
   */
  public static void setAttribute(Element e, String key, String value) {
    e.attr(attributeKey(key), value);
  }

  /**
   * Add an attribute value to an existing attribute key (or set if that key does not exist).
   *
   * <p>Multiple values are stored in a single attribute, separated by {@link
   * #ATTRIBUTE_VALUE_SEPARATOR}. A null or empty value leaves the element untouched.
   *
   * @param e the element to modify
   * @param key the attribute key, without the data-baleen prefix
   * @param value the value to add; ignored when null or empty
   */
  public static void addAttribute(Element e, String key, String value) {
    if (Strings.isNullOrEmpty(value)) {
      // Nothing to add: avoids handing null to the element and avoids writing a dangling
      // separator (e.g. "a;") that the splitter would only have to discard again.
      return;
    }

    String fullKey = attributeKey(key);
    String current = e.attr(fullKey);
    String updated =
        Strings.isNullOrEmpty(current) ? value : concatenateAttribute(current, value);
    e.attr(fullKey, updated);
  }

  /**
   * Build the full attribute name for a key by adding the data-baleen prefix.
   *
   * @param key the un-prefixed key
   * @return the prefixed attribute name
   */
  private static String attributeKey(String key) {
    return BALEEN_ATTRIBUTE_PREFIX + key;
  }

  /**
   * Join values into a single attribute string using the value separator; nulls are skipped.
   *
   * @param values the values to join
   * @return the joined string
   */
  private static String concatenateAttribute(String... values) {
    return ATTRIBUTE_VALUE_JOINER.join(values);
  }

  /**
   * Get the raw value of an attribute in the data-baleen namespace.
   *
   * @param e the element to read
   * @param key the attribute key, without the data-baleen prefix
   * @return attribute values (multiple values are returned as a single, separator-delimited
   *     string), exactly as returned by the element for the prefixed key
   */
  public static String getAttribute(Element e, String key) {
    return e.attr(attributeKey(key));
  }

  /**
   * Get attribute values as a list.
   *
   * <p>The stored string is split on {@link #ATTRIBUTE_VALUE_SEPARATOR}; each value is trimmed
   * and empty values are omitted.
   *
   * @param e the element to read
   * @param key the attribute key, without the data-baleen prefix
   * @return the individual values, in stored order
   */
  public static List<String> getAttributes(Element e, String key) {
    return ATTRIBUTE_VALUE_SPLITTER.splitToList(getAttribute(e, key));
  }

  /**
   * Get the type values of an element as a set (duplicates removed).
   *
   * @param element the element to read
   * @return the distinct types recorded on the element, or an empty set if there are none
   */
  public static Set<String> getTypes(Element element) {
    List<String> types = getAttributes(element, ANNOTATION_TYPE_ATTRIBUTE);
    if (types.isEmpty()) {
      return Collections.emptySet();
    }
    return new HashSet<>(types);
  }
}