package org.gbif.dwca.record;

import java.util.regex.Pattern;

import org.apache.commons.lang3.StringEscapeUtils;

/**
 * Static helpers for cleaning raw string values.
 */
public class CleanUtils {

  /**
   * Recognises values that should be treated as null: empty or whitespace-only strings, and
   * strings consisting solely of the literal token {@code null} or a backslash followed by
   * {@code N}, compared case-insensitively and with optional surrounding whitespace.
   */
  private static final Pattern NULL_LITERAL =
      Pattern.compile("^\\s*(null|\\\\N)?\\s*$", Pattern.CASE_INSENSITIVE);

  /** Utility class; not meant to be instantiated. */
  private CleanUtils() {
  }

  /**
   * Optionally converts literal null markers to a real {@code null} and optionally unescapes
   * entities in a string value.
   *
   * @param value the original string, may be {@code null}
   * @param nulls if true, values matching the null-literal pattern (e.g. "\N" or "NULL", as well
   *        as empty or whitespace-only strings) are replaced with a real {@code null}
   * @param entities if true, html4, xml and numerical entities are replaced with their unicode
   *        character
   * @return {@code null} if {@code value} is {@code null} or was recognised as a null literal;
   *         otherwise the value, unescaped when {@code entities} is true
   */
  public static String clean(String value, boolean nulls, boolean entities) {
    if (value == null) {
      return null;
    }
    // The null-literal check runs before unescaping, so it is applied to the raw input.
    if (nulls && NULL_LITERAL.matcher(value).find()) {
      return null;
    }
    if (entities) {
      return StringEscapeUtils.unescapeHtml4(value);
    }
    return value;
  }
}