//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.contentmappers;
import java.util.Locale;

import org.apache.uima.jcas.JCas;
import org.jsoup.nodes.Element;

import uk.gov.dstl.baleen.contentmappers.helpers.AnnotationCollector;
import uk.gov.dstl.baleen.contentmappers.helpers.ContentMapper;
import uk.gov.dstl.baleen.types.common.Buzzword;
import uk.gov.dstl.baleen.types.common.DocumentReference;
import uk.gov.dstl.baleen.types.common.Quantity;
import uk.gov.dstl.baleen.types.semantic.Location;
import uk.gov.dstl.baleen.types.semantic.Temporal;
/**
 * Use tags in HTML5 which have semantic meaning to create Baleen entity types.
 *
 * <p>The recognised tags and the annotation type created for each are:
 *
 * <ul>
 *   <li>{@code time} - {@link Temporal}
 *   <li>{@code meter} - {@link Quantity}
 *   <li>{@code dfn} and {@code abbr} - {@link Buzzword}
 *   <li>{@code address} - {@link Location}
 *   <li>{@code cite} - {@link DocumentReference}
 * </ul>
 *
 * <p>Elements with any other tag name produce no annotation.
 */
public class SemanticHtml implements ContentMapper {

  /**
   * Adds a new annotation to the collector when the element's tag is one of the recognised
   * semantic tags; otherwise does nothing.
   *
   * <p>Tag names are compared case-insensitively.
   *
   * @param jCas the JCas used to construct the new annotation
   * @param element the HTML element whose tag name selects the annotation type
   * @param collector receives the newly created annotation, if any
   */
  @Override
  public void map(JCas jCas, Element element, AnnotationCollector collector) {
    // Locale.ROOT keeps the match independent of the JVM default locale; with a Turkish
    // default locale, for example, "TIME" would otherwise lower-case to "t\u0131me".
    switch (element.tagName().toLowerCase(Locale.ROOT)) {
      case "time":
        collector.add(new Temporal(jCas));
        break;
      case "meter":
        collector.add(new Quantity(jCas));
        break;
      case "dfn":
      case "abbr":
        // Definitions and abbreviations are both treated as buzzwords.
        collector.add(new Buzzword(jCas));
        break;
      case "address":
        collector.add(new Location(jCas));
        break;
      case "cite":
        collector.add(new DocumentReference(jCas));
        break;
      default:
        // Not a semantic tag handled by this mapper: add nothing.
        break;
    }
  }
}