//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.cleaners.helpers;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import com.google.common.base.Strings;
import uk.gov.dstl.baleen.types.semantic.Entity;
import uk.gov.dstl.baleen.uima.BaleenAnnotator;
/**
 * A class for containing the generic functionality shared by all normalizing
 * cleaners. It iterates over the {@link Entity} annotations in the document and,
 * for each one that has a value and that the child cleaner opts in to, replaces
 * the value with its normalized form.
 *
 * <p>Both abstract methods are intended to be overridden with operations specific
 * to the entities handled by a particular child cleaner.
 *
 * @baleen.javadoc
 */
public abstract class AbstractNormalizeEntities extends BaleenAnnotator {

  /**
   * Normalizes the value of every applicable entity in the document.
   *
   * <p>An entity is skipped when its value is null or empty, when
   * {@link #shouldNormalize(Entity)} rejects it, or when
   * {@link #normalize(Entity)} returns {@code null}. The entity is only updated
   * (and flagged as normalised) when the normalized value differs from the
   * current value.
   *
   * @param jCas the document whose {@link Entity} annotations are processed
   * @throws AnalysisEngineProcessException declared by the overridden method
   */
  @Override
  public void doProcess(JCas jCas) throws AnalysisEngineProcessException {
    FSIterator<Annotation> iter = jCas.getAnnotationIndex(Entity.type).iterator();
    while (iter.hasNext()) {
      Entity e = (Entity) iter.next();

      if (Strings.isNullOrEmpty(e.getValue())) {
        getMonitor().debug("No value set for entity '{}' - skipping", e.getCoveredText());
        continue;
      }

      if (!this.shouldNormalize(e)) {
        continue;
      }

      String normalized = this.normalize(e);
      if (normalized == null) {
        // A child cleaner returning null would otherwise cause a
        // NullPointerException below and abort processing of the whole document;
        // treat it as "could not normalize" and leave the entity untouched.
        getMonitor().debug("No normalized value produced for entity '{}' - skipping", e.getCoveredText());
        continue;
      }

      // Only touch the entity when normalization actually changed something, so
      // that unchanged entities are not marked as normalised.
      if (!normalized.equals(e.getValue())) {
        e.setValue(normalized);
        e.setIsNormalised(true);
      }
    }
  }

  /**
   * The shouldNormalize method is used first to identify entities of the type
   * the cleaner is supposed to operate on. It is only called for entities whose
   * value is neither null nor empty.
   *
   * @param e the entity under consideration
   * @return true if {@link #normalize(Entity)} should be applied to the entity
   */
  protected abstract boolean shouldNormalize(Entity e);

  /**
   * Overridden with the specific operations required to calculate the normalized
   * value of the entity. If it is not possible to normalize, this method should
   * return the original value of the entity; a {@code null} return is tolerated
   * and results in the entity being left unchanged.
   *
   * @param e the entity to normalize; its value is neither null nor empty
   * @return the normalized value, or the original value if normalization is not possible
   */
  protected abstract String normalize(Entity e);
}