//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.cleaners;
import java.util.Collections;
import java.util.regex.Pattern;
import com.google.common.collect.ImmutableSet;
import uk.gov.dstl.baleen.annotators.cleaners.helpers.AbstractNormalizeEntities;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.types.semantic.Entity;
/**
* Replace blocks of whitespace with a single space (this includes new lines) in the value
*
*/
public class NormalizeWhitespace extends AbstractNormalizeEntities {
@Override
protected boolean shouldNormalize(Entity e) {
return true;
}
@Override
protected String normalize(Entity e) {
return e.getValue().replaceAll("[\n\\h]+", " ");
}
@Override
public AnalysisEngineAction getAction() {
return new AnalysisEngineAction(ImmutableSet.of(Entity.class), Collections.emptySet());
}
}