package org.myrobotlab.document.transformer; import java.util.HashSet; import java.util.List; import org.myrobotlab.document.Document; /** * This stage will remove all duplicate values for a given field on a document. * * @author kwatters * */ public class UniqueFieldValues extends AbstractStage { private String fieldName; @Override public void startStage(StageConfiguration config) { // NoOp fieldName = config.getProperty("fieldName"); } @Override public List<Document> processDocument(Document doc) { HashSet<Object> unique = new HashSet<Object>(); for (Object o : doc.getField(fieldName)) { unique.add(o); } doc.removeField(fieldName); for (Object o : unique) { doc.addToField(fieldName, o); } return null; } @Override public void stopStage() { // TODO Auto-generated method stub } @Override public void flush() { // TODO Auto-generated method stub } public String getFieldName() { return fieldName; } public void setFieldName(String fieldName) { this.fieldName = fieldName; } }