package org.archive.cdxserver.processor;
import java.util.HashMap;
import org.archive.format.cdx.CDXLine;
import org.archive.format.cdx.FieldSplitFormat;
/**
 * Processor that keeps a running count of how many times each digest value has
 * been seen among the lines passed to {@link #writeLine(CDXLine)}, optionally
 * recording that count on each line before handing the line to the wrapped
 * processor.
 *
 * <p>Subclasses can attach extra per-digest state by overriding
 * {@link #createDupeTrack()} and can react to every line by overriding
 * {@link #handleLine(DupeTrack, CDXLine, boolean)}.
 */
public class DupeCountProcessor extends WrappedProcessor {

    /** Name of the field that receives the duplicate count. */
    public static final String dupecount = "dupecount";

    /**
     * Tracking entries keyed by digest. Entries are only ever added by this
     * class, one per distinct digest seen.
     */
    protected HashMap<String, DupeTrack> dupeHashmap;

    /** When true, the {@code dupecount} field is set on each line written. */
    protected boolean showDupeCount;

    /**
     * Per-digest tracking record. {@code count} starts at zero and is
     * incremented each time the same digest is seen again.
     */
    class DupeTrack {
        int count = 0;
    }

    /**
     * Creates a processor that forwards every line to {@code output}.
     *
     * @param output        the processor that receives each line after counting
     * @param showDupeCount whether to write the {@code dupecount} field on lines
     *                      and add it to the output format
     */
    public DupeCountProcessor(BaseProcessor output, boolean showDupeCount) {
        super(output);
        this.showDupeCount = showDupeCount;
        this.dupeHashmap = new HashMap<String, DupeTrack>();
    }

    /**
     * Factory for the record stored the first time a digest is seen.
     *
     * @return a fresh tracking record
     */
    protected DupeTrack createDupeTrack() {
        return new DupeTrack();
    }

    /**
     * Extension hook invoked once per line from {@link #writeLine(CDXLine)},
     * after the count has been updated and before the line is forwarded.
     * The default implementation does nothing.
     *
     * @param counter the tracking record for the line's digest
     * @param line    the line being processed
     * @param isDupe  {@code false} when this digest had no tracking record yet,
     *                {@code true} otherwise
     */
    protected void handleLine(DupeTrack counter, CDXLine line, boolean isDupe) {
        // Intentionally empty: subclasses supply behaviour.
    }

    /**
     * Updates the duplicate count for the line's digest, optionally stamps the
     * count onto the line, notifies {@link #handleLine}, and forwards the line
     * to the wrapped processor.
     *
     * @param line the line to process
     * @return whatever the wrapped processor's {@code writeLine} returns
     */
    @Override
    public int writeLine(CDXLine line) {
        final String key = line.getDigest();
        DupeTrack track = dupeHashmap.get(key);
        final boolean seenBefore = (track != null);

        if (seenBefore) {
            track.count++;
        } else {
            track = createDupeTrack();
            dupeHashmap.put(key, track);
        }

        if (showDupeCount) {
            // A first sighting is always reported as the literal "0",
            // independent of what the freshly created record holds.
            final String shown = seenBefore ? String.valueOf(track.count) : "0";
            line.setField(dupecount, shown);
        }

        handleLine(track, line, seenBefore);
        return inner.writeLine(line);
    }

    /**
     * Appends the {@code dupecount} field name to the output format when count
     * display is enabled; otherwise returns {@code format} unchanged.
     *
     * @param format the incoming output format
     * @return the format to use for output
     */
    @Override
    public FieldSplitFormat modifyOutputFormat(FieldSplitFormat format) {
        if (!showDupeCount) {
            // NOTE(review): this path does not call super.modifyOutputFormat, so
            // any format changes made further down the chain are presumably not
            // applied when counts are hidden -- confirm this is intended.
            return format;
        }
        return super.modifyOutputFormat(format).addFieldNames(dupecount);
    }
}