package org.archive.cdxserver.processor; import org.archive.format.cdx.CDXLine; import org.archive.format.cdx.FieldSplitFormat; public abstract class RevisitResolver extends DupeCountProcessor { public final static String origfilename = "orig.filename"; public final static String origoffset = "orig.offset"; public final static String origlength = "orig.length"; public RevisitResolver(BaseProcessor output, boolean showDupeCount) { super(output, showDupeCount); } protected void fillBlankOrig(CDXLine line) { line.setField(origlength, CDXLine.EMPTY_VALUE); line.setField(origoffset, CDXLine.EMPTY_VALUE); line.setField(origfilename, CDXLine.EMPTY_VALUE); } protected void fillRevisit(CDXLine line, CDXLine origLine) { line.setMimeType(origLine.getMimeType()); line.setStatusCode(origLine.getStatusCode()); line.setField(origlength, origLine.getLength()); line.setField(origoffset, origLine.getOffset()); line.setField(origfilename, origLine.getFilename()); } protected abstract void handleLine(DupeTrack counter, CDXLine line, boolean isDupe); protected boolean isRevisit(CDXLine line) { return (line.getMimeType().equals("warc/revisit") || line.getFilename().equals(CDXLine.EMPTY_VALUE)); } @Override public FieldSplitFormat modifyOutputFormat(FieldSplitFormat format) { format = super.modifyOutputFormat(format).addFieldNames(origlength, origoffset, origfilename); return format; } }