package org.archive.cdxserver.processor;
import org.archive.cdxserver.writer.CDXWriter;
import org.archive.format.cdx.CDXLine;
import org.archive.format.cdx.FieldSplitFormat;
/**
* {@code BaseProcessor} is an interface for a receiver
* of {@link CDXLine}s.
* <p>Implementation may be final output formatter ({@link CDXWriter}
* subclasses for example), or an intermediary processor that performs
* transformation and/or filtering on the sequence of CDXLines.</p>
* <p>{@code CDXServer} starts from {@code CDXWriter} and builds up
* nested pipeline of {@code BaseProcessor}s, and then calls following
* methods in sequence on the {@code BaseProcessor} at the top:
* <ol>
* <li>modifyOutputFormat(FieldSplitFormat)</li>
* <li>begin()</li>
* <li>for each CDXLine:</li>
* <ol>
* <li>trackLine(CDXLine)</li>
* <li>writeLine(CDXLine)</li>
* </ol>
* <li>writeResumeKey(String) (if {@code showResumeKey})</li>
* <li>end()</li>
* </ol>
*/
public interface BaseProcessor {
/**
* This method will be called just before looping over
* the sequence of CDX lines.
* Intermediary processor must call {@code begin()}
* on nested processor.
*/
public void begin();
/**
* Called on each CDX line, just before timestamp range filtering
* (to and from parameters), regexp filtering, and {@code collapser}
* processing. Typically used for counting the number of CDX lines
* collapsed / grouped.
* @param line CDX line
*/
public void trackLine(CDXLine line);
/**
* Process {@code line}.
* @param line {@code CDXLine}
* @return 1 if {@code line} is sent to output, 0 otherwise.
*/
public int writeLine(CDXLine line);
/**
* Write resumption key.
* Only the final {@code CDXWriter} should do
* actual work. All intermediaries shall simply call
* {@code writeResumeKey(resumeKey)} on nested processor.
* @param resumeKey
*/
public void writeResumeKey(String resumeKey);
/**
* Called at the end.
* Perform any clean ups / finalizations here.
* Intermediaries processor should call {@code end()}
* on nested processor.
*/
public void end();
/**
* Return output format (list of fields), given input format {@code format}.
* Intermediaries should call {@code modifyOutputFormat(format)} on nested
* processor first, then make appropriate changes to it if they add/remove
* fields.
* @param format input format
* @return output format
* @see CDXFieldConstants
*/
public FieldSplitFormat modifyOutputFormat(FieldSplitFormat format);
}