package qa.qcri.aidr.predict.classification;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.log4j.Logger;
import qa.qcri.aidr.predict.DataStore;
import qa.qcri.aidr.predict.common.PipelineProcess;
import qa.qcri.aidr.predict.common.RateLimiter;
import qa.qcri.aidr.predict.common.TaggerConfigurationProperty;
import qa.qcri.aidr.predict.common.TaggerConfigurator;
import qa.qcri.aidr.predict.data.Document;
/**
 * LabelingTaskWriter consumes fully classified items and writes them to the
 * task buffer in the database for human annotation. If more documents are
 * available than what can reasonably be processed by humans, the surplus
 * documents are discarded.
 *
 * <p>Accepted items are collected in an in-memory buffer and flushed through
 * {@link DataStore#saveDocumentsToDatabase}; flushes are throttled by the
 * {@code MAX_TASK_WRITE_FQ_MS} property, and {@link #idle()} flushes whatever
 * is still buffered.
 *
 * <p>NOTE(review): the per-crisis maps below are static, unsynchronized
 * {@link HashMap}s shared by every instance of this class. Confirm that they
 * are only ever touched from a single thread.
 *
 * @author jrogstadius
 */
public class LabelingTaskWriter extends PipelineProcess {

    private static final Logger logger = Logger.getLogger(LabelingTaskWriter.class);

    /** Wall-clock time (ms) at which {@link #writeToDB()} last finished; 0 before the first write. */
    private long lastDBWrite = 0;

    /** Wall-clock time (ms) at which {@link #isTruncateRunLimited()} last returned false; 0 initially. */
    private long lastTruncateTime = 0;

    /** Running total of documents passed to the data store by this instance. */
    public long writeCount = 0;

    /** Documents accepted by {@link #save(Document)} that have not been flushed yet. */
    ArrayList<Document> writeBuffer = new ArrayList<Document>();

    // key = crisisID, value = number of items saved for that crisis since the
    // counter was last reset by writeToDB().
    static Map<Integer, Long> activeCrisisIDList = new HashMap<Integer, Long>();

    // Currently neither read nor written by this class; kept because it is
    // visible to the rest of the package.
    static Map<Integer, Double> activeCrisisMaxConf = new HashMap<Integer, Double>();

    // key = crisisID, value = time (ms) at which isTruncateRateLimited() last
    // returned false for that crisis.
    static Map<Integer, Long> crisisLastTruncateTime = new HashMap<Integer, Long>();

    RateLimiter taskRateLimiter = new RateLimiter(
            Integer.parseInt(TaggerConfigurator.getInstance().getProperty(
                    TaggerConfigurationProperty.MAX_NEW_TASKS_PER_MINUTE)));

    DocumentHistory history = new DocumentHistory(
            Integer.parseInt(TaggerConfigurator.getInstance().getProperty(
                    TaggerConfigurationProperty.TAGGER_TASK_BUFFER_SIMILARITY_BUFFER)),
            Double.parseDouble(TaggerConfigurator.getInstance().getProperty(
                    TaggerConfigurationProperty.TAGGER_TASK_BUFFER_MAX_SIMILARITY)));

    /**
     * Saves the item if it already carries human labels, or if the task rate
     * limiter is not limiting, the item is novel, and the document history
     * accepts it as novel. Any other item is ignored by this method.
     *
     * <p>Every saved item, including one with human labels, is logged as an
     * event on the task rate limiter.
     *
     * @param item the classified document to consider
     */
    protected void processItem(Document item) {
        // Evaluation order matters: history.addItemIfNovel() is only reached
        // when the cheaper checks before it have passed.
        boolean accepted = item.hasHumanLabels()
                || (!taskRateLimiter.isLimited() && item.isNovel() && history.addItemIfNovel(item));
        if (!accepted) {
            return;
        }
        save(item);
        taskRateLimiter.logEvent();
    }

    /**
     * Buffers the item, increments the saved-item counter of its crisis, and
     * flushes the buffer immediately unless writes are currently rate limited.
     *
     * @param item the document to buffer
     */
    void save(Document item) {
        writeBuffer.add(item);

        int crisisID = item.getCrisisID().intValue();
        Long previousCount = activeCrisisIDList.get(crisisID);
        long updatedCount = (previousCount == null) ? 1L : previousCount + 1L;
        activeCrisisIDList.put(crisisID, updatedCount);

        if (!isWriteRateLimited()) {
            writeToDB();
        }
    }

    /** Flushes any buffered documents. */
    @Override
    protected void idle() {
        if (writeBuffer.size() > 0) {
            writeToDB();
        }
    }

    /**
     * Hands the buffered documents to the data store, updates
     * {@link #writeCount}, empties the buffer, and, when a truncate run is due
     * (or this is the first write), resets the per-crisis counters. Finally
     * records the current time as the last write time.
     */
    void writeToDB() {
        DataStore.saveDocumentsToDatabase(writeBuffer);
        writeCount += writeBuffer.size();
        writeBuffer.clear();

        if (!isTruncateRunLimited() || 0 == lastDBWrite) {
            resetCrisisCounters();
        }
        lastDBWrite = System.currentTimeMillis();
    }

    /**
     * Resets the saved-item counter of every crisis whose per-crisis truncate
     * interval has elapsed or whose counter exceeds MAX_NEW_TASKS_PER_MINUTE.
     */
    private void resetCrisisCounters() {
        // Truncation of the task buffer itself was moved to the MySQL event
        // scheduler (Meghna); what remains here is only the bookkeeping that
        // starts a fresh counting interval for each crisis.
        int maxNewTasksPerMinute = Integer.parseInt(TaggerConfigurator.getInstance().getProperty(
                TaggerConfigurationProperty.MAX_NEW_TASKS_PER_MINUTE));

        for (Map.Entry<Integer, Long> crisisCount : activeCrisisIDList.entrySet()) {
            // isTruncateRateLimited() must be evaluated first: it records the
            // start of a new interval as a side effect when it returns false.
            if (!isTruncateRateLimited(crisisCount.getKey())
                    || crisisCount.getValue() > maxNewTasksPerMinute) {
                crisisCount.setValue(0L); // reset count for next interval
            }
        }
    }

    /**
     * @return true if less than MAX_TASK_WRITE_FQ_MS milliseconds have passed
     *         since the last database write
     */
    boolean isWriteRateLimited() {
        int minWriteIntervalMs = Integer.parseInt(TaggerConfigurator.getInstance().getProperty(
                TaggerConfigurationProperty.MAX_TASK_WRITE_FQ_MS));
        return (System.currentTimeMillis() - lastDBWrite) < minWriteIntervalMs;
    }

    /**
     * Checks whether the per-crisis truncate interval (MIN_TRUNCATE_INTERVAL_MS)
     * is still running for the given crisis.
     *
     * <p>Side effect: when the result is false, the current time is stored as
     * the new interval start for this crisis.
     *
     * @param crisisID the crisis to check
     * @return true if the interval has not elapsed yet
     */
    boolean isTruncateRateLimited(int crisisID) {
        long now = System.currentTimeMillis();
        Long recorded = crisisLastTruncateTime.get(crisisID);
        long lastTime = (recorded == null) ? 0L : recorded;

        int minIntervalMs = Integer.parseInt(TaggerConfigurator.getInstance().getProperty(
                TaggerConfigurationProperty.MIN_TRUNCATE_INTERVAL_MS));
        boolean limited = (now - lastTime) < minIntervalMs;
        if (!limited) {
            crisisLastTruncateTime.put(crisisID, now);
        }
        return limited;
    }

    /**
     * Checks whether the global truncate-run interval (TRUNCATE_RUN_INTERVAL_MS)
     * is still running.
     *
     * <p>Side effect: when the result is false, the current time is stored as
     * the start of the next interval.
     *
     * @return true if the interval has not elapsed yet
     */
    boolean isTruncateRunLimited() {
        long now = System.currentTimeMillis();
        long runIntervalMs = Long.parseLong(TaggerConfigurator.getInstance().getProperty(
                TaggerConfigurationProperty.TRUNCATE_RUN_INTERVAL_MS));
        boolean limited = (now - lastTruncateTime) < runIntervalMs;
        if (!limited) {
            lastTruncateTime = now;
        }
        return limited;
    }
}