package fi.otavanopisto.muikku.plugins.dnm.util;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.enterprise.inject.Any;
import javax.enterprise.inject.Instance;
import javax.inject.Inject;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.xerces.parsers.DOMParser;
import org.cyberneko.html.HTMLConfiguration;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import com.sksamuel.diffpatch.DiffMatchPatch;
import com.sksamuel.diffpatch.DiffMatchPatch.Diff;
import com.sksamuel.diffpatch.DiffMatchPatch.Patch;
import fi.foyt.coops.CoOpsInternalErrorException;
import fi.foyt.coops.CoOpsUsageException;
import fi.otavanopisto.muikku.plugins.dnm.parser.DeusNexXmlUtils;
import fi.otavanopisto.muikku.plugins.material.HtmlMaterialController;
import fi.otavanopisto.muikku.plugins.material.coops.CoOpsDiffAlgorithm;
import fi.otavanopisto.muikku.plugins.material.coops.model.HtmlMaterialRevision;
import fi.otavanopisto.muikku.plugins.material.model.HtmlMaterial;
import fi.otavanopisto.muikku.plugins.workspace.WorkspaceMaterialContainsAnswersExeption;
import fi.otavanopisto.muikku.plugins.workspace.model.WorkspaceMaterial;
public class HtmlMaterialCleaner {
private static final String COOPS_PATCH_ALGORITHM = "dmp";
@Inject
private Logger logger;
@Inject
private HtmlMaterialController htmlMaterialController;
@Inject
@Any
private Instance<HtmlMaterialCleanerTask> analyzerTasks;
public void cleanMaterial(HtmlMaterial htmlMaterial, WorkspaceMaterial ownerMaterial) {
Long maxRevision = getMaterialRevision(htmlMaterial);
try {
// Document
String html = htmlMaterialController.getRevisionHtml(htmlMaterial, maxRevision);
DOMParser parser = new DOMParser(new HTMLConfiguration());
parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
InputSource inputSource = new InputSource(new StringReader(html));
parser.parse(inputSource);
Document document = parser.getDocument();
// Tasks
Iterator<HtmlMaterialCleanerTask> taskIterator = analyzerTasks.iterator();
List<HtmlMaterialCleanerTask> cleanerTasks = new ArrayList<HtmlMaterialCleanerTask>();
while (taskIterator.hasNext()) {
cleanerTasks.add(taskIterator.next());
}
Collections.sort(cleanerTasks, new Comparator<HtmlMaterialCleanerTask>() {
@Override
public int compare(HtmlMaterialCleanerTask o1, HtmlMaterialCleanerTask o2) {
return o1.getPriority().compareTo(o2.getPriority());
}
});
String newHtml = null;
for (HtmlMaterialCleanerTask cleanerTask : cleanerTasks) {
if (cleanerTask.process(document, ownerMaterial)) {
newHtml = DeusNexXmlUtils.serializeElement(document.getDocumentElement(), true, false, "html");
patch(htmlMaterial, newHtml);
}
}
}
catch (Exception e) {
logger.log(Level.SEVERE, "Failed to clean material " + htmlMaterial.getId(), e);
}
}
public void patch(HtmlMaterial material, String newHtml) throws CoOpsUsageException, CoOpsInternalErrorException, WorkspaceMaterialContainsAnswersExeption {
CoOpsDiffAlgorithm algorithm = htmlMaterialController.findAlgorithm(COOPS_PATCH_ALGORITHM);
if (algorithm == null) {
throw new CoOpsUsageException("Algorithm is not supported by this server");
}
Long maxRevision = getMaterialRevision(material);
boolean published = material.getRevisionNumber().equals(maxRevision);
String oldHtml = htmlMaterialController.getRevisionHtml(material, maxRevision);
if (oldHtml == null) {
oldHtml = "";
}
String checksum = DigestUtils.md5Hex(newHtml);
String patch = createPatch(oldHtml, newHtml);
Long patchRevisionNumber = maxRevision + 1;
HtmlMaterialRevision htmlMaterialRevision = htmlMaterialController.createRevision(material, "dnm-cleaner", patchRevisionNumber, new Date(), patch, checksum);
if (published) {
htmlMaterialController.updateHtmlMaterialToRevision(material, newHtml, htmlMaterialRevision.getRevision(), false, false);
}
}
private Long getMaterialRevision(HtmlMaterial material) {
Long maxRevision = htmlMaterialController.lastHtmlMaterialRevision(material);
return maxRevision == null ? 0l : maxRevision;
}
private String createPatch(String oldHtml, String newHtml) {
DiffMatchPatch diffMatchPatch = new DiffMatchPatch();
LinkedList<Diff> diffs = diffMatchPatch.diff_main(oldHtml, newHtml);
diffMatchPatch.diff_cleanupEfficiency(diffs);
LinkedList<Patch> patch = diffMatchPatch.patch_make(diffs);
return diffMatchPatch.patch_toText(patch);
}
}