package org.meaningfulweb.cext.processors; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import javax.script.ScriptEngine; import javax.script.ScriptEngineManager; import javax.script.ScriptException; import org.meaningfulweb.cext.HtmlContentProcessor; import org.meaningfulweb.util.XMLUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; import org.jdom.Comment; import org.jdom.Document; import org.jdom.Element; import org.jdom.Text; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class ScriptProcessor extends HtmlContentProcessor { private final static Logger LOG = LoggerFactory .getLogger(ScriptProcessor.class); private static ScriptEngineManager factory = new ScriptEngineManager(); private String scriptDir = null; private List<String> scripts = new ArrayList<String>(); private boolean extractHtml = true; private boolean extractText = true; private String getScriptType(String filename) { if (filename.endsWith("rb")) { return "jruby"; } else if (filename.endsWith("groovy") || filename.endsWith("gv")) { return "groovy"; } else if (filename.endsWith("js")) { return "js"; } return null; } public String getScriptDir() { return scriptDir; } public void setScriptDir(String scriptDir) { this.scriptDir = scriptDir; } public List<String> getScripts() { return scripts; } public void setScripts(List<String> scripts) { this.scripts = scripts; } public boolean isExtractHtml() { return extractHtml; } public void setExtractHtml(boolean extractHtml) { this.extractHtml = extractHtml; } public boolean isExtractText() { return extractText; } public void setExtractText(boolean extractText) { this.extractText = extractText; } @Override public boolean processContent(Document document) { if (scripts != null && scripts.size() > 0) { Document tempDoc = new Document(); tempDoc.addContent(document.cloneContent()); for (String script : scripts) { // get the source of the script, return if no script or it is blank File scriptFile = new File(scriptDir, script); if (!scriptFile.exists()) { continue; } String scriptSource = null; try { scriptSource = FileUtils.readFileToString(scriptFile); } catch (IOException e) { } if (StringUtils.isBlank(scriptSource)) { continue; } // get the script engine, if none is available continue String engineType = getScriptType(script); if (StringUtils.isBlank(engineType)) { continue; } ScriptEngine engine = factory.getEngineByName(engineType); engine.put("doc", XMLUtils.toHtml(tempDoc)); engine.put("output", new HashMap<String, Object>()); try { engine.eval(scriptSource); } catch (ScriptException e) { LOG.error("Error processing script: " + script, e); continue; } Map<String, Object> output = (Map<String, Object>)engine.get("output"); if (output != null && output.size() > 0) { for (Entry<String, Object> entry : output.entrySet()) { String key = entry.getKey(); Object val = entry.getValue(); if (val instanceof Element) { Element selElem = (Element)entry.getValue(); if (extractHtml) { addExtractedValue(key, XMLUtils.toHtml(selElem)); } if (extractText) { addExtractedValue(key + ".text", XMLUtils.toText(selElem)); } } else if (val instanceof Text) { addExtractedValue(key, ((Text)val).getTextNormalize()); } else if (val instanceof Comment) { addExtractedValue(key, ((Comment)val).getText()); } } } } } return true; } }