/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.modules.cp;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.servlet.http.HttpServletRequest;
import org.apache.batik.css.parser.ParseException;
import org.apache.batik.css.parser.Parser;
import org.cyberneko.html.parsers.SAXParser;
import org.olat.core.dispatcher.impl.StaticMediaDispatcher;
import org.olat.core.dispatcher.mapper.Mapper;
import org.olat.core.gui.components.tree.TreeNode;
import org.olat.core.gui.media.MediaResource;
import org.olat.core.gui.media.NotFoundMediaResource;
import org.olat.core.gui.media.StringMediaResource;
import org.olat.core.gui.render.StringOutput;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
import org.olat.core.util.FileUtils;
import org.olat.core.util.StringHelper;
import org.olat.core.util.vfs.VFSContainer;
import org.olat.core.util.vfs.VFSItem;
import org.olat.core.util.vfs.VFSLeaf;
import org.olat.core.util.vfs.VFSMediaResource;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
*
* Description:<br>
* Deliver the CP as a single page. All the HTML Pages are parser and the
* attribute href/src are rewritten to absolute /olat/m/xxxx urls. The HTML parser
* used is NekoHTML. For CSS there is the same process. We use the Batik CSS
* parser for SAC.
*
* <P>
* Initial Date: 18 mars 2011 <br>
* @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
*/
public class CPPrintMapper implements Mapper {
private static final OLog log = Tracing.createLoggerFor(CPPrintMapper.class);
private static final String DEFAULT_ENCODING = "iso-8859-1";
private static final String UNICODE_ENCODING = "unicode";
private static final String DEFAULT_CONTENT_TYPE = "text/html";
private static final String XHTML_EXTENSION = "xhtml";
private static final String XHTML_CONTENT_TYPE = "application/xhtml+xml";
private static final Pattern PATTERN_ENCTYPE = Pattern.compile("<meta.*charset=([^\"\']*)[\"\']", Pattern.CASE_INSENSITIVE);
private static final Pattern PATTERN_XML_ENCTYPE = Pattern.compile("<\\?xml.*encoding=[\"\']([^\"\']*)[\"\']", Pattern.CASE_INSENSITIVE);
private static final Pattern PATTERN_CONTTYPE = Pattern.compile("<meta.*content-type\"?\\s*content\\s*=\\s*[\"]?+(.+?)([\"]?+\\s*/>)", Pattern.CASE_INSENSITIVE);
private static final Pattern PATTERN_DOCTYPE = Pattern.compile("<!DOCTYPE\\s*html\\s*PUBLIC\\s*[\"\']\\s*-//W3C//DTD\\s*(.+?)(//EN)", Pattern.CASE_INSENSITIVE);
private static final String FILE_SUFFIX_HTM = "htm";
private static final String FILE_SUFFIX_CSS = "css";
private static final String TAG_FRAMESET = "<frameset";
private static final String TAG_FRAMESET_UPPERC = "<FRAMESET";
private static final String FILE_SUFFIX_JS = ".js";
private String g_encoding;
private List<String> selectedNodeIds;
private String baseUri;
private final String themeBaseUri;
private final CPManifestTreeModel ctm;
private final VFSContainer rootDir;
private String contentEncoding;
private String jsEncoding;
public CPPrintMapper(CPManifestTreeModel ctm, VFSContainer rootContainer, String themeBaseUri) {
this.themeBaseUri = themeBaseUri;
this.rootDir = rootContainer;
this.ctm = ctm;
}
public void setBaseUri(String baseUri) {
this.baseUri = baseUri;
}
public void setSelectedNodeIds(List<String> selectedNodeIds) {
this.selectedNodeIds = selectedNodeIds;
}
public void setContentEncoding(String encoding) {
this.contentEncoding = encoding;
}
public void setJSEncoding(String encoding) {
this.jsEncoding = encoding;
}
@Override
public MediaResource handle(String relPath, HttpServletRequest request) {
if(relPath.endsWith("print.html")) {
return deliverCompositePage(request);
}
return deliverFile(request, relPath);
}
private MediaResource deliverCompositePage(HttpServletRequest request) {
List<NekoHtmlPageHandler> parsedPages = composePrintPage(selectedNodeIds);
StringBuilder sb = new StringBuilder();
sb.append("<html><head>");
for(NekoHtmlPageHandler page:parsedPages) {
sb.append("<!-- Header of -->");
sb.append(page.getHeader()).append("\n\n");
}
injectJavascriptAndCss(sb);
sb.append("</head><body onload='window.focus();window.print()'>");
for(NekoHtmlPageHandler page:parsedPages) {
if(page.isEmpty()) {
String title = page.getTitle();
if(StringHelper.containsNonWhitespace(title)) {
int level = page.getLevel() + 1;
level = Math.min(level, 6);
sb.append("<h").append(level).append(">").append(page.getTitle()).append("</h").append(level).append(">");
}
} else {
bodyDecorator(page, sb);
}
}
sb.append("</body></html>");
MediaResource mr = prepareMediaResource(request, sb.toString(), g_encoding, "text/html");
return mr;
}
private void injectJavascriptAndCss(StringBuilder output) {
StringOutput sb = new StringOutput();
sb.append("<!--[if lt IE 9]>");
sb.append("<script type=\"text/javascript\" src=\"");
StaticMediaDispatcher.renderStaticURI(sb, "js/jquery/jquery-1.9.1.min.js");
sb.append("\")'></script>");
sb.append("<![endif]-->");
sb.append("<!--[if gte IE 9]><!-->");
sb.append("<script type=\"text/javascript\" src=\"");
StaticMediaDispatcher.renderStaticURI(sb, "js/jquery/jquery-2.1.3.min.js");
sb.append("\")'></script>");
sb.append("<!--<![endif]-->");
output.append(sb.toString());
output.append("<link href=\"").append(themeBaseUri).append("all/content.css\" rel=\"stylesheet\" type=\"text/css\" />\n");
}
protected void bodyDecorator(NekoHtmlPageHandler page, StringBuilder sb) {
sb.append("<!-- Body of ").append(page.getDocument().getName()).append("-->");
sb.append("<div class=\"o_cp_print_page\" style='clear:both; position:relative;page-break-after:always;'>\n");
sb.append(page.getBody());
sb.append("\n</div>");
}
private List<NekoHtmlPageHandler> composePrintPage(List<String> nodeIds) {
List<NekoHtmlPageHandler> pages = new ArrayList<NekoHtmlPageHandler>();
for(String nodeId:nodeIds) {
NekoHtmlPageHandler parsedPage = null;
TreeNode treeNode = ctm.getNodeById(nodeId);
String identifierRes = (String)treeNode.getUserObject();
if(StringHelper.containsNonWhitespace(identifierRes)) {
VFSItem currentItem = rootDir.resolve(identifierRes);
if(currentItem instanceof VFSLeaf) {
String extension = FileUtils.getFileSuffix(currentItem.getName());
if("htm".equalsIgnoreCase(extension) || "html".equalsIgnoreCase(extension) || "xhtml".equalsIgnoreCase(extension)) {
VFSLeaf currentLeaf = (VFSLeaf)currentItem;
parsedPage = parsePage(identifierRes, currentLeaf, treeNode);
}
}
}
if(parsedPage == null) {
parsedPage = new NekoHtmlPageHandler(treeNode, null, rootDir, baseUri);
}
pages.add(parsedPage);
}
return pages;
}
private NekoHtmlPageHandler parsePage(String identifierRes, VFSLeaf document, TreeNode node) {
NekoHtmlPageHandler page = new NekoHtmlPageHandler(node, document, rootDir, baseUri);
int index = identifierRes.lastIndexOf('/');
if(index > 0) {
String relativePath = identifierRes.substring(0, index+1);
page.setRelativePath(relativePath);
}
try {
Page content = loadPageWithGuess(document);
if(g_encoding == null) {
g_encoding = content.getEncoding();
}
String rawContent;
if (content.isUseLoadedPageString()) {
rawContent = content.getPage();
} else {
// found a new charset other than iso-8859-1, load string with proper encoding
rawContent = FileUtils.load(document.getInputStream(), content.getEncoding());
}
SAXParser parser = new SAXParser();
parser.setContentHandler(page);
parser.parse(new InputSource(new StringReader(rawContent)));
return page;
} catch (SAXException e) {
log.error("", e);
return null;
} catch (IOException e) {
log.error("", e);
return null;
} catch (Exception e) {
log.error("", e);
return null;
}
}
protected MediaResource deliverCssFile(VFSLeaf cssFile, HttpServletRequest request) {
Page page = loadPageWithGuess(cssFile);
String encoding = page.getEncoding();
String content = page.getPage();
SACCSSHandler handler = new SACCSSHandler(cssFile, rootDir, baseUri);
try {
Parser parser = new Parser();
parser.setDocumentHandler(handler);
parser.parseStyleSheet(new org.w3c.css.sac.InputSource(new StringReader(content)));
} catch (IOException ioe) {
log.error("", ioe);
return null;
} catch (ParseException pe) {
log.error("", pe);
return null;
}
String cleanStyleSheet = handler.getCleanStylesheet();
return prepareMediaResource(request, cleanStyleSheet, encoding, "text/css");
}
protected MediaResource deliverFile(HttpServletRequest httpRequest, String path) {
//if directory gets renamed root becomes null
if (rootDir == null) {
return new NotFoundMediaResource("directory not found"+path);
}
VFSLeaf vfsLeaf = null;
VFSItem vfsItem = rootDir.resolve(path);
//only files are allowed, but somehow it happened that folders showed up here
if (vfsItem instanceof VFSLeaf) {
vfsLeaf = (VFSLeaf)vfsItem;
} else {
return new NotFoundMediaResource(path);
}
MediaResource mr;
// check if path ends with .html, .htm or .xhtml. We do this by searching for "htm"
// and accept positions of this string at length-3 or length-4
if (path.toLowerCase().lastIndexOf(FILE_SUFFIX_HTM) >= (path.length()-4)) {
// set the http content-type and the encoding
Page page = loadPageWithGuess(vfsLeaf);
g_encoding = page.getEncoding();
if (page.isUseLoadedPageString()) {
mr = prepareMediaResource(httpRequest, page.getPage(), g_encoding, page.getContentType());
} else {
// found a new charset other than iso-8859-1, load string with proper encoding
String content = FileUtils.load(vfsLeaf.getInputStream(), g_encoding);
mr = prepareMediaResource(httpRequest, content, g_encoding, page.getContentType());
}
} else if (path.toLowerCase().lastIndexOf(FILE_SUFFIX_CSS) >= (path.length()-4)) {
// set the http content-type and the encoding
mr = deliverCssFile(vfsLeaf, httpRequest);
} else if (path.endsWith(FILE_SUFFIX_JS)) { // a javascript library
VFSMediaResource vmr = new VFSMediaResource(vfsLeaf);
// set the encoding; could be null if this page starts with .js file
// (not very common...).
// if we set no header here, apache sends the default encoding
// together with the mime-type, which is wrong.
// so we assume the .js file has the same encoding as the html file
// that loads the .js file
if (jsEncoding != null) vmr.setEncoding(jsEncoding);
else if (g_encoding != null) vmr.setEncoding(g_encoding);
mr = vmr;
} else {
// binary data: not .html, not .htm, not .js -> treated as is
mr = new VFSMediaResource(vfsLeaf);
}
return mr;
}
private Page loadPageWithGuess(VFSLeaf vfsPage) {
if(contentEncoding != null && isCharsetSupported(contentEncoding)) {
Page page = new Page();
page.setExtension(FileUtils.getFileSuffix(vfsPage.getName()));
page.setEncoding(contentEncoding);
page.setUseLoadedPageString(true);
String content = FileUtils.load(vfsPage.getInputStream(), contentEncoding);
page.setContentType(guessContentType(page, content));
page.setPage(content);
return page;
}
Page page = new Page();
page.setExtension(FileUtils.getFileSuffix(vfsPage.getName()));
page.setEncoding(DEFAULT_ENCODING);
String content = FileUtils.load(vfsPage.getInputStream(), DEFAULT_ENCODING);
page.setContentType(guessContentType(page, content));
// <meta.*charset=([^"]*)"
//extract only the charset attribute without the overhead of creating an htmlparser
boolean guessed = loadPageWithGuess(page, content, DEFAULT_ENCODING);
if(!guessed) {
//try opening it with utf-8
String contentUnicode = FileUtils.load(vfsPage.getInputStream(), UNICODE_ENCODING);
guessed = loadPageWithGuess(page, contentUnicode, UNICODE_ENCODING);
if(!guessed) {
//take default
page.setPage(content);
page.setUseLoadedPageString(true);
}
}
return page;
}
private boolean loadPageWithGuess(Page page, String content, String encoding) {
//default encoding for xhtml
if(XHTML_CONTENT_TYPE.equals(page.getContentType())) {
page.setEncoding("utf-8");
}
String guessedEncoding = guessEncoding(content);
if (guessedEncoding != null) {
// use found char set
//if longer than 50 the regexp did fail
if (isCharsetSupported(guessedEncoding)) {
page.setEncoding(guessedEncoding);
} else {
return false;
}
// reuse already loaded page when page uses the default encoding
if (page.getEncoding().equalsIgnoreCase(encoding) || page.getEncoding().contains(encoding)
|| page.getEncoding().toLowerCase().contains(encoding)) {
page.setUseLoadedPageString(true);
page.setPage(content);
}
return true;
}
return false;
}
private String guessContentType(Page page, String content) {
String cType = null;
if(XHTML_EXTENSION.equals(page.getExtension())) {
Matcher dm = PATTERN_DOCTYPE.matcher(content);
if (dm.find()) {
String doctype = dm.group(1).toLowerCase();
//default settings for XHTML-documents, should be taken if no <meta http-equiv="content-type" .../> is given
if (doctype.indexOf("xhtml") == 0 && doctype.indexOf("mathml") > 0) {
cType = XHTML_CONTENT_TYPE;
}
}
}
Matcher cm = PATTERN_CONTTYPE.matcher(content);
if (cm.find()) {
//use found content-type
String contentType = cm.group(1);
String[] types=contentType.split(";");
for (int i=0;i<types.length;i++) {
if (!(types[i].contains("charset"))) {
contentType=types[i].trim();
break;
}
}
//if longer than 50 the regexp did fail
if (contentType.length() < 50) {
cType = contentType;
}
}
if(cType == null) {
return DEFAULT_CONTENT_TYPE;
}
return cType;
}
private String guessEncoding(String content) {
Matcher m = PATTERN_ENCTYPE.matcher(content);
if (m.find()) {
// use found char set
String htmlcharset = m.group(1);
//if longer than 50 the regexp did fail
if (htmlcharset.length() < 50 ) {
return htmlcharset;
}
}
Matcher xmlDeclaration = PATTERN_XML_ENCTYPE.matcher(content);
if (xmlDeclaration.find()) {
// use found char set
String xmlcharset = xmlDeclaration.group(1);
//if longer than 50 the regexp did fail
if (xmlcharset.length() < 50 ) {
return xmlcharset;
}
}
return null;
}
private boolean isCharsetSupported(String enc) {
try {
return Charset.isSupported(enc);
} catch (IllegalCharsetNameException e) {
return false;
}
}
private StringMediaResource prepareMediaResource(HttpServletRequest httpRequest, String page, String enc, String contentType) {
StringMediaResource smr = new StringMediaResource();
if(XHTML_CONTENT_TYPE.equals(contentType)) {
//check if the application/xhtml+xml is supported (not supported by IEs)
//if not, replace the content type by text/html for compatibility
String accept = httpRequest.getHeader("Accept");
if(accept == null || accept.indexOf(XHTML_CONTENT_TYPE) < 0) {
contentType = DEFAULT_CONTENT_TYPE;
}
}
String mimetype = contentType + ";charset=" + StringHelper.check4xMacRoman(enc);
smr.setContentType(mimetype);
smr.setEncoding(enc);
//inject some javascript code to size iframe to proper height, but only when not a page with framesets
if (page.indexOf(TAG_FRAMESET) != -1 || page.indexOf(TAG_FRAMESET_UPPERC) != -1) {
//is frameset -> deliver unparsed
smr.setData(page);
} else {
smr.setData(page);
}
return smr;
}
public class Page {
private String encoding;
private String contentType;
private String extension;
private String page;
private boolean useLoadedPageString = false;
public String getExtension() {
return extension;
}
public void setExtension(String extension) {
this.extension = extension;
}
public String getEncoding() {
return encoding;
}
public void setEncoding(String encoding) {
this.encoding = encoding;
}
public boolean isUseLoadedPageString() {
return useLoadedPageString;
}
public void setUseLoadedPageString(boolean useLoadedPageString) {
this.useLoadedPageString = useLoadedPageString;
}
public String getContentType() {
return contentType;
}
public void setContentType(String contentType) {
this.contentType = contentType;
}
public String getPage() {
return page;
}
public void setPage(String page) {
this.page = page;
}
}
}