/*
* Copyright (C) 2011 Alex Kuiper <http://www.nightwhistler.net>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.nightwhistler.htmlspanner;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import android.util.Log;
import net.nightwhistler.htmlspanner.exception.ParsingCancelledException;
import net.nightwhistler.htmlspanner.handlers.*;
import net.nightwhistler.htmlspanner.handlers.attributes.AlignmentAttributeHandler;
import net.nightwhistler.htmlspanner.handlers.attributes.BorderAttributeHandler;
import net.nightwhistler.htmlspanner.handlers.attributes.StyleAttributeHandler;
import net.nightwhistler.htmlspanner.style.Style;
import net.nightwhistler.htmlspanner.handlers.StyledTextHandler;
import net.nightwhistler.htmlspanner.style.StyleValue;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.ContentNode;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import android.text.Spannable;
import android.text.SpannableStringBuilder;
/**
* HtmlSpanner provides an alternative to Html.fromHtml() from the Android
* libraries.
*
* In its simplest form, just call new HtmlSpanner().fromHtml() to get a similar
* result. The real strength is in being able to register custom NodeHandlers.
*
* @author work
*
*/
public class HtmlSpanner {

    /**
     * Temporary constant for the width of 1 horizontal em.
     * Used for calculating margins.
     */
    public static final int HORIZONTAL_EM_WIDTH = 10;

    /** Registered handlers, keyed by tag name. */
    private final Map<String, TagNodeHandler> handlers;

    /** Parser used to turn raw HTML input into a TagNode tree. */
    private final HtmlCleaner htmlCleaner;

    private boolean stripExtraWhiteSpace = false;

    private FontResolver fontResolver;

    /**
     * Switch to determine if CSS is used.
     */
    private boolean allowStyling = true;

    /**
     * If CSS colours are used.
     */
    private boolean useColoursFromStyle = true;

    /**
     * Creates a new HtmlSpanner using a default HtmlCleaner instance
     * and a SystemFontResolver.
     */
    public HtmlSpanner() {
        this(createHtmlCleaner(), new SystemFontResolver());
    }

    /**
     * Creates a new HtmlSpanner using the given HtmlCleaner instance.
     *
     * This allows for a custom-configured HtmlCleaner. The built-in
     * handlers are registered as part of construction.
     *
     * @param cleaner the HtmlCleaner used to parse all input
     * @param fontResolver the resolver used by {@link #getFont(String)}
     */
    public HtmlSpanner(HtmlCleaner cleaner, FontResolver fontResolver) {
        this.htmlCleaner = cleaner;
        this.fontResolver = fontResolver;
        this.handlers = new HashMap<String, TagNodeHandler>();

        registerBuiltInHandlers();
    }

    /**
     * Returns the FontResolver currently in use.
     *
     * @return the current FontResolver
     */
    public FontResolver getFontResolver() {
        return this.fontResolver;
    }

    /**
     * Replaces the FontResolver.
     *
     * @param fontResolver the new FontResolver
     */
    public void setFontResolver(FontResolver fontResolver) {
        this.fontResolver = fontResolver;
    }

    /**
     * Looks up a font by name through the current FontResolver.
     *
     * @param name the font name, passed on to the resolver unchanged
     * @return whatever the FontResolver returns for that name
     */
    public FontFamily getFont(String name) {
        return this.fontResolver.getFont(name);
    }

    /**
     * Switch to specify whether excess whitespace should be stripped from the
     * input.
     *
     * @param stripExtraWhiteSpace the new value of the switch
     */
    public void setStripExtraWhiteSpace(boolean stripExtraWhiteSpace) {
        this.stripExtraWhiteSpace = stripExtraWhiteSpace;
    }

    /**
     * Returns if whitespace is being stripped.
     *
     * @return the current value of the strip-whitespace switch
     */
    public boolean isStripExtraWhiteSpace() {
        return stripExtraWhiteSpace;
    }

    /**
     * Indicates whether the text style may be updated.
     *
     * If this is set to false, all CSS is ignored
     * and the basic built-in style is used.
     *
     * @return true if styling is allowed
     */
    public boolean isAllowStyling() {
        return allowStyling;
    }

    /**
     * Switch to specify if CSS style should be used.
     *
     * @param value true to allow styling
     */
    public void setAllowStyling(boolean value) {
        this.allowStyling = value;
    }

    /**
     * Switch to specify if the colours from CSS
     * should override user-specified colours.
     *
     * @param value true to use the colours from the style
     */
    public void setUseColoursFromStyle(boolean value) {
        this.useColoursFromStyle = value;
    }

    /**
     * Returns the value set through {@link #setUseColoursFromStyle(boolean)}.
     *
     * @return true if colours from the style are used
     */
    public boolean isUseColoursFromStyle() {
        return this.useColoursFromStyle;
    }

    /**
     * Registers a new custom TagNodeHandler.
     *
     * If a TagNodeHandler was already registered for the specified tagName it
     * will be overwritten.
     *
     * @param tagName the tag the handler is responsible for
     * @param handler the handler; must not be null
     * @throws NullPointerException if handler is null; in that case the
     *         registry is left unchanged
     */
    public void registerHandler(String tagName, TagNodeHandler handler) {
        // Bind the spanner first: a null handler now fails before the map is
        // modified, instead of leaving a "tagName -> null" entry behind.
        handler.setSpanner(this);
        this.handlers.put(tagName, handler);
    }

    /**
     * Removes the handler for the given tag.
     *
     * @param tagName the tag to remove handlers for.
     */
    public void unregisterHandler(String tagName) {
        this.handlers.remove(tagName);
    }

    /**
     * Parses the text in the given String.
     *
     * @param html the HTML text to parse
     *
     * @return a Spanned version of the text.
     */
    public Spannable fromHtml(String html) {
        return fromTagNode(this.htmlCleaner.clean(html), null);
    }

    /**
     * Parses the text in the given String, checking the callback for
     * cancellation while the node tree is processed.
     *
     * @param html the HTML text to parse
     * @param cancellationCallback polled during processing; may be null
     * @return a Spanned version of the text.
     * @throws ParsingCancelledException if the callback reports cancellation
     */
    public Spannable fromHtml(String html, CancellationCallback cancellationCallback) {
        return fromTagNode(this.htmlCleaner.clean(html), cancellationCallback);
    }

    /**
     * Parses the text in the given Reader.
     *
     * @param reader the source of the HTML text
     * @return a Spanned version of the text.
     * @throws IOException if the HtmlCleaner fails to read the input
     */
    public Spannable fromHtml(Reader reader) throws IOException {
        return fromTagNode(this.htmlCleaner.clean(reader), null);
    }

    /**
     * Parses the text in the given Reader, checking the callback for
     * cancellation while the node tree is processed.
     *
     * @param reader the source of the HTML text
     * @param cancellationCallback polled during processing; may be null
     * @return a Spanned version of the text.
     * @throws IOException if the HtmlCleaner fails to read the input
     * @throws ParsingCancelledException if the callback reports cancellation
     */
    public Spannable fromHtml(Reader reader, CancellationCallback cancellationCallback) throws IOException {
        return fromTagNode(this.htmlCleaner.clean(reader), cancellationCallback);
    }

    /**
     * Parses the text in the given InputStream.
     *
     * @param inputStream the source of the HTML text
     * @return a Spanned version of the text.
     * @throws IOException if the HtmlCleaner fails to read the input
     */
    public Spannable fromHtml(InputStream inputStream) throws IOException {
        return fromTagNode(this.htmlCleaner.clean(inputStream), null);
    }

    /**
     * Parses the text in the given InputStream, checking the callback for
     * cancellation while the node tree is processed.
     *
     * @param inputStream the source of the HTML text
     * @param cancellationCallback polled during processing; may be null
     * @return a Spanned version of the text.
     * @throws IOException if the HtmlCleaner fails to read the input
     * @throws ParsingCancelledException if the callback reports cancellation
     */
    public Spannable fromHtml(InputStream inputStream, CancellationCallback cancellationCallback) throws IOException {
        return fromTagNode(this.htmlCleaner.clean(inputStream), cancellationCallback);
    }

    /**
     * Gets the currently registered handler for this tag.
     *
     * Used so it can be wrapped.
     *
     * @param tagName the tag to look up
     * @return the registered TagNodeHandler, or null if none is registered.
     */
    public TagNodeHandler getHandlerFor(String tagName) {
        return this.handlers.get(tagName);
    }

    /**
     * Creates spanned text from a TagNode.
     *
     * The node tree is walked first, collecting text in a builder and passing
     * a fresh SpanStack to every handler; afterwards the SpanStack is asked to
     * apply its spans to the collected text.
     *
     * @param node the root of the tree to convert
     * @param cancellationCallback polled during processing; may be null
     * @return the resulting text
     * @throws ParsingCancelledException if the callback reports cancellation
     */
    public Spannable fromTagNode(TagNode node, CancellationCallback cancellationCallback) {
        SpannableStringBuilder result = new SpannableStringBuilder();
        SpanStack stack = new SpanStack();

        applySpan(result, node, stack, cancellationCallback);
        stack.applySpans(this, result);

        return result;
    }

    /**
     * Builds the HtmlCleaner used by the no-argument constructor.
     *
     * @return a new HtmlCleaner with its properties configured as below
     */
    private static HtmlCleaner createHtmlCleaner() {
        HtmlCleaner result = new HtmlCleaner();
        CleanerProperties cleanerProperties = result.getProperties();

        cleanerProperties.setAdvancedXmlEscape(true);

        cleanerProperties.setOmitXmlDeclaration(true);
        cleanerProperties.setOmitDoctypeDeclaration(false);

        cleanerProperties.setTranslateSpecialEntities(true);
        cleanerProperties.setTransResCharsToNCR(true);
        cleanerProperties.setRecognizeUnicodeChars(true);

        cleanerProperties.setIgnoreQuestAndExclam(true);
        cleanerProperties.setUseEmptyElementTags(false);

        // script and title elements are configured as prune tags
        cleanerProperties.setPruneTags("script,title");

        return result;
    }

    /**
     * Aborts processing if the callback reports cancellation.
     *
     * @param cancellationCallback the callback to poll; null means
     *        "never cancelled"
     * @throws ParsingCancelledException if the callback reports cancellation
     */
    private void checkForCancellation(CancellationCallback cancellationCallback) {
        if (cancellationCallback != null && cancellationCallback.isCancelled()) {
            throw new ParsingCancelledException();
        }
    }

    /**
     * Appends the text of a content node to the builder.
     *
     * HTML entities are replaced through TextUtil first. Text that is empty
     * after trimming is not appended at all.
     *
     * @param builder the builder to append to
     * @param contentNode the node holding the text
     * @param cancellationCallback polled before any work is done; may be null
     */
    private void handleContent(SpannableStringBuilder builder, ContentNode contentNode,
            CancellationCallback cancellationCallback) {

        checkForCancellation(cancellationCallback);

        String text = TextUtil.replaceHtmlEntities(
                contentNode.getContent().toString(), false);

        if (isStripExtraWhiteSpace()) {
            //Replace unicode non-breaking space with normal space.
            text = text.replace('\u00A0', ' ');
        }

        if (text.trim().length() > 0) {
            builder.append(text);
        }
    }

    /**
     * Processes a single tag node and, unless its handler renders the content
     * itself, all of its children.
     *
     * @param builder the builder collecting the output text
     * @param node the node to process
     * @param stack the SpanStack passed on to the handlers
     * @param cancellationCallback polled before the node is processed; may be null
     */
    private void applySpan(SpannableStringBuilder builder, TagNode node, SpanStack stack,
            CancellationCallback cancellationCallback) {

        checkForCancellation(cancellationCallback);

        TagNodeHandler handler = this.handlers.get(node.getName());

        if (handler == null) {
            // Tags without a registered handler get a plain StyledTextHandler.
            handler = new StyledTextHandler();
            handler.setSpanner(this);
        }

        int lengthBefore = builder.length();

        handler.beforeChildren(node, builder, stack);

        if (!handler.rendersContent()) {
            for (Object childNode : node.getAllChildren()) {
                if (childNode instanceof ContentNode) {
                    handleContent(builder, (ContentNode) childNode, cancellationCallback);
                } else if (childNode instanceof TagNode) {
                    applySpan(builder, (TagNode) childNode, stack, cancellationCallback);
                }
            }
        }

        // The handler receives the range of text produced for this node.
        int lengthAfter = builder.length();
        handler.handleTagNode(node, builder, lengthBefore, lengthAfter, stack);
    }

    /**
     * Wraps a handler in an AlignmentAttributeHandler, which in turn is
     * wrapped in a StyleAttributeHandler.
     *
     * @param handler the handler to wrap
     * @return the outer StyleAttributeHandler
     */
    private static StyledTextHandler wrap(StyledTextHandler handler) {
        return new StyleAttributeHandler(new AlignmentAttributeHandler(handler));
    }

    /**
     * Registers the default handlers for the supported HTML tags.
     */
    private void registerBuiltInHandlers() {

        TagNodeHandler italicHandler = new StyledTextHandler(
                new Style().setFontStyle(Style.FontStyle.ITALIC));

        registerHandler("i", italicHandler);
        registerHandler("em", italicHandler);
        registerHandler("cite", italicHandler);
        registerHandler("dfn", italicHandler);

        TagNodeHandler boldHandler = new StyledTextHandler(
                new Style().setFontWeight(Style.FontWeight.BOLD));

        registerHandler("b", boldHandler);
        registerHandler("strong", boldHandler);

        TagNodeHandler marginHandler = new StyledTextHandler(
                new Style().setMarginLeft(new StyleValue(2.0f, StyleValue.Unit.EM)));

        registerHandler("blockquote", marginHandler);
        registerHandler("ul", marginHandler);
        registerHandler("ol", marginHandler);

        TagNodeHandler monSpaceHandler = wrap(new MonoSpaceHandler());

        registerHandler("tt", monSpaceHandler);
        registerHandler("code", monSpaceHandler);

        registerHandler("style", new StyleNodeHandler());

        //We wrap an alignment-handler to support
        //align attributes
        StyledTextHandler inlineAlignment = wrap(new StyledTextHandler());
        TagNodeHandler brHandler = new NewLineHandler(1, inlineAlignment);

        registerHandler("br", brHandler);

        Style paragraphStyle = new Style()
                .setDisplayStyle(Style.DisplayStyle.BLOCK)
                .setMarginBottom(new StyleValue(1.0f, StyleValue.Unit.EM));

        TagNodeHandler pHandler = new BorderAttributeHandler(
                wrap(new StyledTextHandler(paragraphStyle)));

        registerHandler("p", pHandler);
        registerHandler("div", pHandler);

        registerHandler("h1", wrap(new HeaderHandler(1.5f, 0.5f)));
        registerHandler("h2", wrap(new HeaderHandler(1.4f, 0.6f)));
        registerHandler("h3", wrap(new HeaderHandler(1.3f, 0.7f)));
        registerHandler("h4", wrap(new HeaderHandler(1.2f, 0.8f)));
        registerHandler("h5", wrap(new HeaderHandler(1.1f, 0.9f)));
        registerHandler("h6", wrap(new HeaderHandler(1f, 1f)));

        TagNodeHandler preHandler = new PreHandler();
        registerHandler("pre", preHandler);

        TagNodeHandler bigHandler = new StyledTextHandler(
                new Style().setFontSize(new StyleValue(1.25f, StyleValue.Unit.EM)));

        registerHandler("big", bigHandler);

        TagNodeHandler smallHandler = new StyledTextHandler(
                new Style().setFontSize(new StyleValue(0.8f, StyleValue.Unit.EM)));

        registerHandler("small", smallHandler);

        TagNodeHandler subHandler = new SubScriptHandler();
        registerHandler("sub", subHandler);

        TagNodeHandler superHandler = new SuperScriptHandler();
        registerHandler("sup", superHandler);

        TagNodeHandler centerHandler = new StyledTextHandler(
                new Style().setTextAlignment(Style.TextAlignment.CENTER));
        registerHandler("center", centerHandler);

        registerHandler("li", new ListItemHandler());
        registerHandler("a", new LinkHandler());
        registerHandler("img", new ImageHandler());
        registerHandler("font", new FontHandler());
    }

    /**
     * Callback that lets a caller abort a running conversion.
     */
    public interface CancellationCallback {

        /**
         * Polled while the node tree is processed.
         *
         * @return true if processing should be aborted
         */
        boolean isCancelled();
    }
}