/******************************************************************************* * Copyright (c) 2011, 2015 Tasktop Technologies. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: * David Green - initial API and implementation *******************************************************************************/ package org.eclipse.mylyn.wikitext.parser; import static com.google.common.base.Preconditions.checkNotNull; import java.io.IOException; import org.eclipse.mylyn.wikitext.internal.parser.html.AbstractSaxHtmlParser; import org.eclipse.mylyn.wikitext.internal.parser.html.HtmlCleaner; import org.eclipse.mylyn.wikitext.internal.parser.html.XHtmlParser; import org.xml.sax.InputSource; import org.xml.sax.SAXException; /** * A parser for HTML, driving {@link DocumentBuilder}. Depending on parsers available at runtime, input may need to be * well-formed XHTML. * * @see DocumentBuilder * @author David Green * @see #instanceWithHtmlCleanupRules() * @since 3.0 */ public class HtmlParser { private final AbstractSaxHtmlParser delegate; private HtmlParser(AbstractSaxHtmlParser parser) { this.delegate = checkNotNull(parser); } public HtmlParser() { AbstractSaxHtmlParser parser; if (isJsoupAvailable()) { parser = new org.eclipse.mylyn.wikitext.internal.parser.html.HtmlParser(); } else { parser = new XHtmlParser(); } this.delegate = parser; } /** * Provides a parser instance with cleanup rules that make the result more suitable for generating wiki markup. */ public static HtmlParser instanceWithHtmlCleanupRules() { org.eclipse.mylyn.wikitext.internal.parser.html.HtmlParser parser = new org.eclipse.mylyn.wikitext.internal.parser.html.HtmlParser(); HtmlCleaner htmlCleaner = new HtmlCleaner(); htmlCleaner.configure(parser); return new HtmlParser(parser); } /** * Creates a new parser instance. */ public static HtmlParser instance() { return new HtmlParser(); } AbstractSaxHtmlParser getDelegate() { return delegate; } /** * Parses well-formed XHTML from the given input, and emit an approximation of the source document to the given * document builder. Equivalent to {@code parse(input,builder,true)} * * @param input * the source input * @param builder * the builder to which output is provided * @throws IOException * @throws SAXException */ public void parse(InputSource input, DocumentBuilder builder) throws IOException, SAXException { parse(input, builder, true); } /** * Parses well-formed XHTML or HTML from the given input, and emit an approximation of the source document to the * given document builder. * * @param input * the source input * @param builder * the builder to which output is provided * @param asDocument * indicates if the builder should be driven as a {@link DocumentBuilder#beginDocument() document}. */ public void parse(InputSource input, DocumentBuilder builder, boolean asDocument) throws IOException, SAXException { checkNotNull(input); checkNotNull(builder); delegate.parse(input, builder, asDocument); } boolean isJsoupAvailable() { try { Class.forName("org.jsoup.Jsoup", true, HtmlParser.class.getClassLoader()); //$NON-NLS-1$ return true; } catch (Throwable t) { return false; } } }