package jj.document;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import javax.inject.Inject;
import jj.event.Publisher;
import jj.resource.AbstractFileResource;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;
import org.jsoup.parser.ParseError;
import org.jsoup.parser.Parser;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
/**
* An immutable collection of information about an
* HTML resource.
* @author jason
*
*/
public class HtmlResource extends AbstractFileResource<Void> {

    /** The parsed document; assigned exactly once, in the constructor. */
    private final Document document;

    /**
     * Visitor that records every {@link Comment} node it is shown so the
     * nodes can be detached afterwards. Removal is deferred to
     * {@link #kill()} instead of happening inside the visit callbacks, so
     * the tree is not modified while it is still being traversed.
     */
    private static final class CommentKiller implements NodeVisitor {

        /** Comment nodes seen so far, in the order {@code tail} was called. */
        private final List<Node> comments = new ArrayList<>();

        @Override
        public void head(Node node, int depth) {
            // nothing to do when entering a node; collection happens in tail
        }

        @Override
        public void tail(Node node, int depth) {
            if (node instanceof Comment) {
                comments.add(node);
            }
        }

        /** Detaches every comment node collected so far from its parent. */
        void kill() {
            for (Node comment : comments) {
                comment.remove();
            }
        }
    }

    /**
     * Decodes the resource bytes as UTF-8, parses them as HTML, publishes an
     * {@link HtmlParseError} if the parser reported any errors, and
     * optionally strips all comment nodes from the parsed document.
     *
     * @param configuration consulted for {@code showParsingErrors()} (whether
     *        the parser tracks errors at all) and {@code removeComments()}
     *        (whether comment nodes are stripped after parsing)
     * @param publisher receives an {@link HtmlParseError} when the parser
     *        reports at least one error
     * @param dependencies passed through to the superclass constructor
     * @param path The filesystem Path to this resource; passed to the
     *        superclass constructor and included in any published
     *        {@link HtmlParseError}
     * @throws IOException declared by this constructor; presumably raised by
     *         the superclass constructor while loading the file (not visible
     *         here, confirm against {@code AbstractFileResource})
     */
    @Inject
    HtmlResource(
        final DocumentConfiguration configuration,
        final Publisher publisher,
        final Dependencies dependencies,
        final Path path
    ) throws IOException {
        super(dependencies, path);

        // byteBuffer is not declared in this class; presumably it is the file
        // content made available by the superclass constructor.
        String html = byteBuffer.toString(UTF_8);

        // Track every error when they are to be shown; a limit of 0 turns
        // error tracking off in the parser.
        Parser parser = Parser.htmlParser()
            .setTrackErrors(configuration.showParsingErrors() ? Integer.MAX_VALUE : 0);
        this.document = parser.parseInput(html, name());

        List<ParseError> errors = parser.getErrors();
        if (!errors.isEmpty()) {
            publisher.publish(new HtmlParseError(path, errors));
        }

        // Errors are published before the tree is touched.
        if (configuration.removeComments()) {
            removeComments(document);
        }
    }

    /** Collects every comment node under {@code root}, then detaches them all. */
    private static void removeComments(Document root) {
        CommentKiller commentKiller = new CommentKiller();
        new NodeTraversor(commentKiller).traverse(root);
        commentKiller.kill();
    }

    /**
     * Returns the document parsed in the constructor.
     *
     * <p>The instance held by this resource is returned directly, not a
     * copy. NOTE(review): whether callers are expected to clone it before
     * mutating is not visible in this file.
     *
     * @return the parsed document
     */
    public Document document() {
        return document;
    }

    /**
     * Returns the charset of this resource, which is always UTF-8, the same
     * charset the constructor uses to decode the file content.
     *
     * @return {@code UTF_8}
     */
    @Override
    public Charset charset() {
        return UTF_8;
    }
}