package org.nate.internal.jsoup;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.nate.encoder.NateDocument;
import org.nate.exception.IONateException;
import org.nate.internal.util.StreamUtils;
public class JsoupBackedNateDocumentFactory {
private static final String NATE_FRAGMENT_WRAPPER = "natewrapper";
private static final String BEGIN_NATE_FRAGMENT_WRAPPER = "<" + NATE_FRAGMENT_WRAPPER + ">";
private static final String END_NATE_FRAGMENT_WRAPPER = "</" + NATE_FRAGMENT_WRAPPER + ">";
/**
* TODO: What should we do with this???
*/
public static final String BASE_URI = "";
public JsoupBackedNateDocument createFromHtmlDocument(InputStream source) {
return new JsoupBackedNateDocument(parse(source));
}
public JsoupBackedNateDocument createFromHtmlDocument(String source) {
return new JsoupBackedNateDocument(parse(source));
}
public JsoupBackedNateDocumentFragment createFromHtmlFragment(InputStream source) {
return new JsoupBackedNateDocumentFragment(parseFragment(source));
}
public NateDocument createFromHtmlFragment(String source) {
return new JsoupBackedNateDocumentFragment(parseFragment(source));
}
private Document parse(String source) {
return Jsoup.parse(source);
}
private Document parse(InputStream source) {
try {
// TODO: Really need to sort out charsets and base uri...
return Jsoup.parse(source, null, BASE_URI);
} catch (IOException e) {
throw new IONateException("Problems parsing HTML.", e);
}
}
private List<Node> parseFragment(InputStream source) {
Element pseudoRootNode =
parse(StreamUtils.wrapInPseudoRootElement(source, NATE_FRAGMENT_WRAPPER))
.select(NATE_FRAGMENT_WRAPPER).get(0);
return pseudoRootNode.childNodes();
}
private List<Node> parseFragment(String source) {
Element pseudoRootNode =
parse(wrapInPseudoRootElement(source)).select(NATE_FRAGMENT_WRAPPER).get(0);
return pseudoRootNode.childNodes();
}
private String wrapInPseudoRootElement(String source) {
return BEGIN_NATE_FRAGMENT_WRAPPER + source + END_NATE_FRAGMENT_WRAPPER;
}
}