package org.androiddaisyreader.model;

import static org.androiddaisyreader.model.XmlUtilities.obtainEncodingStringFromInputStream;

import java.io.IOException;
import java.io.InputStream;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * A snippet of text in a DAISY 2.02 book.
 *
 * The text is looked up by element id in a jsoup {@link Document}.
 */
public class DaisySnippet extends Snippet {
    private Document doc;
    private String id;

    // Prevent people from using the default constructor.
    @SuppressWarnings("unused")
    private DaisySnippet() {
    }

    /**
     * Create a DAISY 2.02 snippet.
     *
     * Uses a jsoup document and the id part of a composite reference. This
     * constructor should be significantly faster than the one that creates a
     * jsoup document.
     *
     * @param doc the jsoup representation of the HTML document
     * @param id the id used to get the text.
     */
    DaisySnippet(Document doc, String id) {
        this.doc = doc;
        this.id = id;
    }

    /**
     * Create a DAISY 2.02 snippet. Uses the book's context &amp; a composite
     * reference.
     *
     * The context may be a file location, an index into a zip file, etc. The
     * context is needed as composite references contain relative references.
     *
     * A composite reference is formatted as follows:
     * {@code fire_safety.html#dol_1_4_rgn_cnt_0043}. An example of the
     * reference in context follows:
     * {@code <text src="fire_safety.html#dol_1_4_rgn_cnt_0043" id="rgn_txt_0004_0017"/>}
     *
     * The stream obtained from the context is always closed before this
     * constructor returns, whether or not parsing succeeded.
     *
     * @param context the book context used to resolve the relative uri.
     * @param compositeReference reference in the form {@code uri#id}.
     * @throws IllegalArgumentException if the context is null or the
     *             composite reference doesn't match the expected structure.
     * @throws RuntimeException if the referenced content cannot be read; the
     *             underlying {@link IOException} is attached as the cause.
     */
    DaisySnippet(BookContext context, String compositeReference) {
        if (context == null) {
            throw new IllegalArgumentException("Programming error: context needs to be set");
        }

        String[] elements = parseCompositeReference(compositeReference);
        String uri = elements[0];
        this.id = elements[1];

        InputStream contents = null;
        try {
            contents = context.getResource(uri);
            String encoding = obtainEncodingStringFromInputStream(contents);
            doc = Jsoup.parse(contents, encoding, context.getBaseUri());
        } catch (IOException ioe) {
            // TODO 20120214 (jharty): we need to consider more appropriate
            // error reporting.
            throw new RuntimeException("Unable to load snippet content from " + uri, ioe);
        } finally {
            // Release the resource on both the success and the failure path.
            closeQuietly(contents);
        }
    }

    /**
     * Close a stream, tolerating null and ignoring any failure to close.
     *
     * @param stream the stream to close, may be null.
     */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done here: the content has either been
            // read already or a more relevant exception is propagating.
        }
    }

    /**
     * Split a composite reference into the constituent parts.
     *
     * A composite reference is formatted as follows:
     * {@code fire_safety.html#dol_1_4_rgn_cnt_0043}. An example of the
     * reference in context follows:
     * {@code <text src="fire_safety.html#dol_1_4_rgn_cnt_0043" id="rgn_txt_0004_0017"/>}
     *
     * @param compositeReference to split
     * @return 2 strings, the first [0] contains the relative filename, the
     *         second [1] contains the id.
     * @throws IllegalArgumentException if the composite reference is null or
     *             doesn't match the expected structure.
     */
    public static String[] parseCompositeReference(String compositeReference) {
        // A null reference is reported the same way as a malformed one rather
        // than surfacing as a bare NullPointerException.
        String[] elements = (compositeReference == null)
                ? new String[0]
                : compositeReference.split("#");
        if (elements.length != 2) {
            throw new IllegalArgumentException(
                    "Expected composite reference in the form uri#id, got " + compositeReference);
        }
        return elements;
    }

    /**
     * Get the text of the element identified by this snippet's id.
     *
     * Callers should check {@link #hasText()} first: if the document has no
     * element with this id the lookup yields null and this method throws a
     * {@link NullPointerException}.
     *
     * @return the text of the referenced element.
     */
    @Override
    public String getText() {
        return doc.getElementById(id).text();
    }

    /**
     * Report whether the document contains an element with this snippet's id
     * that can provide text.
     *
     * @return true if the element exists and its text is not null, false
     *         otherwise.
     */
    @Override
    public boolean hasText() {
        final Element element = doc.getElementById(id);
        return element != null && element.text() != null;
    }

    /**
     * @return the id part of the composite reference.
     */
    public String getId() {
        // TODO 20120214 (jharty): Consider keeping the composite reference as
        // the ID since these IDs are only truly unique in the context of the
        // filename...
        return id;
    }
}