package ecologylab.bigsemantics.html; import org.w3c.dom.Node; import ecologylab.bigsemantics.html.utils.StringBuilderUtils; import ecologylab.bigsemantics.model.text.TermVector; import ecologylab.serialization.XMLTools; /** * Keep the paragraph text in the document with the DOM Node to recognize the ArticleMain node. * * @author eunyee * */ public class ParagraphText { private StringBuilder buffy; private Node node; public ParagraphText() { // ptext = new String(); } public Node getNode() { return node; } public void setNode(Node blockNode) { this.node = blockNode; } //TODO -- get rid of this visibility, because it is prone to creating memory leaks. // Anyway, higher level passing of this is better, because it carries context with it. public StringBuilder getBuffy() { return buffy; } public int length() { return buffy == null ? 0 : buffy.length(); } public void setBuffy(StringBuilder buffy) { if (this.buffy != null) StringBuilderUtils.release(this.buffy); this.buffy = buffy; } /** * Append the argument to the buffy inside. * If buffy was not empty at the start of this operation, append a space first. * * @param toAppend * @return The number of characters added. */ public void append(CharSequence toAppend) { if (buffy == null) //TODO -- should this be built larger? how many calls are made on average? buffy = StringBuilderUtils.acquire(); else buffy.append(' '); buffy.append(toAppend); } public void append(byte[] bytes, int start, int end) { if (buffy == null) //TODO -- should this be built larger? how many calls are made on average? buffy = StringBuilderUtils.acquire(); else buffy.append(' '); while (start < end) { buffy.append((char) bytes[start++]); } } public void append(char[] bytes, int start, int end) { if (buffy == null) //TODO -- should this be built larger? how many calls are made on average? buffy = StringBuilderUtils.acquire(); else buffy.append(' '); while (start < end) { buffy.append((char) bytes[start++]); } } public void recycle() { if (buffy != null) { StringBuilderUtils.release(buffy); buffy = null; } if (termVector != null) { termVector.clear(); termVector = null; } } public Node getElementNode() { for (Node thisNode = node; thisNode != null; thisNode = thisNode.getParentNode()) { switch (thisNode.getNodeType()) { case Node.ELEMENT_NODE: return thisNode; } } return null; } public boolean hasText() { return buffy != null && buffy.length() > 0; } public boolean isEmpty() { return buffy == null || buffy.length() == 0; } public int indexOf(String s) { return (buffy == null) ? -1 : buffy.indexOf(s); } public void unescapeXML() { XMLTools.unescapeXML(buffy); } TermVector termVector; public TermVector termVector() { TermVector result = this.termVector; if (result == null) { result = new TermVector(buffy); this.termVector = result; } return result; } /** * Set the textContext for the ImgElement (HTMLElement) to the buffy of this. * * @param imgElement */ public void setImgElementTextContext(ImgElement imgElement) { imgElement.setTextContext(buffy); } }