Java Examples for org.jsoup.nodes.TextNode
The following Java examples will help you understand the usage of org.jsoup.nodes.TextNode. These source code samples are taken from different open-source projects.
Example 1
| Project: OpenLegislation-master File: BillTextTest.java View source code |
/**
 * Walks the direct children of {@code ele} in order and appends their text to
 * {@code stringBuilder}, each entry followed by a newline.
 * <p>
 * A {@code u} element contributes its whole text in upper case, any other element is descended
 * into recursively, and a text node contributes its own text. Other node types are ignored.
 *
 * @param ele the element whose children are processed
 * @param stringBuilder the buffer that receives the extracted text
 */
void processNode(Element ele, StringBuilder stringBuilder) {
    for (Node child : ele.childNodes()) {
        if (child instanceof Element) {
            Element nested = (Element) child;
            boolean underlined = nested.tag().getName().equals("u");
            if (!underlined) {
                processNode(nested, stringBuilder);
                continue;
            }
            stringBuilder.append(nested.text().toUpperCase()).append("\n");
            continue;
        }
        if (child instanceof TextNode) {
            stringBuilder.append(((TextNode) child).text()).append("\n");
        }
    }
}Example 2
| Project: cms-ce-master File: HtmlExtractor.java View source code |
/**
 * Parses the stream as HTML and concatenates the text of every text node of every element.
 * After each piece of text, {@code appendWhitespaceAfterTextIfNotThere} is invoked with the
 * buffer and that text.
 *
 * @param mimeType the mime type of the content; checked with {@code canHandle}
 * @param inputStream the HTML content
 * @param encoding the character encoding handed to the parser
 * @return the collected text, or {@code null} when {@code canHandle(mimeType)} is false
 * @throws IOException declared by the signature; the only call that receives the stream is the parse call
 */
@Override
public String extractText(final String mimeType, final InputStream inputStream, final String encoding) throws IOException {
    if (!canHandle(mimeType)) {
        return null;
    }
    final StringBuilder collected = new StringBuilder();
    final Document parsed = Jsoup.parse(inputStream, encoding, "");
    for (Element current : parsed.getAllElements()) {
        for (TextNode node : current.textNodes()) {
            final String chunk = node.text();
            collected.append(chunk);
            appendWhitespaceAfterTextIfNotThere(collected, chunk);
        }
    }
    return collected.toString();
}Example 3
| Project: alfresco-apache-storm-demo-master File: JSoupDOMBuilder.java View source code |
/**
 * Recursively copies a Jsoup node tree into a W3C DOM tree.
 * <p>
 * A Jsoup document contributes only its children, which are copied into {@code out}. An element
 * becomes a new W3C element appended to {@code out}, with its attributes and (recursively) its
 * children. A text node becomes a W3C text node unless {@code out} is a W3C document. Every
 * other node type is skipped.
 * <p>
 * Attributes: a plain {@code xmlns} attribute is dropped; an attribute whose prefix is
 * {@code xmlns} is recorded in {@code ns} and still copied; an attribute whose prefix is neither
 * {@code xmlns} nor {@code xml} and has no non-null entry in {@code ns} is copied with every
 * {@code ':'} replaced by {@code '_'}.
 *
 * @param node The Jsoup node containing the content to copy to the specified W3C {@link Node}.
 * @param out The W3C {@link Node} that receives the DOM content.
 * @param doc The W3C document used to create the new elements and text nodes.
 * @param ns Namespace declarations seen so far, keyed by {@code getLocalName} of the declaring
 *        attribute name; updated in place.
 */
public static void createDOM(org.jsoup.nodes.Node node, Node out, Document doc, Map<String, String> ns) {
    if (node instanceof org.jsoup.nodes.Document) {
        // The Jsoup document itself is not copied, only its children.
        for (org.jsoup.nodes.Node child : ((org.jsoup.nodes.Document) node).childNodes()) {
            createDOM(child, out, doc, ns);
        }
        return;
    }
    if (node instanceof org.jsoup.nodes.Element) {
        org.jsoup.nodes.Element source = (org.jsoup.nodes.Element) node;
        org.w3c.dom.Element target = doc.createElement(source.tagName());
        out.appendChild(target);
        for (org.jsoup.nodes.Attribute attribute : source.attributes()) {
            String name = attribute.getKey();
            // omit xhtml namespace
            if (name.equals("xmlns")) {
                continue;
            }
            String prefix = getNSPrefix(name);
            if (prefix != null && prefix.equals("xmlns")) {
                ns.put(getLocalName(name), attribute.getValue());
            } else if (prefix != null && !prefix.equals("xml") && ns.get(prefix) == null) {
                // fix attribute names looking like qnames
                name = name.replace(':', '_');
            }
            target.setAttribute(name, attribute.getValue());
        }
        for (org.jsoup.nodes.Node child : source.childNodes()) {
            createDOM(child, target, doc, ns);
        }
        return;
    }
    if (node instanceof org.jsoup.nodes.TextNode && !(out instanceof Document)) {
        out.appendChild(doc.createTextNode(((org.jsoup.nodes.TextNode) node).text()));
    }
}Example 4
| Project: stanbol-master File: DOMBuilder.java View source code |
/**
 * Recursively copies a Jsoup node tree into a W3C DOM tree.
 * <p>
 * A Jsoup document contributes only its children, which are copied into {@code out}. An element
 * becomes a new W3C element appended to {@code out}, with its attributes and (recursively) its
 * children. A text node becomes a W3C text node unless {@code out} is a W3C document. Every
 * other node type is skipped.
 * <p>
 * Attributes: a plain {@code xmlns} attribute is dropped; an attribute whose prefix is
 * {@code xmlns} is recorded in {@code ns} and still copied; an attribute whose prefix is neither
 * {@code xmlns} nor {@code xml} and has no non-null entry in {@code ns} is copied with every
 * {@code ':'} replaced by {@code '_'}.
 *
 * @param node The Jsoup node containing the content to copy to the specified W3C {@link Node}.
 * @param out The W3C {@link Node} that receives the DOM content.
 * @param doc The W3C document used to create the new elements and text nodes.
 * @param ns Namespace declarations seen so far, keyed by {@code getLocalName} of the declaring
 *        attribute name; updated in place.
 */
private static void createDOM(org.jsoup.nodes.Node node, Node out, Document doc, Map<String, String> ns) {
    if (node instanceof org.jsoup.nodes.Document) {
        // The Jsoup document itself is not copied, only its children.
        for (org.jsoup.nodes.Node child : ((org.jsoup.nodes.Document) node).childNodes()) {
            createDOM(child, out, doc, ns);
        }
        return;
    }
    if (node instanceof org.jsoup.nodes.Element) {
        org.jsoup.nodes.Element source = (org.jsoup.nodes.Element) node;
        org.w3c.dom.Element target = doc.createElement(source.tagName());
        out.appendChild(target);
        for (org.jsoup.nodes.Attribute attribute : source.attributes()) {
            String name = attribute.getKey();
            // omit xhtml namespace
            if (name.equals("xmlns")) {
                continue;
            }
            String prefix = getNSPrefix(name);
            if (prefix != null && prefix.equals("xmlns")) {
                ns.put(getLocalName(name), attribute.getValue());
            } else if (prefix != null && !prefix.equals("xml") && ns.get(prefix) == null) {
                // fix attribute names looking like qnames
                name = name.replace(':', '_');
            }
            target.setAttribute(name, attribute.getValue());
        }
        for (org.jsoup.nodes.Node child : source.childNodes()) {
            createDOM(child, target, doc, ns);
        }
        return;
    }
    if (node instanceof org.jsoup.nodes.TextNode && !(out instanceof Document)) {
        out.appendChild(doc.createTextNode(((org.jsoup.nodes.TextNode) node).text()));
    }
}Example 5
| Project: tori-master File: DOMBuilder.java View source code |
/**
 * Recursively copies a Jsoup node tree into a W3C DOM tree.
 * <p>
 * A Jsoup document contributes only its children, which are copied into {@code out}. An element
 * becomes a new W3C element appended to {@code out}, with its attributes and (recursively) its
 * children. A text node becomes a W3C text node unless {@code out} is a W3C document. Every
 * other node type is skipped.
 * <p>
 * Attributes: a plain {@code xmlns} attribute is dropped; an attribute whose prefix is
 * {@code xmlns} is recorded in {@code ns} and still copied; an attribute whose prefix is neither
 * {@code xmlns} nor {@code xml} and has no non-null entry in {@code ns} is copied with every
 * {@code ':'} replaced by {@code '_'}.
 *
 * @param node
 *            The Jsoup node containing the content to copy to the specified
 *            W3C {@link Node}.
 * @param out
 *            The W3C {@link Node} that receives the DOM content.
 * @param doc
 *            The W3C document used to create the new elements and text nodes.
 * @param ns
 *            Namespace declarations seen so far, keyed by {@code getLocalName} of the
 *            declaring attribute name; updated in place.
 */
private static void createDOM(final org.jsoup.nodes.Node node, final Node out, final Document doc, final Map<String, String> ns) {
    if (node instanceof org.jsoup.nodes.Document) {
        // The Jsoup document itself is not copied, only its children.
        for (org.jsoup.nodes.Node child : ((org.jsoup.nodes.Document) node).childNodes()) {
            createDOM(child, out, doc, ns);
        }
        return;
    }
    if (node instanceof org.jsoup.nodes.Element) {
        org.jsoup.nodes.Element source = (org.jsoup.nodes.Element) node;
        org.w3c.dom.Element target = doc.createElement(source.tagName());
        out.appendChild(target);
        for (org.jsoup.nodes.Attribute attribute : source.attributes()) {
            String name = attribute.getKey();
            // omit xhtml namespace
            if (name.equals("xmlns")) {
                continue;
            }
            String prefix = getNSPrefix(name);
            if (prefix != null && prefix.equals("xmlns")) {
                ns.put(getLocalName(name), attribute.getValue());
            } else if (prefix != null && !prefix.equals("xml") && ns.get(prefix) == null) {
                // fix attribute names looking like qnames
                name = name.replace(':', '_');
            }
            target.setAttribute(name, attribute.getValue());
        }
        for (org.jsoup.nodes.Node child : source.childNodes()) {
            createDOM(child, target, doc, ns);
        }
        return;
    }
    if (node instanceof org.jsoup.nodes.TextNode && !(out instanceof Document)) {
        out.appendChild(doc.createTextNode(((org.jsoup.nodes.TextNode) node).text()));
    }
}Example 6
| Project: jresponder-master File: TextUtil.java View source code |
/* ====================================================================== */
/**
 * Returns the text of a cell, preferring the whole (un-normalised) text of its first child.
 * <p>
 * When the first child node is a text node, its {@code getWholeText()} value is used; in every
 * other case (no children, a non-text first child, or a {@code null} whole text) the result is
 * {@code cell.text()}.
 *
 * @param cell element that contains whitespace formatting
 * @return the text chosen as described above
 */
public String getWholeText(Element cell) {
    List<Node> children = cell.childNodes();
    String whole = null;
    if (!children.isEmpty()) {
        Node first = children.get(0);
        if (first instanceof TextNode) {
            whole = ((TextNode) first).getWholeText();
        }
    }
    return whole != null ? whole : cell.text();
}Example 7
| Project: mbox_tools-master File: HTMLStripUtil.java View source code |
/**
 * Appends the trimmed text of a text node to {@code buffer}, followed by a single space.
 * Nodes that are not text nodes, and text nodes that are empty after trimming, add nothing.
 *
 * @param node the node being visited
 * @param depth the depth of the node; not used
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    // Turn non-breaking spaces (U+00A0) into regular spaces first: trim() does not strip U+00A0.
    // The previous code replaced a regular space with a regular space, which did nothing.
    String text = ((TextNode) node).text().replace('\u00A0', ' ').trim();
    if (!text.isEmpty()) {
        // After trim() the text can never end with a space, so the separator is always added.
        // the last text gets appended the extra space too but we remove it later
        buffer.append(text).append(" ");
    }
}Example 8
| Project: mylyn.docs-master File: HtmlCleanerTest.java View source code |
/**
 * A body holding the text nodes "\n", "text", "\n", "\n" must clean to
 * {@code <body>text</body>}.
 */
@Test
public void testTrailingWhitespaceBodyNoBlock_WhitespaceOutsideBody2() {
    // bug 406943
    Document document = Document.createShell("");
    for (String content : new String[] {"\n", "text", "\n", "\n"}) {
        document.body().appendChild(new TextNode(content, ""));
    }
    assertEquals("<body>text</body>", cleanToBody(document));
}Example 9
| Project: org.eclipse.mylyn.docs-master File: HtmlCleanerTest.java View source code |
/**
 * A body holding the text nodes "\n", "text", "\n", "\n" must clean to
 * {@code <body>text</body>}.
 */
@Test
public void testTrailingWhitespaceBodyNoBlock_WhitespaceOutsideBody2() {
    // bug 406943
    Document document = Document.createShell("");
    for (String content : new String[] {"\n", "text", "\n", "\n"}) {
        document.body().appendChild(new TextNode(content, ""));
    }
    assertEquals("<body>text</body>", cleanToBody(document));
}Example 10
| Project: XCoLab-master File: EmailNotification.java View source code |
/**
 * Resolves a placeholder tag to the node that should replace it.
 * <p>
 * The superclass is asked first and its non-null answer wins. Otherwise the tag's node name
 * selects the replacement: plain text nodes for the configuration, recipient and contest-type
 * placeholders, and parsed XML nodes for the link and share placeholders.
 *
 * @param tag the placeholder element to resolve
 * @return the replacement node, or {@code null} when the tag name is not handled here or the
 *         contest / proposal / contest type it needs is missing
 */
@Override
protected Node resolvePlaceholderTag(Element tag) {
    final Node inherited = super.resolvePlaceholderTag(tag);
    if (inherited != null) {
        return inherited;
    }

    final Contest contest = getContest();
    final Proposal proposal = getProposal();
    final boolean hasProposal = contest != null && proposal != null;
    final ContestType contestType = contest != null ? ContestClientUtil.getContestType(contest.getContestTypeId()) : null;
    final boolean hasContestType = contest != null && contestType != null;

    switch (tag.nodeName()) {
        case COLAB_NAME_PLACEHOLDER:
            return new TextNode(ConfigurationAttributeKey.COLAB_NAME.get(), "");
        case COLAB_URL_PLACEHOLDER:
            return new TextNode(ConfigurationAttributeKey.COLAB_URL.get(), "");
        case COLAB_ADMIN_EMAIL_PLACEHOLDER:
            return new TextNode(ConfigurationAttributeKey.ADMIN_EMAIL.get(), "");
        case FIRSTNAME_PLACEHOLDER:
            return new TextNode(getRecipient().getFirstName(), "");
        case FULL_NAME_PLACEHOLDER:
            return new TextNode(getRecipient().getFullName(), "");
        case CONTEST_LINK_PLACEHOLDER:
            if (contest == null) {
                return null;
            }
            return parseXmlNode(getContestLink(contest));
        case PROPOSAL_LINK_PLACEHOLDER: {
            if (!hasProposal) {
                return null;
            }
            final String tab = tag.hasAttr("tab") ? tag.attr("tab") : null;
            // The tag's own text is the link text; the proposal name is the fallback.
            final String linkText = StringUtils.isNotBlank(tag.ownText())
                    ? tag.ownText()
                    : getProposalAttributeHelper().getAttributeValueString(ProposalAttributeKeys.NAME, "");
            return parseXmlNode(getProposalLinkWithLinkText(contest, proposal, linkText, tab));
        }
        case PROPOSAL_STRING_PLACEHOLDER:
            if (!hasContestType) {
                return null;
            }
            return new TextNode(contestType.getProposalName(), "");
        case PROPOSALS_STRING_PLACEHOLDER:
            if (!hasContestType) {
                return null;
            }
            return new TextNode(contestType.getProposalNamePlural(), "");
        case CONTEST_STRING_PLACEHOLDER:
            if (!hasContestType) {
                return null;
            }
            return new TextNode(contestType.getContestName(), "");
        case CONTESTS_STRING_PLACEHOLDER:
            if (!hasContestType) {
                return null;
            }
            return new TextNode(contestType.getContestNamePlural(), "");
        case TWITTER_PLACEHOLDER:
            if (!hasProposal) {
                return null;
            }
            return parseXmlNode(getTwitterShareLink(getProposalLinkUrl(contest, proposal), tag.ownText()));
        case PINTEREST_PLACEHOLDER:
            if (!hasProposal) {
                return null;
            }
            return parseXmlNode(getPinterestShareLink(getProposalLinkUrl(contest, proposal), tag.ownText()));
        case FACEBOOK_PLACEHOLDER:
            if (!hasProposal) {
                return null;
            }
            return parseXmlNode(getFacebookShareLink(getProposalLinkUrl(contest, proposal)));
        case LINKEDIN_PLACEHOLDER:
            if (!hasProposal) {
                return null;
            }
            return parseXmlNode(getLinkedInShareLink(getProposalLinkUrl(contest, proposal), tag.attr("title"), tag.ownText()));
        default:
            return null;
    }
}Example 11
| Project: iee-master File: TextPadParser.java View source code |
/**
 * Converts the visited Jsoup node into an {@code INode} and pushes it onto {@code stack}.
 * <p>
 * A text node becomes a {@code Text} carrying the node's text. An element becomes a
 * {@code Span}; when the element has a {@code style} attribute, the span's style is filled
 * from it. Any other node becomes an empty {@code Span}.
 *
 * @param node the Jsoup node being visited
 * @param depth the depth of the node; not used
 */
@Override
public void head(org.jsoup.nodes.Node node, int depth) {
    INode newNode;
    if (node instanceof org.jsoup.nodes.TextNode) {
        newNode = new Text().setText(((org.jsoup.nodes.TextNode) node).text());
    } else if (node instanceof org.jsoup.nodes.Element) {
        Span span = new Span();
        if (node.hasAttr("style")) {
            applyInlineStyle(span.getStyle(), node.attr("style"));
        }
        newNode = span;
    } else {
        newNode = new Span();
    }
    stack.push(newNode);
}

/**
 * Parses an inline CSS declaration and copies the supported properties (font-style,
 * font-weight, font-family, font-size, color, background-color) onto {@code style}.
 * An {@link IOException} from parsing is printed and otherwise ignored.
 */
private void applyInlineStyle(TextStyle style, String css) {
    try {
        CSSStyleDeclaration styleDecl = parser.parseStyleDeclaration(new InputSource(new StringReader(css)));
        style.setItalic("italic".equals(styleDecl.getPropertyValue("font-style")));
        style.setBold("bold".equals(styleDecl.getPropertyValue("font-weight")));
        if (styleDecl.getPropertyValue("font-family") != null) {
            style.setFont(styleDecl.getPropertyValue("font-family"));
        }
        if (!Strings.isNullOrEmpty(styleDecl.getPropertyValue("font-size"))) {
            // Integer.valueOf replaces the deprecated Integer(String) constructor.
            style.setFontSize(Integer.valueOf(styleDecl.getPropertyValue("font-size")));
        }
        if (!Strings.isNullOrEmpty(styleDecl.getPropertyValue("color"))) {
            style.setFgColor(toColor(styleDecl, "color"));
        }
        if (!Strings.isNullOrEmpty(styleDecl.getPropertyValue("background-color"))) {
            style.setBgColor(toColor(styleDecl, "background-color"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/** Builds a {@code Color} from the RGB value of the given CSS property, truncating each channel to int. */
private Color toColor(CSSStyleDeclaration styleDecl, String property) {
    RGBColor rgb = ((CSSPrimitiveValue) styleDecl.getPropertyCSSValue(property)).getRGBColorValue();
    return new Color(
            (int) rgb.getRed().getFloatValue(CSSPrimitiveValue.CSS_NUMBER),
            (int) rgb.getGreen().getFloatValue(CSSPrimitiveValue.CSS_NUMBER),
            (int) rgb.getBlue().getFloatValue(CSSPrimitiveValue.CSS_NUMBER));
}Example 12
| Project: serverside-elements-master File: ElementReflectHelper.java View source code |
/**
 * Wraps a Jsoup node in the matching {@code NodeImpl}.
 * <p>
 * Text nodes become {@code TextNodeImpl}. Elements are wrapped using {@code elementType}, or,
 * when that is {@code null}, the class registered for the element's tag name; if no class is
 * found either way a plain {@code ElementImpl} is returned.
 *
 * @param soupNode the Jsoup node to wrap
 * @param elementType the element class to wrap with, or {@code null} to look it up by tag name
 * @return the wrapper for {@code soupNode}
 * @throws RuntimeException carrying the node's class name when it is neither a text node nor an element
 */
public static NodeImpl wrap(org.jsoup.nodes.Node soupNode, Class<? extends Element> elementType) {
    if (soupNode instanceof org.jsoup.nodes.TextNode) {
        return new TextNodeImpl((org.jsoup.nodes.TextNode) soupNode);
    }
    // } else if (soupNode instanceof org.jsoup.nodes.DataNode) {
    // return Elements.createText(((org.jsoup.nodes.DataNode) soupNode)
    // .getWholeData());
    if (!(soupNode instanceof org.jsoup.nodes.Element)) {
        throw new RuntimeException(soupNode.getClass().getName());
    }
    org.jsoup.nodes.Element soupElement = (org.jsoup.nodes.Element) soupNode;
    String tag = soupElement.tagName();
    Class<? extends Element> resolvedType = elementType;
    if (resolvedType == null) {
        resolvedType = Elements.getRegisteredClass(tag);
    }
    if (resolvedType != null) {
        return (NodeImpl) wrapElement(resolvedType, soupElement);
    }
    return new ElementImpl(soupElement);
}Example 13
| Project: baleen-master File: ParagraphMarkedClassification.java View source code |
/**
 * Looks for a paragraph marking in the paragraph's text. When one is found, the paragraph is
 * additionally annotated as a protective marking, its {@code classification} attribute is set
 * to the trimmed classification, and the marking "(classification)" is removed from every
 * direct text node that contains it.
 *
 * @param p the paragraph element to inspect
 */
private void processParagraph(Element p) {
    Matcher matcher = PARAGRAPH_MARKING.matcher(p.text());
    if (!matcher.find()) {
        return;
    }
    String classification = matcher.group(CLASSFICATION_GROUP);
    MarkupUtils.additionallyAnnotateAsType(p, "uk.gov.dstl.baleen.types.metadata.ProtectiveMarking");
    // TODO: We override this for simplicity but we could select the best classification etc
    // (or output everything later and let a cleaner decide)
    MarkupUtils.setAttribute(p, "classification", classification.trim());
    // TODO: Ideally delete the classification text from the front.
    // That needs a util as we need to eat up the children of p until we've got to the end.
    // That's quite complex, you'd need to split down the text nodes across multiple children.
    // As an interim we only strip the marking from the direct text nodes that contain it.
    String marking = "(" + classification + ')';
    for (org.jsoup.nodes.TextNode textNode : p.textNodes()) {
        String current = textNode.text();
        if (current.contains(marking)) {
            textNode.text(current.replace(marking, ""));
        }
    }
}Example 14
| Project: java-autolinker-master File: UrlAutoLinkerTest.java View source code |
/**
 * Checks that {@code createLinks} splits a text node into text nodes and {@code a} elements:
 * text without URLs stays one node, and bare hosts as well as full URLs become links whose
 * {@code href} and {@code title} carry the full URL while the link text omits the protocol
 * (and is shortened for the long URL).
 */
@Test
public void createLinksShouldWork() {
    final UrlAutoLinker autoLinker = new UrlAutoLinker(30);
    List<Node> result;

    // No URL at all: the text comes back unchanged.
    result = autoLinker.createLinks(new TextNode("das ist ein test ohne urls", ""));
    Assert.assertEquals(1, result.size());
    assertText("das ist ein test ohne urls", result.get(0));

    // Bare host in the middle of the text.
    result = autoLinker.createLinks(new TextNode("das ist eine url ohne twitter.com ohne protocoll", ""));
    Assert.assertEquals(3, result.size());
    assertText("das ist eine url ohne ", result.get(0));
    assertLink("http://twitter.com", "twitter.com", result.get(1));
    assertText(" ohne protocoll", result.get(2));

    // Bare host at the start.
    result = autoLinker.createLinks(new TextNode("twitter.com ohne protocoll am anfang", ""));
    Assert.assertEquals(2, result.size());
    assertLink("http://twitter.com", "twitter.com", result.get(0));
    assertText(" ohne protocoll am anfang", result.get(1));

    // Bare host at the start and a full URL at the end.
    result = autoLinker.createLinks(new TextNode("twitter.com ohne protocoll am anfang mit am ende https://dailyfratze.de?foo=bar", ""));
    Assert.assertEquals(3, result.size());
    assertLink("http://twitter.com", "twitter.com", result.get(0));
    assertText(" ohne protocoll am anfang mit am ende ", result.get(1));
    assertLink("https://dailyfratze.de?foo=bar", "dailyfratze.de?foo=bar", result.get(2));

    // Long URL: the link text is shortened.
    result = autoLinker.createLinks(new TextNode("das ist eine url ohne https://dailyfratze.de/app/tags/CoStarring/Anton#taggedPictures ohne protocoll", ""));
    assertLink("https://dailyfratze.de/app/tags/CoStarring/Anton#taggedPictures", "dailyfratze.de/app/tags/CoSta…", result.get(1));

    // Surrounding whitespace is kept as separate text nodes.
    result = autoLinker.createLinks(new TextNode(" twitter.com ohne protocoll am anfang mit am ende https://dailyfratze.de?foo=bar ", ""));
    Assert.assertEquals(5, result.size());
    assertText(" ", result.get(0));
    assertLink("http://twitter.com", "twitter.com", result.get(1));
    assertText(" ohne protocoll am anfang mit am ende ", result.get(2));
    assertLink("https://dailyfratze.de?foo=bar", "dailyfratze.de?foo=bar", result.get(3));
    assertText(" ", result.get(4));
}

/** Asserts that {@code node} is a text node whose whole text equals {@code expected}. */
private static void assertText(String expected, Node node) {
    Assert.assertTrue(node instanceof TextNode);
    Assert.assertEquals(expected, ((TextNode) node).getWholeText());
}

/**
 * Asserts that {@code node} is an {@code a} element whose href and title both equal
 * {@code expectedUrl} and whose first child is a text node reading {@code expectedLabel}.
 */
private static void assertLink(String expectedUrl, String expectedLabel, Node node) {
    Assert.assertTrue(node instanceof Element);
    final Element a = (Element) node;
    Assert.assertEquals("a", a.tagName());
    Assert.assertEquals(expectedUrl, a.attr("href"));
    Assert.assertEquals(expectedUrl, a.attr("title"));
    Assert.assertEquals(expectedLabel, ((TextNode) a.childNode(0)).getWholeText());
}Example 15
| Project: mechanize-master File: HtmlElements.java View source code |
/**
 * Returns the {@code HtmlNode} wrapper for a Jsoup node, creating and caching it on first use.
 * Elements are wrapped as {@code HtmlElement}, text nodes as {@code HtmlTextNode}, and any
 * other node as a plain {@code HtmlNode}.
 *
 * @param node the Jsoup node to wrap
 * @return the cached wrapper, or a newly created one that has been put into the cache
 */
public HtmlNode getHtmlNode(org.jsoup.nodes.Node node) {
    if (elementCache.containsKey(node)) {
        return elementCache.get(node);
    }
    final HtmlNode created;
    if (node instanceof Element) {
        created = new HtmlElement(page, (Element) node);
    } else if (node instanceof TextNode) {
        created = new HtmlTextNode(page, (TextNode) node);
    } else {
        created = new HtmlNode(page, node);
    }
    elementCache.put(node, created);
    return created;
}Example 16
| Project: Tanaguru-master File: DeepTextElementBuilder.java View source code |
/**
 * Builds the text of an element from its alt attribute and, recursively, its children.
 * <p>
 * Each contribution is preceded by {@code SPACER}: the alt text (when the element has the
 * {@code ALT_ATTR} attribute), the trimmed text of every non-blank child text node, and the
 * text built for every child element. The overall result is trimmed.
 *
 * @param element the element to build the text from
 * @return the trimmed, assembled text
 */
@Override
public String buildTextFromElement(Element element) {
    StringBuilder assembled = new StringBuilder();
    if (element.hasAttr(ALT_ATTR)) {
        assembled.append(SPACER).append(altAttrTextBuilder.buildTextFromElement(element));
    }
    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            TextNode textChild = (TextNode) child;
            if (textChild.isBlank()) {
                continue;
            }
            assembled.append(SPACER).append(StringUtils.trim(textChild.text()));
            continue;
        }
        if (child instanceof Element) {
            assembled.append(SPACER).append(buildTextFromElement((Element) child));
        }
    }
    return StringUtils.trim(assembled.toString());
}Example 17
| Project: jsoup-master File: XmlTreeBuilderTest.java View source code |
/**
 * Parsing an XML fragment yields three top-level nodes; the first is the {@code one} element
 * whose {@code src} resolves against the base URI, the second is the text "Two".
 */
@Test
public void xmlFragment() {
    List<Node> parsed = Parser.parseXmlFragment("<one src='/foo/' />Two<three><four /></three>", "http://example.com/");
    assertEquals(3, parsed.size());

    Node first = parsed.get(0);
    assertEquals("http://example.com/foo/", first.absUrl("src"));
    assertEquals("one", first.nodeName());

    TextNode second = (TextNode) parsed.get(1);
    assertEquals("Two", second.text());
}Example 18
| Project: storm-crawler-master File: JSoupDOMBuilder.java View source code |
/**
 * Recursively copies a Jsoup node tree into a W3C DOM tree.
 * <p>
 * A Jsoup document contributes only its children, which are copied into {@code out}. An element
 * becomes a new W3C element appended to {@code out}, with its attributes and (recursively) its
 * children. Text nodes, comments and data nodes become W3C text, comment and text nodes
 * respectively, unless {@code out} is a W3C document, in which case they are skipped.
 * <p>
 * Attributes: a plain {@code xmlns} attribute is dropped; an attribute whose prefix is
 * {@code xmlns} is recorded in {@code ns} and still copied; an attribute whose prefix is neither
 * {@code xmlns} nor {@code xml} and has no non-null entry in {@code ns} is copied with every
 * {@code ':'} replaced by {@code '_'}.
 *
 * @param node
 *            The Jsoup node containing the content to copy to the specified
 *            W3C {@link Node}.
 * @param out
 *            The W3C {@link Node} that receives the DOM content.
 * @param doc
 *            The W3C document used to create the new nodes.
 * @param ns
 *            Namespace declarations seen so far, keyed by {@code getLocalName} of the
 *            declaring attribute name; updated in place.
 */
public static void createDOM(org.jsoup.nodes.Node node, Node out, Document doc, Map<String, String> ns) {
    if (node instanceof org.jsoup.nodes.Document) {
        // The Jsoup document itself is not copied, only its children.
        for (org.jsoup.nodes.Node child : ((org.jsoup.nodes.Document) node).childNodes()) {
            createDOM(child, out, doc, ns);
        }
        return;
    }
    if (node instanceof org.jsoup.nodes.Element) {
        org.jsoup.nodes.Element source = (org.jsoup.nodes.Element) node;
        org.w3c.dom.Element target = doc.createElement(source.tagName());
        out.appendChild(target);
        for (org.jsoup.nodes.Attribute attribute : source.attributes()) {
            String name = attribute.getKey();
            // omit xhtml namespace
            if (name.equals("xmlns")) {
                continue;
            }
            String prefix = getNSPrefix(name);
            if (prefix != null && prefix.equals("xmlns")) {
                ns.put(getLocalName(name), attribute.getValue());
            } else if (prefix != null && !prefix.equals("xml") && ns.get(prefix) == null) {
                // fix attribute names looking like qnames
                name = name.replace(':', '_');
            }
            target.setAttribute(name, attribute.getValue());
        }
        for (org.jsoup.nodes.Node child : source.childNodes()) {
            createDOM(child, target, doc, ns);
        }
        return;
    }
    if (out instanceof Document) {
        // Text, comments and data are never attached directly to the W3C document node.
        return;
    }
    if (node instanceof org.jsoup.nodes.TextNode) {
        out.appendChild(doc.createTextNode(((org.jsoup.nodes.TextNode) node).text()));
    } else if (node instanceof org.jsoup.nodes.Comment) {
        out.appendChild(doc.createComment(((org.jsoup.nodes.Comment) node).getData()));
    } else if (node instanceof org.jsoup.nodes.DataNode) {
        out.appendChild(doc.createTextNode(((org.jsoup.nodes.DataNode) node).getWholeData()));
    }
}Example 19
| Project: web-crawler-master File: JSoupDOMBuilder.java View source code |
/**
 * Recursively copies a Jsoup node tree into a W3C DOM tree.
 * <p>
 * A Jsoup document contributes only its children, which are copied into {@code out}. An element
 * becomes a new W3C element appended to {@code out}, with its attributes and (recursively) its
 * children. Text nodes, comments and data nodes become W3C text, comment and text nodes
 * respectively, unless {@code out} is a W3C document, in which case they are skipped.
 * <p>
 * Attributes: a plain {@code xmlns} attribute is dropped; an attribute whose prefix is
 * {@code xmlns} is recorded in {@code ns} and still copied; an attribute whose prefix is neither
 * {@code xmlns} nor {@code xml} and has no non-null entry in {@code ns} is copied with every
 * {@code ':'} replaced by {@code '_'}.
 *
 * @param node
 *            The Jsoup node containing the content to copy to the specified
 *            W3C {@link Node}.
 * @param out
 *            The W3C {@link Node} that receives the DOM content.
 * @param doc
 *            The W3C document used to create the new nodes.
 * @param ns
 *            Namespace declarations seen so far, keyed by {@code getLocalName} of the
 *            declaring attribute name; updated in place.
 */
public static void createDOM(org.jsoup.nodes.Node node, Node out, Document doc, Map<String, String> ns) {
    if (node instanceof org.jsoup.nodes.Document) {
        // The Jsoup document itself is not copied, only its children.
        for (org.jsoup.nodes.Node child : ((org.jsoup.nodes.Document) node).childNodes()) {
            createDOM(child, out, doc, ns);
        }
        return;
    }
    if (node instanceof org.jsoup.nodes.Element) {
        org.jsoup.nodes.Element source = (org.jsoup.nodes.Element) node;
        org.w3c.dom.Element target = doc.createElement(source.tagName());
        out.appendChild(target);
        for (org.jsoup.nodes.Attribute attribute : source.attributes()) {
            String name = attribute.getKey();
            // omit xhtml namespace
            if (name.equals("xmlns")) {
                continue;
            }
            String prefix = getNSPrefix(name);
            if (prefix != null && prefix.equals("xmlns")) {
                ns.put(getLocalName(name), attribute.getValue());
            } else if (prefix != null && !prefix.equals("xml") && ns.get(prefix) == null) {
                // fix attribute names looking like qnames
                name = name.replace(':', '_');
            }
            target.setAttribute(name, attribute.getValue());
        }
        for (org.jsoup.nodes.Node child : source.childNodes()) {
            createDOM(child, target, doc, ns);
        }
        return;
    }
    if (out instanceof Document) {
        // Text, comments and data are never attached directly to the W3C document node.
        return;
    }
    if (node instanceof org.jsoup.nodes.TextNode) {
        out.appendChild(doc.createTextNode(((org.jsoup.nodes.TextNode) node).text()));
    } else if (node instanceof org.jsoup.nodes.Comment) {
        out.appendChild(doc.createComment(((org.jsoup.nodes.Comment) node).getData()));
    } else if (node instanceof org.jsoup.nodes.DataNode) {
        out.appendChild(doc.createTextNode(((org.jsoup.nodes.DataNode) node).getWholeData()));
    }
}Example 20
| Project: facelets-lite-master File: Test.java View source code |
/**
 * Normalises the document, trims the text of every text node in place, and returns the HTML
 * of the document produced by {@code cleaner.clean(doc)}.
 *
 * @param doc the document to normalise; it is modified
 * @return the HTML of the cleaned document
 */
String toNormalHtml(Document doc) {
    doc.normalise();
    NodeVisitor textTrimmer = new NodeVisitor() {
        @Override
        public void head(Node visited, int level) {
            // nothing to do when entering a node
        }

        @Override
        public void tail(Node visited, int level) {
            if (!(visited instanceof TextNode)) {
                return;
            }
            TextNode textNode = (TextNode) visited;
            textNode.text(textNode.text().trim());
        }
    };
    doc.traverse(textTrimmer);
    return cleaner.clean(doc).html();
}Example 21
| Project: framework-master File: DeclarativeTestBaseBase.java View source code |
/**
 * Produce predictable html (attributes in alphabetical order), always
 * include close tags.
 * <p>
 * The element is appended to {@code sb}: child elements are rendered recursively and child
 * text nodes are appended trimmed. Attributes are written as {@code name='value'}; boolean
 * attributes are written by name only.
 *
 * @param producedElem the element to render
 * @param sb the buffer the html is appended to
 * @return the complete content of {@code sb} after the element has been appended
 */
private String elementToHtml(Element producedElem, StringBuilder sb) {
    appendElementHtml(producedElem, sb);
    // Build the string once here; the recursion no longer creates and discards one per element.
    return sb.toString();
}

/** Appends the html of {@code elem} and, recursively, of its children to {@code sb}. */
private void appendElementHtml(Element elem, StringBuilder sb) {
    HashSet<String> booleanAttributes = new HashSet<>();
    ArrayList<String> names = new ArrayList<>();
    for (Attribute a : elem.attributes().asList()) {
        names.add(a.getKey());
        if (a instanceof BooleanAttribute) {
            booleanAttributes.add(a.getKey());
        }
    }
    Collections.sort(names);
    sb.append("<").append(elem.tagName());
    for (String attrName : names) {
        sb.append(" ").append(attrName);
        if (!booleanAttributes.contains(attrName)) {
            sb.append("=").append("\'").append(elem.attr(attrName)).append("\'");
        }
    }
    sb.append(">");
    for (Node child : elem.childNodes()) {
        if (child instanceof Element) {
            appendElementHtml((Element) child, sb);
        } else if (child instanceof TextNode) {
            sb.append(((TextNode) child).text().trim());
        }
    }
    sb.append("</").append(elem.tagName()).append(">");
}Example 22
| Project: jbehave-core-master File: LoadFromConfluence.java View source code |
/**
 * Replaces every element with the given tag name found under {@code body} by a text node
 * holding the element's text followed by {@code "<br/>"}. Matching children of an element are
 * processed recursively before the element itself is replaced. Elements that are
 * {@code null} or have no parent are skipped.
 *
 * @param body the element to search in
 * @param tag the tag name of the elements to replace
 */
protected void cleanNodes(Element body, String tag) {
    for (Element match : body.getElementsByTag(tag)) {
        boolean attached = match != null && match.parent() != null;
        if (attached) {
            for (Element nested : match.children().select(tag)) {
                cleanNodes(nested, tag);
            }
            match.replaceWith(new TextNode(match.text() + "<br/>", ""));
        }
    }
}Example 23
| Project: jinjava-master File: TruncateHtmlFilter.java View source code |
/**
 * Truncates text nodes so that the accumulated text length does not exceed {@code maxTextLen}.
 * <p>
 * Once the limit has been reached, later text nodes are emptied. The text node that crosses
 * the limit is cut (at a word boundary unless {@code killwords} is set) and {@code ending} is
 * appended to it. Text nodes that fit entirely only add their length to {@code textLen}.
 *
 * @param node the node being visited; only text nodes are handled
 * @param depth the depth of the node; not used
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    TextNode text = (TextNode) node;
    String textContent = text.text();

    if (textLen >= maxTextLen) {
        // Limit already reached by earlier nodes.
        text.text("");
        return;
    }
    if (textLen + textContent.length() <= maxTextLen) {
        // Fits completely.
        textLen += textContent.length();
        return;
    }

    // This node crosses the limit: keep only the part that still fits.
    int cut = maxTextLen - textLen;
    if (!killwords) {
        cut = Functions.movePointerToJustBeforeLastWord(cut, textContent) - 1;
    }
    text.text(textContent.substring(0, cut) + ending);
    textLen = maxTextLen;
}Example 24
| Project: kune-master File: ContentUnrenderer.java View source code |
// private static final Logger LOG =
// Logger.getLogger(ContentUnrenderer.class.getName());
/**
 * Helper method to recursively parse a HTML element and construct a wave
 * document.
 * <p>
 * Text nodes append their text to {@code output}. Elements are descended into; for a
 * {@code p} element (case-insensitive) a {@code Line} is first registered in {@code elements}
 * at the current output length.
 *
 * @param parent the node whose children are processed
 * @param output the buffer receiving the text
 * @param elements the wave elements, keyed by their position in the output
 * @param annotations the annotations; only passed on to the recursive calls
 */
private static void unrender(final Node parent, final StringBuilder output, final Map<Integer, com.google.wave.api.Element> elements, final Annotations annotations) {
    for (final Node child : parent.childNodes()) {
        if (child instanceof TextNode) {
            output.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        final Element htmlElement = (Element) child;
        if ("p".equalsIgnoreCase(htmlElement.tag().getName())) {
            elements.put(output.length(), new Line());
            // handle any attributes?
        }
        // Additional HTML element tags here.
        unrender(htmlElement, output, elements, annotations);
    }
}Example 25
| Project: SMSnatcher-master File: LyricWikiScraper.java View source code |
/**
 * Downloads the LyricWiki page for a song and extracts the lyrics text.
 * <p>
 * Spaces in artist and title are replaced by underscores to build the page URL. The content of
 * the first {@code div.lyricbox} is taken, ads, breaks and comments are removed, and a small
 * set of HTML tags and escaped angle brackets is stripped from the remaining markup.
 *
 * @param artist the artist name
 * @param title the song title
 * @return the lyrics; {@code "Instrumental"} when a link title in the lyric box contains
 *         "Instrumental"; an empty string when the lyric box has no text or the page states
 *         that the full lyrics may not be displayed. When loading the page fails with an
 *         {@link IOException}, the value collected so far (initially empty) is returned.
 */
public static String getLyrics(String artist, String title) {
    // Prepare artist and title for LyricWiki's URL format
    artist = artist.replace(' ', '_');
    String mod_title = title.replace(' ', '_');
    Logger.LogToStatusBar("Getting lyrics (" + artist + " : " + mod_title + ")!");
    String url = "http://lyrics.wikia.com/" + artist + ":" + mod_title;
    Logger.LogToStatusBar(url);
    String lyrics = "";
    // Try to load page using Jsoup
    try {
        // Load page into Document
        Document doc = Jsoup.connect(url).get();
        // Get lyricBox from page
        Elements lyricBox = doc.select("div.lyricbox");
        if (!lyricBox.hasText()) {
            Logger.LogToStatusBar("Lyrics not found!");
            return "";
        }
        // Remove ads and junk
        lyricBox.get(0).select("div.rtMatcher").remove();
        lyricBox.get(0).select("div.lyricsbreak").remove();
        // Remove comments
        ParseUtils.removeComments(lyricBox.get(0));
        // We now have almost perfect lyrics.
        lyrics = lyricBox.get(0).html();
        TextNode t = TextNode.createFromEncoded(lyrics, "lyricwiki");
        lyrics = t.getWholeText();
        // Remove minimal HTML tags, leaving newlines intact.
        // These are literal strings, so replace() is used instead of the regex-based replaceAll().
        for (String markup : new String[] {"<br />", "<i>", "</i>", "<b>", "</b>", "<p>", "</p>"}) {
            lyrics = lyrics.replace(markup, "");
        }
        // Unescape angle brackets that are still entity-encoded. The previous code replaced
        // "<" with "<" and ">" with ">", which did nothing.
        lyrics = lyrics.replace("&lt;", "<");
        lyrics = lyrics.replace("&gt;", ">");
        // NOTE(review): the search string is the Unicode replacement character as found in the
        // source; it may be a damaged literal - confirm which character was intended.
        lyrics = lyrics.replace("�", "\'");
        // Check if LyricWiki has full lyrics or only portion
        if (lyrics.contains("we are not licensed to display the full lyrics")) {
            return "";
        } else if (lyricBox.get(0).select("a").attr("title").contains("Instrumental")) {
            return "Instrumental";
        }
    } catch (IOException e) {
        Logger.LogToStatusBar("Lyrics not found!");
    }
    Logger.LogToStatusBar("Done");
    return lyrics;
}Example 26
| Project: vaadin-master File: DeclarativeTestBaseBase.java View source code |
/**
 * Produce predictable html (attributes in alphabetical order), always
 * include close tags.
 * <p>
 * The element is appended to {@code sb}: child elements are rendered recursively and child
 * text nodes are appended trimmed. Attributes are written as {@code name='value'}; boolean
 * attributes are written by name only.
 *
 * @param producedElem the element to render
 * @param sb the buffer the html is appended to
 * @return the complete content of {@code sb} after the element has been appended
 */
private String elementToHtml(Element producedElem, StringBuilder sb) {
    appendElementHtml(producedElem, sb);
    // Build the string once here; the recursion no longer creates and discards one per element.
    return sb.toString();
}

/** Appends the html of {@code elem} and, recursively, of its children to {@code sb}. */
private void appendElementHtml(Element elem, StringBuilder sb) {
    HashSet<String> booleanAttributes = new HashSet<>();
    ArrayList<String> names = new ArrayList<>();
    for (Attribute a : elem.attributes().asList()) {
        names.add(a.getKey());
        if (a instanceof BooleanAttribute) {
            booleanAttributes.add(a.getKey());
        }
    }
    Collections.sort(names);
    sb.append("<").append(elem.tagName());
    for (String attrName : names) {
        sb.append(" ").append(attrName);
        if (!booleanAttributes.contains(attrName)) {
            sb.append("=").append("\'").append(elem.attr(attrName)).append("\'");
        }
    }
    sb.append(">");
    for (Node child : elem.childNodes()) {
        if (child instanceof Element) {
            appendElementHtml((Element) child, sb);
        } else if (child instanceof TextNode) {
            sb.append(((TextNode) child).text().trim());
        }
    }
    sb.append("</").append(elem.tagName()).append(">");
}Example 27
| Project: zongtui-webcrawler-master File: ElementOperator.java View source code |
/**
 * Returns text taken from the direct text-node children of {@code element}.
 * When {@code group} is 0 the text of all direct text nodes is concatenated;
 * otherwise the text of the {@code group}-th direct text node (1-based) is
 * returned, or an empty string when there are fewer text nodes than that.
 */
@Override
public String operate(Element element) {
    StringBuilder allText = new StringBuilder();
    int textNodesSeen = 0;
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        TextNode current = (TextNode) child;
        if (group == 0) {
            allText.append(current.text());
            continue;
        }
        textNodesSeen++;
        if (textNodesSeen == group) {
            return current.text();
        }
    }
    return allText.toString();
}Example 28
| Project: Vega-master File: NodeImpl.java View source code |
/**
 * Wraps a jsoup node in the matching wrapper implementation.
 *
 * @param node the jsoup node to wrap; may be {@code null}
 * @param ownerDocument the document passed on to the created wrapper
 * @return {@code null} for a {@code null} node, otherwise a wrapper chosen by
 *         the node's runtime type (element, text, comment, data, or the
 *         generic {@code NodeImpl} for anything else)
 */
static NodeImpl createFromJsoupNode(org.jsoup.nodes.Node node, Document ownerDocument) {
    if (node == null) {
        return null;
    }
    if (node instanceof org.jsoup.nodes.Element) {
        return HTMLElementImpl.create((Element) node, ownerDocument);
    }
    if (node instanceof org.jsoup.nodes.TextNode) {
        return new TextImpl((org.jsoup.nodes.TextNode) node, ownerDocument);
    }
    if (node instanceof org.jsoup.nodes.Comment) {
        return new CommentImpl((org.jsoup.nodes.Comment) node, ownerDocument);
    }
    if (node instanceof org.jsoup.nodes.DataNode) {
        DataNode data = (DataNode) node;
        return new CharacterDataImpl(data, data.getWholeData(), ownerDocument);
    }
    // Any other node type gets the generic wrapper.
    return new NodeImpl(node, ownerDocument);
}Example 29
| Project: android-essentials-toolbox-master File: GenerateUndocumentedPermissions.java View source code |
/**
 * Searches backwards through the siblings of the given XML permission element
 * for the comment that precedes it.
 * <p>
 * Text nodes (for example the whitespace between a comment and the element)
 * and any other non-element nodes are skipped. The search stops at the first
 * preceding element, because a comment before that element is not treated as
 * belonging to {@code permissionElement}.
 *
 * @param permissionElement the element whose preceding comment is wanted
 * @return the closest preceding sibling comment, or {@code null} when an
 *         element is reached first or there are no more previous siblings
 */
private static org.jsoup.nodes.Comment getPreceedingComment(org.jsoup.nodes.Element permissionElement) {
    org.jsoup.nodes.Node node = permissionElement.previousSibling();
    // previousSibling() yields null once the first child is passed; stopping there
    // avoids the NullPointerException the unconditional loop used to run into.
    while (node != null) {
        if (node instanceof Comment) {
            return (org.jsoup.nodes.Comment) node;
        }
        if (node instanceof org.jsoup.nodes.Element) {
            return null;
        }
        // important, there is a trailing whitespace character after the comment
        // that is considered as a node, so keep walking backwards
        node = node.previousSibling();
    }
    return null;
}Example 30
| Project: bavrd-core-master File: Face.java View source code |
/**
 * Converts a node into formatted text. Elements are rendered from the
 * sanitized text of their children and then formatted by tag name
 * ({@code b}, {@code br}, {@code i}, {@code code}, {@code img}); any other
 * element contributes only its children's text. Text nodes contribute their
 * text, all other node types an empty string.
 *
 * @param n the node to convert
 * @return the formatted text, with whitespace next to a {@code <br>} sibling removed
 */
private String sanitize(Node n) {
    String result = "";
    if (n instanceof TextNode) {
        result = ((TextNode) n).text();
    } else if (n instanceof Element) {
        StringBuilder collected = new StringBuilder();
        for (Node child : n.childNodes()) {
            collected.append(sanitize(child));
        }
        String childText = collected.toString();

        Element element = (Element) n;
        String tag = element.tagName();
        if (tag.equals("b")) {
            result = formatBold(childText);
        } else if (tag.equals("br")) {
            result = formatNewLine();
        } else if (tag.equals("i")) {
            result = formatItalic(childText);
        } else if (tag.equals("code")) {
            result = formatCode(childText);
        } else if (tag.equals("img")) {
            result = formatImg(element.attr("abs:src"), element.attr("alt"));
        } else {
            result = childText;
        }
    }

    // jsoup tends to add some whitespace before and after <br>, strip it on both sides
    Node following = n.nextSibling();
    if (following instanceof Element && ((Element) following).tagName().equals("br")) {
        result = result.replaceFirst("\\s+$", "");
    }
    Node preceding = n.previousSibling();
    if (preceding instanceof Element && ((Element) preceding).tagName().equals("br")) {
        result = result.replaceFirst("^\\s+", "");
    }
    return result;
}Example 31
| Project: CN1ML-NetbeansModule-master File: XmlTreeBuilderTest.java View source code |
/**
 * Parses an XML fragment with three top-level nodes and checks the node
 * count, the absolute URL and name of the first element, and the text of the
 * text node that follows it.
 */
@Test
public void xmlFragment() {
    List<Node> parsed = Parser.parseXmlFragment("<one src='/foo/' />Two<three><four /></three>", "http://example.com/");
    assertEquals(3, parsed.size());

    Node first = parsed.get(0);
    assertEquals("http://example.com/foo/", first.absUrl("src"));
    assertEquals("one", first.nodeName());

    TextNode second = (TextNode) parsed.get(1);
    assertEquals("Two", second.text());
}Example 32
| Project: link-bubble-master File: OutputFormatter.java View source code |
/**
 * Appends the text below {@code e} to {@code accum}, skipping every child
 * for which {@code unlikely(child)} is true. A single space is inserted before
 * the content of a block element (when the buffer is non-empty and does not
 * already end in whitespace) and before the content of a {@code br}.
 *
 * @param e the element whose descendants are visited
 * @param accum the buffer receiving the text
 */
void appendTextSkipHidden(Element e, StringBuilder accum) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        boolean blockBoundary = accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum);
        if (blockBoundary || nested.tagName().equals("br")) {
            accum.append(" ");
        }
        appendTextSkipHidden(nested, accum);
    }
}Example 33
| Project: open-data-service-master File: PegelPortalMvSourceAdapter.java View source code |
/**
 * Collects the text below an element in document order. Text nodes
 * contribute their {@code toString()} form, elements are descended into,
 * and all other node types are ignored.
 *
 * @param element the root of the subtree to read
 * @return the concatenated text of the subtree
 */
private String extractText(Element element) {
    StringBuilder collected = new StringBuilder();
    for (Node child : element.childNodes()) {
        if (child instanceof Element) {
            collected.append(extractText((Element) child));
        } else if (child instanceof TextNode) {
            collected.append(child.toString());
        }
    }
    return collected.toString();
}Example 34
| Project: structured-content-tools-master File: StripHtmlPreprocessor.java View source code |
/**
 * Appends the trimmed text of every non-blank text node to {@code buffer},
 * followed by a single separating space. Nodes of any other type are ignored.
 *
 * @param node the node being entered
 * @param depth the depth of the node in the tree (unused)
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    // Non-breaking spaces are not removed by trim(), so turn them into plain spaces
    // first. The explicit escape keeps the character visible and encoding-independent.
    String text = ((TextNode) node).text().replace('\u00A0', ' ').trim();
    if (text.isEmpty()) {
        return;
    }
    buffer.append(text);
    // The text was just trimmed, so it can never end with a space: always add the
    // separator. The last text gets the extra space too but we remove it later.
    buffer.append(" ");
}Example 35
| Project: Vaadin-SignatureField-master File: DeclarativeTestBaseBase.java View source code |
/**
 * Serializes an element into deterministic HTML: attributes are written in
 * alphabetical order, each with a single-quoted value, text nodes are trimmed
 * and every element gets an explicit closing tag.
 *
 * @param producedElem the element to serialize, including its descendants
 * @param sb the buffer the markup is appended to
 * @return the complete content of {@code sb} after this element was appended
 */
private String elementToHtml(Element producedElem, StringBuilder sb) {
    ArrayList<String> sortedKeys = new ArrayList<String>();
    for (Attribute attribute : producedElem.attributes().asList()) {
        sortedKeys.add(attribute.getKey());
    }
    Collections.sort(sortedKeys);

    // Opening tag with its attributes.
    String tag = producedElem.tagName();
    sb.append("<").append(tag);
    for (String key : sortedKeys) {
        sb.append(" ");
        sb.append(key);
        sb.append("=").append("\'");
        sb.append(producedElem.attr(key));
        sb.append("\'");
    }
    sb.append(">");

    // Children: recurse into elements, emit trimmed text, ignore everything else.
    for (Node child : producedElem.childNodes()) {
        if (child instanceof TextNode) {
            sb.append(((TextNode) child).text().trim());
        } else if (child instanceof Element) {
            elementToHtml((Element) child, sb);
        }
    }

    sb.append("</").append(tag).append(">");
    return sb.toString();
}Example 36
| Project: aMatch-master File: QuestionSearch.java View source code |
/**
 * Called when a node is first seen during traversal. Text nodes contribute
 * their text; {@code li}, {@code dt}, headings {@code h1}-{@code h5} and
 * {@code tr} contribute a fixed prefix; every other node contributes nothing.
 *
 * @param node the node being entered
 * @param depth the depth of the node in the tree (unused)
 */
public void head(Node node, int depth) {
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    String tag = node.nodeName();
    if (tag.equals("li")) {
        append("\n * ");
        return;
    }
    if (tag.equals("dt")) {
        append(" ");
        return;
    }
    // "p" is deliberately not in this list (it was disabled in the original code).
    if (StringUtil.in(tag, "h1", "h2", "h3", "h4", "h5", "tr")) {
        append("\n");
    }
}Example 37
| Project: Android_RssReader-master File: Readability.java View source code |
// private static String GetArticleTitle(Element htmlNode)
// {
// if (htmlNode.getElementsByTag("title") == null)
// return null;
//
// Element titleNode = htmlNode.getElementsByTag("title").get(0);
//
// String currTitle, origTitle;
// currTitle = origTitle = GetInnerText(titleNode);
//
// if (Regex.IsMatch(currTitle, @" [\|\-] "))
// {
// currTitle = Regex.Replace(origTitle, @"(.*)[\|\-] .*", "$1");
//
// if (currTitle.Split(' ').Length < 3)
// {
// currTitle = origTitle.Replace(@"[^\|\-]*[\|\-](.*)", "$1");
// }
// }
// else if (currTitle.IndexOf(": ") != -1)
// {
// currTitle = Regex.Replace(origTitle, @".*:(.*)", "$1");
//
// if(currTitle.Split(' ').Length < 3)
// {
// currTitle = Regex.Replace(origTitle, @"[^:]*[:](.*)", "$1");
// }
// }
// else if (currTitle.Length > 150 || currTitle.Length < 15)
// {
// var hOnes = htmlNode.GetElementsByTagName("h1");
// if (hOnes.Count == 1)
// {
// currTitle = GetInnerText(hOnes[0]);
// }
// }
//
// if (currTitle.Split(' ').Length <= 4)
// {
// currTitle = origTitle;
// }
//
// return currTitle.Trim();
// }
/**
 * Picks the element that most likely holds the article text and returns its
 * inner HTML, using a Readability-style scoring heuristic.
 * <p>
 * The method works in three passes over {@code doc.body()}:
 * <ol>
 * <li>Prepare: remove elements whose class/id look like non-content, collect
 * {@code p}, {@code td} and {@code pre} elements for scoring, turn divs
 * without block-level content into {@code p} elements and wrap loose text
 * inside the remaining divs in {@code p} elements.</li>
 * <li>Score: every collected element with at least 25 characters of text
 * adds points to its parent, and half of those points to its grandparent.</li>
 * <li>Select: scale each candidate's score by its link density and keep the
 * best one. If there is no candidate, or the best one is the body itself, the
 * whole body content is wrapped in a new {@code div} and used instead.</li>
 * </ol>
 * The document is modified in place by all three passes.
 *
 * @param doc the parsed page to extract the article from
 * @return the inner HTML of the selected element
 */
private static String GetArticleContent(Document doc) {
    Element body = doc.body();
    List<Element> allElements = body.getAllElements();
    List<Element> nodesToScore = new ArrayList<Element>();
    for (int nodeIndex = 0, len = allElements.size(); nodeIndex < len; nodeIndex++) {
        Element node = allElements.get(nodeIndex);
        String nodeName = node.nodeName();
        // Match against class AND id. attr() yields "" for a missing attribute, so plain
        // concatenation is safe. The previous ternary bound as
        // hasAttr ? class : ("" + id) and therefore ignored the id whenever a class existed.
        String unlikelyMatchString = node.attr("class") + node.attr("id");
        if (s_unlikelyCandidates.matcher(unlikelyMatchString).find() && !s_okMaybeItsACandidate.matcher(unlikelyMatchString).find() && !nodeName.equals("body") && !nodeName.equals("html") && !nodeName.equals("head")) {
            node.remove();
            continue;
        }
        if (nodeName.equals("p") || nodeName.equals("td") || nodeName.equals("pre")) {
            nodesToScore.add(node);
        }
        if (nodeName.equals("div")) {
            if (!s_divToPElements.matcher(node.html()).find()) {
                // No block-level content inside: treat the div as a paragraph.
                if (node.ownerDocument() != null) {
                    Element newNode = node.ownerDocument().createElement("p");
                    newNode.html(node.html());
                    node.replaceWith(newNode);
                    nodesToScore.add(newNode);
                }
            } else {
                // Wrap text that sits directly inside the div in its own paragraph.
                for (Node childNode : node.childNodes()) {
                    if (childNode instanceof TextNode) {
                        if (node.ownerDocument() != null) {
                            Element p = node.ownerDocument().createElement("p");
                            p.html(((TextNode) childNode).text());
                            childNode.replaceWith(p);
                        }
                    }
                }
            }
        }
    }

    Map<Element, Integer> scores = new HashMap<Element, Integer>();
    List<Element> candidates = new ArrayList<Element>();
    for (int pt = 0, len = nodesToScore.size(); pt < len; pt++) {
        Element parentNode = nodesToScore.get(pt).parent();
        Element grandParentNode = parentNode != null ? parentNode.parent() : null;
        String innerText = GetInnerText(nodesToScore.get(pt));
        if (parentNode == null)
            continue;
        if (parentNode.nodeName().equals("body"))
            continue;
        if (parentNode.nodeName().equals("html"))
            continue;
        if (parentNode.nodeName().equals("footer"))
            continue;
        if (parentNode.hasAttr("class") && parentNode.attr("class").equals("copyright"))
            continue;
        if (innerText.length() < 25)
            continue;
        if (!scores.containsKey(parentNode)) {
            scores.put(parentNode, CalculateNodeScore(parentNode));
            candidates.add(parentNode);
        }
        if (grandParentNode != null && !scores.containsKey(grandParentNode)) {
            scores.put(grandParentNode, CalculateNodeScore(grandParentNode));
            candidates.add(grandParentNode);
        }
        // One base point for the paragraph itself.
        int contentScore = 1;
        //for embed flash case
        if (innerText.contains("embed") && (innerText.contains("youku") || innerText.contains("tudou") || innerText.contains("ku6") || innerText.contains("sohu") || innerText.contains("weiphone") || innerText.contains("56") || innerText.contains("youtube") || innerText.contains("qq")))
            contentScore += 50;
        // NOTE(review): both alternatives are the same comma as written here; the second
        // one was possibly meant to be a full-width comma — confirm against the project.
        contentScore += innerText.split("[,]|[,]").length;
        contentScore += Math.min(innerText.length() / 100, 3);
        int v = scores.get(parentNode);
        v += contentScore;
        scores.put(parentNode, v);
        if (grandParentNode != null) {
            v = scores.get(grandParentNode);
            v += contentScore / 2;
            scores.put(grandParentNode, v);
        }
    }

    Element topCandidate = null;
    for (Element cand : candidates) {
        int v = scores.get(cand);
        v = (int) (v * (1 - GetLinkDensity(cand)));
        scores.put(cand, v);
        if (topCandidate == null || v > scores.get(topCandidate)) {
            topCandidate = cand;
        }
    }
    // Last resort, applied once after all candidates were compared. Inside the loop this
    // check could never see a null candidate, so pages without candidates got no result.
    if (topCandidate == null || topCandidate.nodeName().equals("body")) {
        topCandidate = doc.createElement("div");
        topCandidate.html(body.html());
        body.html("");
        body.appendChild(topCandidate);
        scores.put(topCandidate, CalculateNodeScore(topCandidate));
    }
    return topCandidate.html();
}Example 38
| Project: anewjkuapp-master File: FeedEntryImpl.java View source code |
/**
 * Builds a short plain-text teaser from the entry description.
 * <p>
 * The description is parsed as HTML and reduced to the first direct text node
 * of the body, else the text of the body's first child element, else the
 * document text. The result is cut after the first sentence end (a non-digit
 * followed by a period, or a question or exclamation mark, followed by
 * whitespace) and, if still longer than 350 characters, shortened to 175
 * characters plus an ellipsis. Any exception is logged and the value reached
 * so far is returned.
 *
 * @return the trimmed short description
 */
@Override
public String getShortDescription() {
    String shortDescr = htmlToStr(getDescription());
    try {
        Document doc = Jsoup.parse(shortDescr);
        Element body = doc.body();
        if (body == null) {
            shortDescr = doc.text();
        } else {
            List<TextNode> directText = body.textNodes();
            if (!directText.isEmpty()) {
                shortDescr = directText.get(0).getWholeText();
            } else {
                List<Element> childElements = body.children();
                if (childElements.isEmpty()) {
                    shortDescr = doc.text();
                } else {
                    shortDescr = childElements.get(0).text();
                }
            }
        }
        shortDescr = shortDescr.trim();

        // Keep only the first sentence, including its trailing whitespace match.
        Matcher sentenceEnd = Pattern.compile("(\\D\\.|\\?|\\!)(\\s+)").matcher(shortDescr);
        if (sentenceEnd.find()) {
            shortDescr = shortDescr.substring(0, sentenceEnd.end());
        }
        if (shortDescr.length() > 350) {
            shortDescr = shortDescr.substring(0, 175).trim() + "...";
        }
    } catch (Exception e) {
        Log.e(getClass().getSimpleName(), "gsd failed", e);
    }
    return shortDescr.trim();
}Example 39
| Project: brightspot-cms-master File: RichTextViewBuilder.java View source code |
// Walks the given siblings and their descendants and turns them into view
// nodes. Rich text elements become element view nodes; everything else is
// emitted as HTML strings, and runs of adjacent HTML strings are merged into a
// single string node.
private List<RichTextViewNode<V>> toViewNodes(List<Node> siblings) {
    List<RichTextViewNode<V>> flat = new ArrayList<>();
    for (Node node : siblings) {
        if (node instanceof TextNode) {
            flat.add(new StringRichTextViewNode<>(((TextNode) node).text(), htmlToView));
            continue;
        }
        if (node instanceof DataNode) {
            flat.add(new StringRichTextViewNode<>(((DataNode) node).getWholeData(), htmlToView));
            continue;
        }
        if (!(node instanceof Element)) {
            continue;
        }
        Element element = (Element) node;
        RichTextElement rte = RichTextElement.fromElement(element);
        ObjectType tagType = rte != null ? rte.getState().getType() : null;
        if (rte != null && elementToView != null) {
            flat.add(new ElementRichTextViewNode<>(rte, elementToView));
        } else if (tagType == null || keepUnboundElements) {
            List<RichTextViewNode<V>> inner = toViewNodes(element.childNodes());
            String html = element.outerHtml();
            if (element.tag().isSelfClosing()) {
                flat.add(new StringRichTextViewNode<>(html, htmlToView));
            } else {
                // This deliberately does not validate the two index values, since a
                // non-self-closing element should always have those characters
                // present in its HTML.
                int openTagEnd = html.indexOf('>') + 1;
                int closeTagStart = html.lastIndexOf('<');
                flat.add(new StringRichTextViewNode<>(html.substring(0, openTagEnd), htmlToView));
                flat.addAll(inner);
                flat.add(new StringRichTextViewNode<>(html.substring(closeTagStart), htmlToView));
            }
        }
    }

    // Collapse the nodes as much as possible.
    List<RichTextViewNode<V>> collapsed = new ArrayList<>();
    List<StringRichTextViewNode<V>> adjacent = new ArrayList<>();
    for (RichTextViewNode<V> viewNode : flat) {
        if (viewNode instanceof StringRichTextViewNode) {
            adjacent.add((StringRichTextViewNode<V>) viewNode);
            continue;
        }
        // A merged string node is emitted in front of every non-string node, even
        // when no string nodes were pending.
        String merged = adjacent.stream().map(StringRichTextViewNode::getHtml).collect(Collectors.joining());
        collapsed.add(new StringRichTextViewNode<>(merged, htmlToView));
        adjacent.clear();
        collapsed.add(viewNode);
    }
    if (!adjacent.isEmpty()) {
        String merged = adjacent.stream().map(StringRichTextViewNode::getHtml).collect(Collectors.joining());
        collapsed.add(new StringRichTextViewNode<>(merged, htmlToView));
    }
    return collapsed;
}Example 40
| Project: dogeared-extruder-master File: Readability.java View source code |
// CHECKSTYLE:OFF
/**
 * Finds the part of the page that looks most like the article and returns it wrapped in a new div.
 * <p>
 * The steps are: prepare the nodes (drop unlikely candidates, turn divs without block-level
 * descendants into p elements, wrap loose text of other divs in p elements), score paragraphs and
 * credit their parents and grandparents, pick the best-scoring candidate, gather it and its
 * related siblings into a fresh div, replace the content of {@code document.body()} with that div
 * and clean it up with {@code prepArticle}.
 * <p>
 * If the result holds fewer than 250 characters of text, the original markup of
 * {@code pageElement} is restored and the method calls itself again with the first still-enabled
 * flag of {@code stripUnlikelyCandidates}, {@code classWeight}, {@code cleanConditionally}
 * switched off for the duration of that call.
 *
 * @param pageElement the element to search; when {@code null} the {@code body} field is used
 * @return the div holding the article content, or {@code null} when the result is shorter than
 *         250 characters and all three flags are already off
 */
private Element grabArticle(Element pageElement) {
// A non-null argument is treated as a paging call: the result div then gets the id
// "readability-content".
// NOTE(review): the retries at the end of this method pass the (by then non-null) pageElement,
// so a retry is always treated as paging — confirm this is intended.
boolean isPaging = pageElement != null;
if (pageElement == null) {
pageElement = body;
}
// Kept so the original markup can be restored before a retry.
String pageCacheHtml = pageElement.html();
Elements allElements = pageElement.getAllElements();
/*
* Note: in Javascript, this list would be *live*. If you deleted a node from the tree, it and its
* children would remove themselves. To get the same effect, we make a linked list and we remove
* things from it. This won't win prizes for speed, but, then again, the code in Javascript has to be
* doing something nearly as awful.
*/
LinkedList<Element> allElementsList = new LinkedList<Element>();
allElementsList.addAll(allElements);
/**
* First, node prepping. Trash nodes that look cruddy (like ones with the class name "comment", etc),
* and turn divs into P tags where they have been used inappropriately (as in, where they contain no
* other block level elements.) Note: Assignment from index for performance. See
* http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5 TODO: Shouldn't this be a reverse
* traversal?
**/
List<Element> nodesToScore = new ArrayList<Element>();
ListIterator<Element> elIterator = allElementsList.listIterator();
Set<Element> goodAsDead = new HashSet<Element>();
while (elIterator.hasNext()) {
Element node = elIterator.next();
// Skip elements that belong to a subtree already discarded as an unlikely candidate.
if (goodAsDead.contains(node)) {
continue;
}
/* Remove unlikely candidates */
if (stripUnlikelyCandidates) {
String unlikelyMatchString = node.className() + node.id();
if (Patterns.exists(Patterns.UNLIKELY_CANDIDATES, unlikelyMatchString) && !Patterns.exists(Patterns.OK_MAYBE_ITS_A_CANDIDATE, unlikelyMatchString) && !"body".equals(node.tagName())) {
LOG.debug("Removing unlikely candidate - " + unlikelyMatchString);
List<Element> toRemoveAndBelow = node.getAllElements();
elIterator.remove();
/*
* adding 'node' to that set is harmless and reduces the code complexity here.
*/
goodAsDead.addAll(toRemoveAndBelow);
continue;
}
}
if ("p".equals(node.tagName()) || "td".equals(node.tagName()) || "pre".equals(node.tagName())) {
nodesToScore.add(node);
}
/*
* Turn all divs that don't have children block level elements into p's
*/
if ("div".equals(node.tagName())) {
boolean hasBlock = false;
for (Element divChild : node.getAllElements()) {
if (divChild != node) {
if (DIV_TO_P_ELEMENTS.contains(divChild.tagName())) {
hasBlock = true;
break;
}
}
}
if (!hasBlock) {
Element newElement = changeElementTag(node, "p");
nodesToScore.remove(node);
nodesToScore.add(newElement);
} else {
/* EXPERIMENTAL */
//*
// Wrap each text node that is a direct child of the div in its own <p>, marked
// with the "basisInline" attribute. The child count is read once before the loop.
int limit = node.childNodes().size();
for (int i = 0; i < limit; i++) {
Node childNode = node.childNodes().get(i);
if (childNode instanceof TextNode) {
Element p = document.createElement("p");
p.attr("basisInline", "true");
p.html(((TextNode) childNode).text());
childNode.replaceWith(p);
}
}
}
}
}
/**
* Loop through all paragraphs, and assign a score to them based on how content-y they look. Then add
* their score to their parent node. A score is determined by things like number of commas, class
* names, etc. Maybe eventually link density.
**/
List<Element> candidates = new ArrayList<Element>();
for (Element nodeToScore : nodesToScore) {
Element parentNode = nodeToScore.parent();
if (null == parentNode) {
// dropped previously.
continue;
}
Element grandParentNode = parentNode.parent();
if (grandParentNode == null) {
// ditto
continue;
}
String innerText = nodeToScore.text();
/*
* If this paragraph is less than 25 characters, don't even count it.
*/
if (innerText.length() < 25) {
continue;
}
/* Initialize readability data for the parent. */
// An empty "readability" attribute is used as the marker for "not initialized yet";
// presumably initializeNode sets it — confirm against initializeNode.
if ("".equals(parentNode.attr("readability"))) {
initializeNode(parentNode);
candidates.add(parentNode);
}
/*
* If the grandparent has no parent, we don't want it as a candidate. It's probably a symptom that
* we're operating in an orphan.
*/
if (grandParentNode.parent() != null && "".equals(grandParentNode.attr("readability"))) {
initializeNode(grandParentNode);
candidates.add(grandParentNode);
}
double contentScore = 0;
/* Add a point for the paragraph itself as a base. */
contentScore++;
/* Add points for any commas within this paragraph */
contentScore += innerText.split(",").length;
/*
* For every 100 characters in this paragraph, add another point. Up to 3 points.
*/
contentScore += Math.min(Math.floor(innerText.length() / 100.0), 3.0);
/* Add the score to the parent. The grandparent gets half. */
incrementContentScore(parentNode, contentScore);
// NOTE(review): grandParentNode cannot be null here; the loop already continued on null above.
if (grandParentNode != null) {
incrementContentScore(grandParentNode, contentScore / 2.0);
}
}
/**
* After we've calculated scores, loop through all of the possible candidate nodes we found and find
* the one with the highest score.
**/
Element topCandidate = null;
for (Element candidate : candidates) {
/**
* Scale the final candidates score based on link density. Good content should have a relatively
* small link density (5% or less) and be mostly unaffected by this operation.
**/
double score = getContentScore(candidate);
double newScore = score * (1.0 - getLinkDensity(candidate));
setContentScore(candidate, newScore);
LOG.debug("Candidate [" + candidate.getClass() + "] (" + candidate.className() + ":" + candidate.id() + ") with score " + newScore);
if (null == topCandidate || newScore > getContentScore(topCandidate)) {
topCandidate = candidate;
}
}
/**
* If we still have no top candidate, just use the body as a last resort. We also have to copy the
* body node so it is something we can modify.
**/
if (topCandidate == null || topCandidate == body) {
topCandidate = document.createElement("div");
// not efficient but not likely.
topCandidate.html(pageElement.html());
pageElement.html("");
pageElement.appendChild(topCandidate);
initializeNode(topCandidate);
}
/**
* Now that we have the top candidate, look through its siblings for content that might also be
* related. Things like preambles, content split by ads that we removed, etc.
**/
Element articleContent = document.createElement("div");
if (isPaging) {
articleContent.attr("id", "readability-content");
}
// Siblings need at least 20% of the top candidate's score, and never less than 10.
double siblingScoreThreshold = Math.max(10, getContentScore(topCandidate) * 0.2);
List<Element> siblingNodes = topCandidate.parent().children();
for (Element siblingNode : siblingNodes) {
boolean scored = isElementScored(siblingNode);
boolean append = false;
LOG.debug("Looking at sibling node: [" + siblingNode.getClass() + "] (" + siblingNode.className() + ":" + siblingNode.id() + ")");
if (scored) {
LOG.debug("Sibling has score " + getContentScore(siblingNode));
} else {
LOG.debug("Sibling has score unknown");
}
// The top candidate itself is always kept.
if (siblingNode == topCandidate) {
append = true;
}
double contentBonus = 0;
/*
* Give a bonus if sibling nodes and top candidates have the exact same classname
*/
if (siblingNode.className().equals(topCandidate.className()) && !"".equals(topCandidate.className())) {
contentBonus += getContentScore(topCandidate) * 0.2;
}
if (scored && (getContentScore(siblingNode) + contentBonus >= siblingScoreThreshold)) {
append = true;
}
// Unscored paragraphs are kept when they are long with few links, or short, link-free
// and matching Patterns.ENDS_WITH_DOT.
if ("p".equals(siblingNode.tagName())) {
double linkDensity = getLinkDensity(siblingNode);
String nodeContent = siblingNode.text();
int nodeLength = nodeContent.length();
if (nodeLength > 80 && linkDensity < 0.25) {
append = true;
} else if (nodeLength < 80 && linkDensity == 0 && Patterns.exists(Patterns.ENDS_WITH_DOT, nodeContent)) {
append = true;
}
}
if (append) {
LOG.debug("Appending node: [" + siblingNode.getClass() + "]");
Element nodeToAppend = null;
if (!"div".equals(siblingNode.tagName()) && !"p".equals(siblingNode.tagName())) {
/*
* We have a node that isn't a common block level element, like a form or td tag. Turn it
* into a div so it doesn't get filtered out later by accident.
*/
LOG.debug("Altering siblingNode of " + siblingNode.tagName() + " to div.");
nodeToAppend = changeElementTag(siblingNode, "div");
} else {
nodeToAppend = siblingNode;
}
/*
* To ensure a node does not interfere with readability styles, remove its classnames
*/
nodeToAppend.removeAttr("class");
/*
* Append sibling and subtract from our list because it removes the node when you append to
* another node
*/
articleContent.appendChild(nodeToAppend);
}
}
// The document body now contains only the collected article content.
document.body().empty();
document.body().appendChild(articleContent);
/**
* So we have all of the content that we need. Now we clean it up for presentation.
**/
prepArticle(articleContent);
/**
* Now that we've gone through the full algorithm, check to see if we got any meaningful content. If
* we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
* likelihood of finding the content, and the sieve approach gives us a higher likelihood of finding
* the -right- content.
**/
if (articleContent.text().length() < 250) {
// Restore the original markup, then retry with the first still-enabled flag switched
// off; each finally block switches its flag back on once the retry returns.
pageElement.html(pageCacheHtml);
if (stripUnlikelyCandidates) {
try {
stripUnlikelyCandidates = false;
return grabArticle(pageElement);
} finally {
stripUnlikelyCandidates = true;
}
} else if (classWeight) {
try {
classWeight = false;
return grabArticle(pageElement);
} finally {
classWeight = true;
}
} else if (cleanConditionally) {
try {
cleanConditionally = false;
return grabArticle(pageElement);
} finally {
cleanConditionally = true;
}
} else {
// Every heuristic has been relaxed already; give up.
return null;
}
}
return articleContent;
}Example 41
| Project: ez-vcard-master File: HCardElement.java View source code |
/**
 * Appends the value text found below {@code element} to {@code value}.
 * Elements carrying the class {@code type} and {@code del} elements are
 * skipped together with their content, {@code br} elements become
 * {@code NEWLINE}, other elements are descended into, and text nodes
 * contribute their text.
 *
 * @param element the element whose children are visited
 * @param value the buffer receiving the value text
 */
private void visitForValue(Element element, StringBuilder value) {
    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            value.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        if (nested.classNames().contains("type")) {
            //ignore "type" elements
            continue;
        }
        String tag = nested.tagName();
        if ("br".equals(tag)) {
            //convert "<br>" to a newline
            value.append(NEWLINE);
        } else if (!"del".equals(tag)) {
            //everything except "<del>" is descended into
            visitForValue(nested, value);
        }
    }
}Example 42
| Project: Java-readability-master File: Readability.java View source code |
// CHECKSTYLE:OFF
private Element grabArticle(Element pageElement) {
boolean isPaging = pageElement != null;
if (pageElement == null) {
pageElement = body;
}
String pageCacheHtml = pageElement.html();
Elements allElements = pageElement.getAllElements();
/*
* Note: in Javascript, this list would be *live*. If you deleted a node from the tree, it and its
* children would remove themselves. To get the same effect, we make a linked list and we remove
* things from it. This won't win prizes for speed, but, then again, the code in Javascript has to be
* doing something nearly as awful.
*/
LinkedList<Element> allElementsList = new LinkedList<Element>();
allElementsList.addAll(allElements);
/**
* First, node prepping. Trash nodes that look cruddy (like ones with the class name "comment", etc),
* and turn divs into P tags where they have been used inappropriately (as in, where they contain no
* other block level elements.) Note: Assignment from index for performance. See
* http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5 TODO: Shouldn't this be a reverse
* traversal?
**/
List<Element> nodesToScore = new ArrayList<Element>();
ListIterator<Element> elIterator = allElementsList.listIterator();
Set<Element> goodAsDead = new HashSet<Element>();
while (elIterator.hasNext()) {
Element node = elIterator.next();
if (goodAsDead.contains(node)) {
continue;
}
/* Remove unlikely candidates */
if (stripUnlikelyCandidates) {
String unlikelyMatchString = node.className() + node.id();
if (Patterns.exists(Patterns.UNLIKELY_CANDIDATES, unlikelyMatchString) && !Patterns.exists(Patterns.OK_MAYBE_ITS_A_CANDIDATE, unlikelyMatchString) && !"body".equals(node.tagName())) {
LOG.debug("Removing unlikely candidate - " + unlikelyMatchString);
List<Element> toRemoveAndBelow = node.getAllElements();
elIterator.remove();
/*
* adding 'node' to that set is harmless and reduces the code complexity here.
*/
goodAsDead.addAll(toRemoveAndBelow);
continue;
}
}
if ("p".equals(node.tagName()) || "td".equals(node.tagName()) || "pre".equals(node.tagName())) {
nodesToScore.add(node);
}
/*
* Turn all divs that don't have children block level elements into p's
*/
if ("div".equals(node.tagName())) {
boolean hasBlock = false;
for (Element divChild : node.getAllElements()) {
if (divChild != node) {
if (DIV_TO_P_ELEMENTS.contains(divChild.tagName())) {
hasBlock = true;
break;
}
}
}
if (!hasBlock) {
Element newElement = changeElementTag(node, "p");
nodesToScore.remove(node);
nodesToScore.add(newElement);
} else {
/* EXPERIMENTAL */
//*
int limit = node.childNodes().size();
for (int i = 0; i < limit; i++) {
Node childNode = node.childNodes().get(i);
if (childNode instanceof TextNode) {
Element p = document.createElement("p");
p.attr("basisInline", "true");
p.html(((TextNode) childNode).text());
childNode.replaceWith(p);
}
}
}
}
}
/**
* Loop through all paragraphs, and assign a score to them based on how content-y they look. Then add
* their score to their parent node. A score is determined by things like number of commas, class
* names, etc. Maybe eventually link density.
**/
List<Element> candidates = new ArrayList<Element>();
for (Element nodeToScore : nodesToScore) {
Element parentNode = nodeToScore.parent();
if (null == parentNode) {
// dropped previously.
continue;
}
Element grandParentNode = parentNode.parent();
if (grandParentNode == null) {
// ditto
continue;
}
String innerText = nodeToScore.text();
/*
* If this paragraph is less than 25 characters, don't even count it.
*/
if (innerText.length() < 25) {
continue;
}
/* Initialize readability data for the parent. */
if ("".equals(parentNode.attr("readability"))) {
initializeNode(parentNode);
candidates.add(parentNode);
}
/*
* If the grandparent has no parent, we don't want it as a candidate. It's probably a symptom that
* we're operating in an orphan.
*/
if (grandParentNode.parent() != null && "".equals(grandParentNode.attr("readability"))) {
initializeNode(grandParentNode);
candidates.add(grandParentNode);
}
double contentScore = 0;
/* Add a point for the paragraph itself as a base. */
contentScore++;
/* Add points for any commas within this paragraph */
contentScore += innerText.split(",").length;
/*
* For every 100 characters in this paragraph, add another point. Up to 3 points.
*/
contentScore += Math.min(Math.floor(innerText.length() / 100.0), 3.0);
/* Add the score to the parent. The grandparent gets half. */
incrementContentScore(parentNode, contentScore);
if (grandParentNode != null) {
incrementContentScore(grandParentNode, contentScore / 2.0);
}
}
/**
* After we've calculated scores, loop through all of the possible candidate nodes we found and find
* the one with the highest score.
**/
Element topCandidate = null;
for (Element candidate : candidates) {
/**
* Scale the final candidates score based on link density. Good content should have a relatively
* small link density (5% or less) and be mostly unaffected by this operation.
**/
double score = getContentScore(candidate);
double newScore = score * (1.0 - getLinkDensity(candidate));
setContentScore(candidate, newScore);
LOG.debug("Candidate [" + candidate.getClass() + "] (" + candidate.className() + ":" + candidate.id() + ") with score " + newScore);
if (null == topCandidate || newScore > getContentScore(topCandidate)) {
topCandidate = candidate;
}
}
/**
* If we still have no top candidate, just use the body as a last resort. We also have to copy the
* body node so it is something we can modify.
**/
if (topCandidate == null || topCandidate == body) {
topCandidate = document.createElement("div");
// not efficient but not likely.
topCandidate.html(pageElement.html());
pageElement.html("");
pageElement.appendChild(topCandidate);
initializeNode(topCandidate);
}
/**
* Now that we have the top candidate, look through its siblings for content that might also be
* related. Things like preambles, content split by ads that we removed, etc.
**/
Element articleContent = document.createElement("div");
if (isPaging) {
articleContent.attr("id", "readability-content");
}
double siblingScoreThreshold = Math.max(10, getContentScore(topCandidate) * 0.2);
List<Element> siblingNodes = topCandidate.parent().children();
for (Element siblingNode : siblingNodes) {
boolean scored = isElementScored(siblingNode);
boolean append = false;
LOG.debug("Looking at sibling node: [" + siblingNode.getClass() + "] (" + siblingNode.className() + ":" + siblingNode.id() + ")");
if (scored) {
LOG.debug("Sibling has score " + getContentScore(siblingNode));
} else {
LOG.debug("Sibling has score unknown");
}
if (siblingNode == topCandidate) {
append = true;
}
double contentBonus = 0;
/*
* Give a bonus if sibling nodes and top candidates have the example same classname
*/
if (siblingNode.className().equals(topCandidate.className()) && !"".equals(topCandidate.className())) {
contentBonus += getContentScore(topCandidate) * 0.2;
}
if (scored && (getContentScore(siblingNode) + contentBonus >= siblingScoreThreshold)) {
append = true;
}
if ("p".equals(siblingNode.tagName())) {
double linkDensity = getLinkDensity(siblingNode);
String nodeContent = siblingNode.text();
int nodeLength = nodeContent.length();
if (nodeLength > 80 && linkDensity < 0.25) {
append = true;
} else if (nodeLength < 80 && linkDensity == 0 && Patterns.exists(Patterns.ENDS_WITH_DOT, nodeContent)) {
append = true;
}
}
if (append) {
LOG.debug("Appending node: [" + siblingNode.getClass() + "]");
Element nodeToAppend = null;
if (!"div".equals(siblingNode.tagName()) && !"p".equals(siblingNode.tagName())) {
/*
* We have a node that isn't a common block level element, like a form or td tag. Turn it
* into a div so it doesn't get filtered out later by accident.
*/
LOG.debug("Altering siblingNode of " + siblingNode.tagName() + " to div.");
nodeToAppend = changeElementTag(siblingNode, "div");
} else {
nodeToAppend = siblingNode;
}
/*
* To ensure a node does not interfere with readability styles, remove its classnames
*/
nodeToAppend.removeAttr("class");
/*
* Append sibling and subtract from our list because it removes the node when you append to
* another node
*/
articleContent.appendChild(nodeToAppend);
}
}
document.body().empty();
document.body().appendChild(articleContent);
/**
* So we have all of the content that we need. Now we clean it up for presentation.
**/
prepArticle(articleContent);
/**
* Now that we've gone through the full algorithm, check to see if we got any meaningful content. If
* we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher
* likelihood of finding the content, and the sieve approach gives us a higher likelihood of finding
* the -right- content.
**/
if (articleContent.text().length() < 250) {
pageElement.html(pageCacheHtml);
if (stripUnlikelyCandidates) {
try {
stripUnlikelyCandidates = false;
return grabArticle(pageElement);
} finally {
stripUnlikelyCandidates = true;
}
} else if (classWeight) {
try {
classWeight = false;
return grabArticle(pageElement);
} finally {
classWeight = true;
}
} else if (cleanConditionally) {
try {
cleanConditionally = false;
return grabArticle(pageElement);
} finally {
cleanConditionally = true;
}
} else {
return null;
}
}
return articleContent;
}Example 43
| Project: jooby-master File: Doc.java View source code |
/**
 * Emits the opening Markdown markup for a node when the traversal first reaches it.
 *
 * Nothing is written while {@code isInToc} is set. Text nodes are appended with non-breaking
 * spaces turned into regular spaces; elements are mapped to their Markdown opening token.
 *
 * @param node  the node being entered
 * @param depth the depth of the node in the tree (unused here)
 */
@Override
public void head(final Node node, final int depth) {
    if (!isInToc) {
        if (node instanceof TextNode) {
            TextNode textNode = (TextNode) node;
            // Replace non-breaking spaces (U+00A0). The escape is used so the character cannot be
            // silently normalised to a plain space by editors or text extraction.
            String txt = textNode.text().replace('\u00A0', ' ');
            builder.append(txt);
        } else if (node instanceof Element) {
            Element element = (Element) node;
            switch (element.tagName()) {
                case "span":
                case "blockquote":
                    // ignored
                    break;
                case "ol":
                case "ul":
                    listDepth += 1;
                    // falls through: a list also starts on a new line
                    // NOTE(review): fall-through looks deliberate here - confirm
                case "br":
                case "p":
                    builder.append("\n");
                    break;
                case "div":
                    builder.append("\n");
                    break;
                case "h1":
                    builder.append("\n# ");
                    break;
                case "h2":
                    builder.append("\n## ");
                    break;
                case "h3":
                    builder.append("\n### ");
                    break;
                case "h4":
                    builder.append("\n#### ");
                    // Previously fell through into the bold case and emitted a stray "**".
                    break;
                case "b":
                case "strong":
                    builder.append("**");
                    break;
                case "cite":
                case "i":
                case "u":
                    builder.append("*");
                    break;
                case "a":
                    builder.append('[');
                    break;
                case "li":
                    for (int i = 0; i < listDepth - 1; i++) {
                        builder.append(" ");
                    }
                    builder.append(element.parent().tagName().equals("ol") ? "1. " : "* ");
                    break;
                case "code":
                    builder.append("`");
                    break;
                case "strike":
                    builder.append("<").append(element.tagName()).append(">");
                    break;
                case "img": {
                    String src = element.attr("src");
                    String alt = element.attr("alt");
                    alt = alt == null ? "" : alt;
                    if (src != null) {
                        // Markdown image syntax: ![alt](src)
                        builder.append("![").append(alt).append("](").append(src).append(")\n");
                    }
                    break;
                }
                case "pre":
                    builder.append("```\n");
                    break;
                case "hr":
                    builder.append("\n***\n");
                    break;
                case "font": {
                    String face = element.attr("face");
                    if (face != null && face.contains("monospace")) {
                        builder.append("`");
                    }
                    break;
                }
                default:
                    log.debug("Unhandled element {}", element.tagName());
            }
        }
    }
}Example 44
| Project: LastCalc-master File: Renderers.java View source code |
/**
 * Renders {@code obj} as HTML appended to {@code renderTo}.
 *
 * Maps and lists are rendered recursively inside a span and replaced by a "too big" note when
 * their text exceeds 120 characters. Amounts are rendered by unit kind (currency, temperature,
 * other). Numbers, radix values, user-defined parsers, known variables, capitalised strings and
 * document wrappers each get their own markup; anything else becomes a plain text node.
 *
 * @param baseUri   base URI passed to created text nodes and to {@code toHtml}
 * @param variables variable names mapped to an index used to pick a highlight colour
 * @param renderTo  element that receives the rendered markup
 * @param obj       the value to render
 */
private static void renderObject(final String baseUri, final Map<String, Integer> variables, final Element renderTo, final Object obj) {
    renderTo.append(" ");
    if (obj instanceof Map) {
        final Map<Object, Object> map = (Map<Object, Object>) obj;
        final int mapSize = map.size();
        final Element mapSpan = renderTo.appendElement("span").addClass("map");
        mapSpan.append("{");
        int count = 0;
        for (final Entry<Object, Object> e : map.entrySet()) {
            renderObject(baseUri, variables, mapSpan, e.getKey());
            mapSpan.append(" :");
            renderObject(baseUri, variables, mapSpan, e.getValue());
            if (count < mapSize - 1) {
                mapSpan.append(", ");
            }
            count++;
        }
        mapSpan.append("}");
        final int textLength = mapSpan.text().length();
        if (textLength > 120) {
            mapSpan.html("{ too big (" + textLength + " chars) }");
        }
    } else if (obj instanceof List) {
        final List<Object> list = (List<Object>) obj;
        final int listSize = list.size();
        final Element listSpan = renderTo.appendElement("span").addClass("map");
        listSpan.append("[");
        int count = 0;
        for (final Object e : list) {
            renderObject(baseUri, variables, listSpan, e);
            if (count < listSize - 1) {
                listSpan.append(", ");
            }
            count++;
        }
        listSpan.append("]");
        final int textLength = listSpan.text().length();
        if (textLength > 120) {
            listSpan.html("[ too big (" + textLength + " chars) ]");
        }
    } else if (obj instanceof Amount) {
        final Amount<?> amount = (Amount<?>) obj;
        Unit<? extends Quantity> unit = amount.getUnit();
        log.log(Level.INFO, "Amount: " + amount + ", unit type: " + unit.getClass().getCanonicalName());
        final Element amountSpan = renderTo.appendElement("span").addClass("amount");
        final double estimatedValue = amount.getEstimatedValue();
        if (unit instanceof Currency) {
            final Element currencySpan = amountSpan.appendElement("span").addClass("currency");
            final Currency currency = (Currency) unit;
            // Currency symbols are written as Unicode escapes: the previous literals were
            // mis-decoded UTF-8 and rendered as mojibake instead of the pound, euro and yen signs.
            if (currency.getCode().equalsIgnoreCase("USD")) {
                currencySpan.html("US$" + currencyFormat.format(estimatedValue));
            } else if (currency.getCode().equalsIgnoreCase("GBP")) {
                currencySpan.html("\u00A3" + currencyFormat.format(estimatedValue));
            } else if (currency.getCode().equalsIgnoreCase("EUR")) {
                currencySpan.html("\u20AC" + currencyFormat.format(estimatedValue));
            } else if (currency.getCode().equalsIgnoreCase("JPY")) {
                currencySpan.html("\u00A5" + currencyFormat.format(estimatedValue));
            } else {
                currencySpan.text(currencyFormat.format(estimatedValue) + currency.getCode());
            }
        } else if (unit.equals(NonSI.FAHRENHEIT) || unit.equals(SI.CELSIUS) || unit.equals(SI.KELVIN)) {
            // Avoid "33 fahrenheits"
            final Element temperatureSpan = amountSpan.appendElement("span").addClass("temperature");
            temperatureSpan.text(estimatedValue + unit.toString());
        } else {
            final String numStr = Misc.numberFormat.format(estimatedValue);
            amountSpan.appendElement("span").addClass("number").text(numStr);
            amountSpan.appendText(" ");
            if (!unit.equals(Unit.ONE)) {
                final Element unitSpan = amountSpan.appendElement("span").addClass("recognized");
                // Singular name only for exactly 1.0, plural otherwise.
                final String verboseName = estimatedValue == 1.0 ? UnitParser.verboseNamesSing.get(unit) : UnitParser.verboseNamesPlur.get(unit);
                if (verboseName != null) {
                    unitSpan.text(verboseName);
                } else {
                    unitSpan.text(unit.toString());
                }
            }
        }
    } else if (obj instanceof org.jscience.mathematics.number.Number) {
        final org.jscience.mathematics.number.Number<?> num = (org.jscience.mathematics.number.Number<?>) obj;
        final String numStr = Misc.numberFormat.format(num.doubleValue());
        renderTo.appendElement("span").addClass("number").text(numStr);
    } else if (obj instanceof Radix) {
        renderTo.appendElement("span").addClass("number").text(obj.toString());
    } else if (obj instanceof UserDefinedParser) {
        renderTo.appendChild(toHtml(baseUri, ((UserDefinedParser) obj).after));
    } else if (variables.containsKey(obj)) {
        final String color = variableColors.get(variables.get(obj) % variableColors.size());
        renderTo.appendElement("span").addClass("highlighted").addClass("variable").addClass(color).text((String) obj);
    } else if (obj instanceof String && !((String) obj).isEmpty() && Character.isUpperCase(((String) obj).charAt(0))) {
        // The isEmpty() guard keeps charAt(0) from throwing on an empty string, which now falls
        // through to the plain text-node branch below.
        renderTo.appendElement("span").addClass("highlighted").addClass("variable").addClass("white").text((String) obj);
    } else if (obj instanceof DocumentWrapper) {
        renderTo.append("<html>" + ((DocumentWrapper) obj).title() + " ... </html>");
    } else {
        renderTo.appendChild(new TextNode(obj.toString(), baseUri));
    }
}Example 45
| Project: Lightning-Browser-master File: OutputFormatter.java View source code |
/**
 * Recursively appends the text of {@code e}'s descendants to {@code accum}, skipping every
 * child for which {@code unlikely} returns true.
 *
 * A single space is inserted before descending into a block element (when the accumulator is
 * non-empty and does not already end in whitespace) or into a {@code br} element.
 *
 * @param e      element whose children are walked
 * @param accum  receives the collected text
 * @param indent recursion depth, incremented on each nested call
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node node : e.childNodes()) {
        if (unlikely(node)) {
            continue;
        }
        if (node instanceof TextNode) {
            accum.append(((TextNode) node).text());
            continue;
        }
        if (!(node instanceof Element)) {
            continue;
        }
        Element nested = (Element) node;
        boolean blockNeedsGap = accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum);
        if (blockNeedsGap || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}Example 46
| Project: NiceText-master File: NTHelper.java View source code |
/**
 * Strips markup that is unlikely to be article text from {@code doc}.
 *
 * Steps, in order: unwrap every tag listed in {@code UNWRAP_TAGS}; replace each {@code br} in
 * the body with a newline text node; then walk all body elements and remove scripts, styles,
 * long or badly placed links, elements with too little text, and elements whose own text
 * contains more than three pipe-separated parts.
 *
 * @param doc the document to clean in place
 */
private void removeFat(Document doc) {
    for (String tag : UNWRAP_TAGS) {
        doc.select(tag).unwrap();
    }
    for (Element lineBreak : doc.body().getElementsByTag("br")) {
        if (lineBreak == null || !lineBreak.tagName().equalsIgnoreCase("br")) {
            continue;
        }
        lineBreak.replaceWith(new TextNode("\n", null));
    }
    for (Element candidate : doc.body().getAllElements()) {
        String tag = candidate.tagName();
        boolean discard;
        if (tag.equalsIgnoreCase("script") || tag.equalsIgnoreCase("noscript") || tag.equalsIgnoreCase("style")) {
            discard = true;
        } else if (tag.equalsIgnoreCase("a")) {
            // Links: drop long ones, and ones whose parent is not a text container or has no text.
            discard = candidate.text().length() > 40
                    || !POSSIBLE_TEXT_NODES.matcher(candidate.parent().tagName()).matches()
                    || candidate.parent().ownText().length() == 0;
        } else {
            discard = candidate.text().length() < WORDS_T
                    || candidate.ownText().split("\\|").length > 3;
        }
        if (discard) {
            candidate.remove();
        }
    }
}Example 47
| Project: shopb2b-master File: Article.java View source code |
/**
 * Splits the article content into pages.
 *
 * Empty content yields a single empty page. Content containing {@code contentBreake} is split
 * on it. Otherwise the content is parsed as HTML and the body's child nodes are accumulated
 * into a page until the running text length reaches {@code MAX_PAGE_CONTENT_COUNT}; text nodes
 * are cut into pieces with {@code pattern}, each piece keeping the delimiter that followed it.
 *
 * @return the page contents, in order
 */
@Transient
public String[] getPageContents() {
    if (StringUtils.isEmpty(this.content)) {
        return new String[] { "" };
    }
    if (this.content.contains(contentBreake)) {
        return this.content.split(contentBreake);
    }
    ArrayList<String> pages = new ArrayList<String>();
    org.jsoup.nodes.Document parsed = Jsoup.parse(this.content);
    List<Node> bodyNodes = parsed.body().childNodes();
    if (bodyNodes != null) {
        int pageLength = 0;
        StringBuilder page = new StringBuilder();
        for (Node node : bodyNodes) {
            if (node instanceof org.jsoup.nodes.Element) {
                org.jsoup.nodes.Element element = (org.jsoup.nodes.Element) node;
                page.append(element.outerHtml());
                pageLength += element.text().length();
                if (pageLength >= MAX_PAGE_CONTENT_COUNT) {
                    pages.add(page.toString());
                    pageLength = 0;
                    page.setLength(0);
                }
            } else if (node instanceof TextNode) {
                String text = ((TextNode) node).text();
                String[] pieces = pattern.split(text);
                Matcher delimiters = pattern.matcher(text);
                for (String piece : pieces) {
                    // Re-attach the delimiter that split() removed after this piece, if any.
                    if (delimiters.find()) {
                        piece = piece + delimiters.group();
                    }
                    page.append(piece);
                    pageLength += piece.length();
                    if (pageLength >= MAX_PAGE_CONTENT_COUNT) {
                        pages.add(page.toString());
                        pageLength = 0;
                        page.setLength(0);
                    }
                }
            }
        }
        // Whatever is left over forms the last page.
        String remainder = page.toString();
        if (StringUtils.isNotEmpty(remainder)) {
            pages.add(remainder);
        }
    }
    return pages.toArray(new String[pages.size()]);
}Example 48
| Project: SubTools-master File: JAddic7edApi.java View source code |
/**
 * Fetches the addic7ed.com page for one episode and extracts a descriptor for every subtitle
 * listed on it.
 *
 * The page URL is built from the lower-cased show name and title (spaces replaced by
 * underscores, "#" removed from the title) plus season and episode numbers. Descriptors are
 * only created when the page title, a language and a download link were all found, and
 * duplicates (per {@code isDuplicate}) are not added.
 *
 * @param showname name of the show
 * @param season   season number
 * @param episode  episode number
 * @param title    episode title
 * @return the subtitles found; empty when nothing matched
 * @throws Exception declared by the method; NOTE(review): the substring/indexOf and cast
 *                   expressions below also throw unchecked exceptions when the page layout
 *                   differs from what is expected - confirm this is acceptable to callers
 */
public List<Addic7edSubtitleDescriptor> searchSubtitles(String showname, int season, int episode, String title) throws Exception {
// http://www.addic7ed.com/serie/Smallville/9/11/Absolute_Justice
String url = "http://www.addic7ed.com/serie/" + showname.toLowerCase().replace(" ", "_") + "/" + season + "/" + episode + "/" + title.toLowerCase().replace(" ", "_").replace("#", "");
String content = this.getContent(false, url);
List<Addic7edSubtitleDescriptor> lSubtitles = new ArrayList<Addic7edSubtitleDescriptor>();
Document doc = Jsoup.parse(content);
String titel = null;
Elements elTitel = doc.getElementsByClass("titulo");
// The title is only taken when exactly one "titulo" element exists: its HTML up to one
// character before the first "<".
if (elTitel.size() == 1) {
titel = elTitel.get(0).html().substring(0, elTitel.get(0).html().indexOf("<") - 1).trim();
}
String uploader, version, lang, download = null;
boolean hearingImpaired = false;
Elements blocks = doc.getElementsByClass("tabel95");
blocks = blocks.select("table[width=100%]");
// Each matching table is one subtitle version block.
for (Element block : blocks) {
// Reset the per-block state.
uploader = "";
version = null;
lang = null;
download = null;
hearingImpaired = false;
Elements classesNewsTitle = block.getElementsByClass("NewsTitle");
Elements classesNewsDate = block.getElementsByClass("newsDate").select("td[colspan=3]");
Elements imgHearingImpaired = block.select("img").select("img[title~=Hearing]");
if (classesNewsTitle.size() == 1 && classesNewsDate.size() == 1) {
// The second child node of the title cell is cast to a text node and matched against
// the class-level pattern (defined outside this method).
TextNode tn = (TextNode) classesNewsTitle.get(0).childNode(1);
Matcher m = pattern.matcher(tn.text());
if (!m.find()) {
// No match: stops processing this and all remaining blocks.
break;
} else {
// Version = matched text up to its last comma, without the word "Version", followed by
// the text of the date cell.
version = m.group().substring(0, m.group().lastIndexOf(",")).replace("Version", "") + (" ") + classesNewsDate.get(0).text().trim();
uploader = block.getElementsByTag("a").select("a[href*=user/]").get(0).text();
hearingImpaired = imgHearingImpaired.size() > 0;
}
}
if (version != null) {
Elements tds = block.select("tr:contains(Completed)");
Elements reqTds = tds.select("td").not("td[rowspan=2]");
// lang and download are carried across cells: a language cell sets lang, a later cell
// may clear it or supply the download link, and a descriptor is emitted once both exist.
for (Element td : reqTds) {
if (td.hasClass("language")) {
lang = td.html().substring(0, td.html().indexOf("<"));
}
if (lang != null && td.toString().toLowerCase().contains("completed")) {
// partially completed translations ("NN% completed") are not wanted
if (td.html().toLowerCase().contains("% completed"))
lang = null;
}
if (lang != null && td.getElementsByClass("buttonDownload").size() > 0) {
Elements a = td.getElementsByClass("buttonDownload");
// With one download button use it; with two, use the second one.
if (a.size() == 1) {
download = "http://www.addic7ed.com" + a.get(0).attr("href");
}
if (a.size() == 2) {
download = "http://www.addic7ed.com" + a.get(1).attr("href");
}
}
if (lang != null && download != null && titel != null) {
Addic7edSubtitleDescriptor sub = new Addic7edSubtitleDescriptor();
sub.setUploader(uploader);
sub.setTitel(titel.trim());
sub.setVersion(version.trim());
sub.setUrl(download);
sub.setLanguage(lang.trim());
sub.setHearingImpaired(hearingImpaired);
if (!isDuplicate(lSubtitles, sub)) {
lSubtitles.add(sub);
}
// Reset so the next language row in this block starts clean.
lang = null;
download = null;
}
}
}
}
return lSubtitles;
}Example 49
| Project: uzlee-master File: ThreadsParser.java View source code |
/**
 * Parses a private-message list page into a {@code Threads} collection.
 *
 * Each {@code ul.pm_list li.s_clear} item becomes one thread; an item that fails to parse is
 * reported via {@code printStackTrace} and skipped. The current page number is read from
 * {@code div.pages > strong} (default 1), and a next page is assumed to exist when a link
 * ending in {@code &page=<current + 1>} is present.
 *
 * @param html the page HTML
 * @return the parsed threads with paging metadata set
 */
public Threads parseMessages(String html) {
    Document doc = getDoc(html);
    Elements messages = doc.select("ul.pm_list li.s_clear");
    Threads threads = new Threads();
    for (Element message : messages) {
        try {
            Elements sender = message.select("p.cite a");
            String senderName = sender.text();
            String senderLink = sender.attr("href");
            String uid = Utils.getUriQueryParameter(senderLink).get("uid");
            User author = new User().setId(Integer.valueOf(uid)).setName(senderName);
            String summary = message.select("div.summary").text();
            boolean unread = message.select("img[alt=NEW]").size() != 0;
            // The date is the third child node of the first "p.cite" element.
            TextNode dateNode = (TextNode) message.select("p.cite").get(0).childNode(2);
            String dateStr = dateNode.text().replaceAll(" ", "");
            threads.add(new Thread().setTitle(summary).setAuthor(author).setNew(unread).setDateStr(dateStr));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    Elements currentPageMarker = doc.select("div.pages > strong");
    int currPage = currentPageMarker.size() > 0 ? Integer.valueOf(currentPageMarker.first().text()) : 1;
    String nextPageSelector = "div.pages > a[href$=&page=" + (currPage + 1) + "]";
    boolean hasNextPage = doc.select(nextPageSelector).size() > 0;
    threads.getMeta().setHasNextPage(hasNextPage);
    threads.getMeta().setPage(currPage);
    return threads;
}Example 50
| Project: web-entity-extractor-ACL2014-master File: KnowledgeTreeBuilder.java View source code |
/**
 * Builds a knowledge-tree node for a jsoup element and, recursively, for its children.
 *
 * A TAG child named after the element is created under {@code parent}; its full text is the
 * normalized element text, or null when longer than {@code opts.maxFullTextLength}. Child
 * elements are converted recursively, non-empty text nodes become TAG children named "text"
 * (unless {@code opts.ignoreTextNodes} is set), and every HTML attribute is copied over.
 *
 * @param elt The jsoup Element corresponding to the root of the tree
 * @param parent The parent of the created tree's root node.
 */
public void convertElementToKTree(Element elt, KNode parent) {
    String fullText = LingUtils.normalize(elt.text(), opts.earlyNormalizeEntities);
    String storedText = fullText.length() > opts.maxFullTextLength ? null : fullText;
    KNode tagNode = parent.createChild(KNode.Type.TAG, elt.tagName(), storedText);
    // Children
    for (Node child : elt.childNodes()) {
        if (child instanceof Element) {
            convertElementToKTree((Element) child, tagNode);
            continue;
        }
        if (!(child instanceof TextNode) || opts.ignoreTextNodes) {
            continue;
        }
        String text = LingUtils.normalize(((TextNode) child).text(), opts.earlyNormalizeEntities);
        if (text.isEmpty()) {
            continue;
        }
        tagNode.createChild(KNode.Type.TAG, "text", text.length() > opts.maxFullTextLength ? null : text);
    }
    // Attributes
    for (Attribute attribute : elt.attributes()) {
        tagNode.createAttribute(attribute.getKey(), attribute.getValue());
    }
}Example 51
| Project: awesome-blogs-android-master File: DocumentConverter.java View source code |
// Walks the DOM below the given element and returns a rough size estimate:
// the length of every text node, plus 4 * depth for each nested element
// (each nested element is then measured one level deeper).
private static int calculateLength(Element el, int depth) {
    final int perElementCost = 4 * depth;
    int total = 0;
    for (final Node child : el.childNodes()) {
        if (child instanceof TextNode) {
            total += ((TextNode) child).text().length();
        } else if (child instanceof Element) {
            total += perElementCost + calculateLength((Element) child, depth + 1);
        }
    }
    return total;
}Example 52
| Project: elasticsearch-river-remote-master File: GetSitemapHtmlClient.java View source code |
/**
 * Appends the trimmed text of each visited text node to {@code buffer}, followed by a single
 * space. Non-text nodes and text nodes that are blank after trimming are ignored.
 *
 * @param node  the node being entered
 * @param depth the depth of the node in the tree (unused here)
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    TextNode textNode = (TextNode) node;
    // Turn non-breaking spaces (U+00A0) into regular spaces so trim() can remove them. The escape
    // is used because a literal NBSP is indistinguishable from a space and is easily lost,
    // which turns the replacement into a no-op.
    String text = textNode.text().replace('\u00A0', ' ').trim();
    if (!text.isEmpty()) {
        buffer.append(text);
        // After trim() the text can never end with a space, so the separator is always needed.
        buffer.append(" ");
    }
}Example 53
| Project: JAVMovieScraper-master File: AvEntertainmentParsingProfile.java View source code |
/**
 * Scrapes the running time from the title box list.
 *
 * Looks for a list item with exactly three child nodes whose second child starts with the
 * label "Playing time" (English page) or its Japanese equivalent, and returns the first run
 * of digits found in the third child.
 *
 * @return a {@code Runtime} holding the digits found, or an empty string when none were found
 */
@Override
public Runtime scrapeRuntime() {
    String runtime = "";
    // Compiled once instead of once per list item.
    Pattern digits = Pattern.compile("\\d+");
    Elements elements = document.select("div[id=titlebox] ul li");
    for (Element element : elements) {
        if (element.childNodeSize() != 3) {
            continue;
        }
        Node valueNode = element.childNode(2);
        Node labelHolder = element.childNode(1);
        // Guard against a label holder without children; childNode(0) would throw otherwise.
        if (!(valueNode instanceof TextNode) || labelHolder.childNodeSize() == 0) {
            continue;
        }
        String label = labelHolder.childNode(0).toString();
        // "\u53CE\u9332\u6642\u9593" is the Japanese label for running time. It is written with
        // escapes because the previous literal had its first character corrupted (U+FFFD) and
        // could never match.
        if (label.startsWith("Playing time") || label.startsWith("\u53CE\u9332\u6642\u9593")) {
            Matcher matcher = digits.matcher(valueNode.toString());
            if (matcher.find()) {
                runtime = matcher.group();
                break;
            }
        }
    }
    return new Runtime(runtime);
}Example 54
| Project: jHTML2Md-master File: HTML2Md.java View source code |
/**
 * Converts the children of {@code element} into Markdown lines and joins them.
 *
 * Text nodes are appended to the current last line with "#" and "*" prefixed by "/"; blank
 * text is not used to start an empty line. Child elements are delegated to
 * {@code processElement}. When joining, each line is trimmed and a second consecutive blank
 * line (and any further ones) is dropped.
 *
 * @param element the element whose content is converted
 * @return the joined lines, separated by newlines
 */
private static String getTextContent(Element element) {
    ArrayList<MDLine> lines = new ArrayList<MDLine>();
    for (Node node : element.childNodes()) {
        if (node instanceof TextNode) {
            TextNode textNode = (TextNode) node;
            MDLine current = getLastLine(lines);
            // Only skip the text when it is blank AND the current line is still empty.
            if (!current.getContent().equals("") || !textNode.isBlank()) {
                current.append(textNode.text().replaceAll("#", "/#").replaceAll("\\*", "/\\*"));
            }
        } else if (node instanceof Element) {
            processElement((Element) node, lines);
        } else {
            System.out.println();
        }
    }
    StringBuilder joined = new StringBuilder();
    int consecutiveBlank = 0;
    final int lastIndex = lines.size() - 1;
    for (int i = 0; i <= lastIndex; i++) {
        String line = lines.get(i).toString().trim();
        consecutiveBlank = line.equals("") ? consecutiveBlank + 1 : 0;
        if (consecutiveBlank >= 2) {
            continue;
        }
        joined.append(line);
        if (i < lastIndex) {
            joined.append("\n");
        }
    }
    return joined.toString();
}Example 55
| Project: jodtemplate-master File: HtmlStylizer.java View source code |
/**
 * Converts a jsoup element and its descendants into a list of output elements.
 *
 * A line-break tag yields a single break element. Otherwise child elements are processed
 * recursively and text nodes are turned into text elements; the collected result is then
 * wrapped as list elements for a list-item tag, as one paragraph element for a paragraph tag,
 * or returned unchanged for any other tag.
 *
 * @param element the jsoup element to convert
 * @param arPr    passed through to {@code createTextElement}
 * @param apPr    passed through to the list and paragraph factories
 * @param slide   the slide passed through to {@code createTextElement}
 * @return the elements produced for {@code element}
 * @throws IOException declared by the method and by the recursive call
 */
private List<Element> process(final org.jsoup.nodes.Element element, final Element arPr, final Element apPr, final Slide slide) throws IOException {
    if (BR_TAG.equals(element.tagName())) {
        return Arrays.asList(new Element(PPTXDocument.BR_ELEMENT, getDrawingmlNamespace()));
    }
    final List<org.jsoup.nodes.Element> tags = getAllTags(element);
    final List<Element> converted = new ArrayList<>();
    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            converted.add(createTextElement(tags, arPr, (TextNode) child, slide));
        } else if (child instanceof org.jsoup.nodes.Element) {
            converted.addAll(process((org.jsoup.nodes.Element) child, arPr, apPr, slide));
        }
    }
    if (LI_TAG.equals(element.tagName())) {
        return createListElements(tags, converted, apPr, element);
    }
    return P_TAG.equals(element.tagName())
            ? Arrays.asList(createParagraphElement(converted, apPr))
            : converted;
}Example 56
| Project: Mover-master File: MoverParser.java View source code |
/**
 * Fills {@code channel} with the details shown in the page's {@code div#channel-box}: picture
 * URL, display name, video count, registration date and profile view count.
 *
 * @param element the parsed page (or a parent element) containing the channel box
 * @param channel the channel to populate
 * @return the same {@code channel} instance
 */
public Channel getChannelExpandedInfo(Element element, Channel channel) {
    Elements channelBox = element.select("div#channel-box");
    // User picture source link
    String userPicture = channelBox.select("a.userpic img").first().attr("src");
    channel.setPicture(userPicture);
    // Channel display name
    String displayName = channelBox.select("div.info div.user").first().text();
    channel.setDisplayName(displayName);
    String videosCount = channelBox.select("div.info div.videos").first().text();
    channel.setVideosCount(internalGetIntegers(videosCount));
    List<TextNode> dataNodes = channelBox.select("div.data").first().textNodes();
    // The text nodes of div.data are addressed by position: index 1 holds the registration
    // date, index 2 the profile view count.
    // NOTE(review): first() and get(n) are unguarded and throw if the page layout changes.
    // The label removed here is the Russian word for "Registration:". It is written with Unicode
    // escapes because the previous literal was mis-decoded UTF-8 and never matched, leaving
    // the label in the string handed to the date parser.
    String registrationDate = dataNodes.get(1).text().replace("\u0420\u0435\u0433\u0438\u0441\u0442\u0440\u0430\u0446\u0438\u044F:", "").trim();
    channel.setRegistrationDate(parseRussianFormat(PROFILE_FORMAT, registrationDate));
    String profileViewsCount = dataNodes.get(2).text();
    channel.setProfileViewsCount(internalGetIntegers(profileViewsCount));
    return channel;
}Example 57
| Project: scheduler-legacy-master File: CourseParser.java View source code |
/**
 * Parse the Catalog Entry page for a given course to retrieve the long description of the course, the credit
 * hour breakdown, and the department of the course.
 *
 * Adds the keys "description", "credit.&lt;component&gt;" (one per credit-hour entry found) and
 * "department" to {@code values}.
 *
 * @param document the Catalog Entry page HTML document
 * @param values the course data map that receives the Catalog Entry values
 */
private void parseCatalogEntry(Document document, Map<String, String> values) {
    Element longDetailElement = document.select("table.datadisplaytable td.ntdefault").first();
    // textNodes() builds a new list on every call; fetch it once and reuse it.
    // NOTE(review): a missing table cell, or fewer than three text nodes, still fails below
    // exactly as before - confirm whether that should be tolerated.
    List<TextNode> textNodes = longDetailElement.textNodes();
    //Long description is in the first text node in the table
    String longDetail = textNodes.get(0).toString();
    values.put("description", longDetail);
    //Credit hours are in TextNodes following the long description
    for (TextNode creditNode : textNodes) {
        String text = creditNode.text();
        try (Scanner scanner = new Scanner(text)) {
            scanner.useDelimiter(" ");
            if (text.contains("TO")) {
                logger.debug("Found credit range entry, will attempt to use max value in range. Range: {}", text);
                //Some catalog entries use the "X.000 TO Y.000" Credits format for the credit hours
                //in almost all cases, X is 0, so we take Y as the credit count - skip "X.000" and "TO"
                //Only skip tokens that exist: "TO" can also appear inside ordinary short text.
                for (int skipped = 0; skipped < 2 && scanner.hasNext(); skipped++) {
                    scanner.next();
                }
            }
            if (scanner.hasNextDouble()) {
                double value = scanner.nextDouble();
                if (scanner.hasNext()) {
                    String component = scanner.next();
                    values.put("credit." + component, Double.toString(value));
                    logger.debug("Found credit hour entry: {}={}", component, value);
                } else {
                    //A bare number without a component name is not a credit entry; previously
                    //this threw NoSuchElementException and aborted the whole parse.
                    logger.debug("Expected credit hour text node, found instead: {}", text);
                }
            } else {
                logger.debug("Expected credit hour text node, found instead: {}", text);
            }
        }
    }
    //Department always seems to be 3rd text node from the end of the table
    String department = textNodes.get(textNodes.size() - 3).toString();
    values.put("department", department);
}Example 58
| Project: symphony-master File: Markdowns.java View source code |
/**
 * Post-processes text nodes that are not inside a {@code code} element.
 *
 * When a user query service is available, "@name " mentions of known users and
 * "@participants " are replaced by links. If the resulting text contains such a link it is
 * parsed as an HTML fragment, inserted before the text node, and the node is queued in
 * {@code toRemove}; otherwise the text is passed through {@code Pangu.spacingText} and
 * written back to the node.
 *
 * @param node  the node being entered
 * @param depth the depth of the node in the tree (unused here)
 */
@Override
public void head(final org.jsoup.nodes.Node node, int depth) {
    if (!(node instanceof org.jsoup.nodes.TextNode)) {
        return;
    }
    final org.jsoup.nodes.TextNode textNode = (org.jsoup.nodes.TextNode) node;
    final org.jsoup.nodes.Node parent = textNode.parent();
    if (!(parent instanceof org.jsoup.nodes.Element)) {
        return;
    }
    final Element parentElem = (Element) parent;
    if (parentElem.tagName().equals("code")) {
        return;
    }
    String text = textNode.getWholeText();
    if (null != userQueryService) {
        try {
            for (final String userName : userQueryService.getUserNames(text)) {
                text = text.replace('@' + userName + " ", "@<a href='" + Latkes.getServePath() + "/member/" + userName + "'>" + userName + "</a> ");
            }
            text = text.replace("@participants ", "@<a href='https://hacpai.com/article/1458053458339' class='ft-red'>participants</a> ");
        } finally {
            JdbcRepository.dispose();
        }
    }
    if (!text.contains("@<a href=")) {
        textNode.text(Pangu.spacingText(text));
        return;
    }
    // The text now contains markup: insert the parsed nodes at the text node's position and
    // queue the original text node for removal.
    final List<org.jsoup.nodes.Node> fragment = Parser.parseFragment(text, parentElem, "");
    final int position = textNode.siblingIndex();
    parentElem.insertChildren(position, fragment);
    toRemove.add(node);
}Example 59
| Project: holoreader-master File: RefreshFeedService.java View source code |
/**
 * Builds the database values for one article.
 *
 * If only one of {@code content} and {@code summary} is given, it is used for both. Iframes in
 * the content are replaced by a "(video removed)" text placeholder; images are removed from
 * the summary, which is reduced to plain text and truncated to {@code SUMMARY_MAXLENGTH}
 * characters plus "...". The first image of the content, if any, is stored as the article
 * image.
 *
 * @param feedID  id of the feed the article belongs to
 * @param guid    article GUID
 * @param link    article link
 * @param pubdate publication date
 * @param title   article title
 * @param summary article summary, may be null
 * @param content article content, may be null
 * @return the values to store, or null when both {@code content} and {@code summary} are null
 */
private ContentValues prepareArticle(int feedID, String guid, String link, Date pubdate, String title, String summary, String content) {
    if (content == null && summary == null) {
        return null;
    }
    if (content == null) {
        content = summary;
    } else if (summary == null) {
        summary = content;
    }
    Document parsedContent = Jsoup.parse(content);
    Elements iframes = parsedContent.getElementsByTag("iframe");
    for (Element mIframe : iframes) {
        // A node can only have one parent: reusing a single TextNode would move it on every
        // replaceWith call, leaving the placeholder only at the last iframe. Create one each.
        mIframe.replaceWith(new TextNode("(video removed)", null));
    }
    content = parsedContent.html();
    Document parsedSummary = Jsoup.parse(summary);
    Elements pics = parsedSummary.getElementsByTag("img");
    for (Element pic : pics) {
        pic.remove();
    }
    summary = parsedSummary.text();
    if (summary.length() > SUMMARY_MAXLENGTH) {
        summary = summary.substring(0, SUMMARY_MAXLENGTH) + "...";
    }
    Element image = parsedContent.select("img").first();
    ContentValues contentValues = new ContentValues();
    contentValues.put(ArticleDAO.FEEDID, feedID);
    contentValues.put(ArticleDAO.GUID, guid);
    contentValues.put(ArticleDAO.LINK, link);
    contentValues.put(ArticleDAO.PUBDATE, SQLiteHelper.fromDate(pubdate));
    contentValues.put(ArticleDAO.TITLE, title);
    contentValues.put(ArticleDAO.SUMMARY, summary);
    contentValues.put(ArticleDAO.CONTENT, content);
    if (image != null) {
        contentValues.put(ArticleDAO.IMAGE, image.absUrl("src"));
    }
    contentValues.put(ArticleDAO.ISDELETED, 0);
    return contentValues;
}Example 60
| Project: Ouroboros-master File: CommentParser.java View source code |
/**
 * Converts the direct children of one comment body line into styled text.
 *
 * Text nodes are rendered as normal text. Child elements are rendered according to their tag
 * (span, em, strong, u, s, a); any other tag is rendered as normal text from the element's
 * text. Nodes that are neither text nor elements are skipped.
 *
 * @param bodyLine         the element whose children are formatted
 * @param currentBoard     passed through to {@code parseAnchorText}
 * @param resto            passed through to {@code parseAnchorText}
 * @param fragmentManager  passed through to {@code parseAnchorText}
 * @param infiniteDbHelper passed through to {@code parseAnchorText}
 * @return the concatenated styled text
 */
private CharSequence parseFormatting(Element bodyLine, String currentBoard, String resto, FragmentManager fragmentManager, InfiniteDbHelper infiniteDbHelper) {
    CharSequence parsedText = "";
    for (Node childNode : bodyLine.childNodes()) {
        CharSequence piece;
        if (childNode instanceof TextNode) {
            piece = parseNormalText(new SpannableString(((TextNode) childNode).text()));
        } else if (childNode instanceof Element) {
            Element childElement = (Element) childNode;
            switch (childElement.tagName()) {
                case "span":
                    piece = parseSpanText(childElement);
                    break;
                case "em":
                    piece = parseItalicText(new SpannableString(childElement.text()));
                    break;
                case "strong":
                    piece = parseBoldText(new SpannableString(childElement.text()));
                    break;
                case "u":
                    piece = parseUnderlineText(new SpannableString(childElement.text()));
                    break;
                case "s":
                    piece = parseStrikethroughText(new SpannableString(childElement.text()));
                    break;
                case "a":
                    piece = parseAnchorText(childElement, currentBoard, resto, fragmentManager, infiniteDbHelper);
                    break;
                default:
                    piece = parseNormalText(new SpannableString(childElement.text()));
                    break;
            }
        } else {
            continue;
        }
        parsedText = TextUtils.concat(parsedText, piece);
    }
    return parsedText;
}Example 61
| Project: TuCanMobile-master File: EventsScraper.java View source code |
/**
 * Returns the individual events of the course status table in a ListAdapter.
 *
 * Every table row with exactly four cells is inspected: rows whose first cell has class
 * "tbsubhead" are modules, rows whose first cell has class "tbdata" are events. Name,
 * instructor and date are read from fixed child-node positions of the second cell.
 *
 * @param content
 *            content div element
 * @return the adapter; null when there is no "table.tbcoursestatus" or the table has no rows
 * @author Daniel Thiem
 */
private ListAdapter getApplicationSingleItems(Element content) {
final Element coursestatusTable = content.select("table.tbcoursestatus").first();
if (coursestatusTable != null) {
Elements moduleTable = coursestatusTable.select("tr");
ListAdapter singleEventAdapter = null;
if (moduleTable.size() > 0) {
// Individual courses are offered
// Four parallel lists: entry i of each list describes the same row.
ArrayList<String> itemName = new ArrayList<String>();
ArrayList<String> itemInstructor = new ArrayList<String>();
ArrayList<String> itemDate = new ArrayList<String>();
ArrayList<Boolean> isModule = new ArrayList<Boolean>();
for (Element next : moduleTable) {
final Elements cols = next.select("td");
Element firstCol = cols.first();
if (firstCol != null && cols.size() == 4) {
final Element secondCol = cols.get(1);
List<Node> innerChilds = secondCol.childNodes();
if (firstCol.hasClass("tbsubhead")) {
// Module row: only handled when the second cell has exactly four child nodes, the
// last of which is the instructor text.
if (innerChilds.size() == 4) {
final Node instructorNode = innerChilds.get(3);
if (instructorNode instanceof TextNode) {
String moduleInstructor = ((TextNode) instructorNode).text();
String moduleName = secondCol.select("span.eventTitle").text();
String moduleDeadline = cols.get(2).text();
itemName.add(moduleName);
itemInstructor.add(moduleInstructor);
itemDate.add(moduleDeadline);
isModule.add(true);
}
}
} else if (firstCol.hasClass("tbdata")) {
// This row is an event
String eventName = null, eventInstructor = null, eventDates = null;
if (innerChilds.size() == 1) {
// Event with name only
final String evNmHtml = secondCol.html();
eventName = TucanMobile.getEventNameByString(evNmHtml);
eventInstructor = "";
eventDates = "";
} else if (innerChilds.size() == 7) {
// Event with full information
final Node instructorNode = innerChilds.get(4);
final Node dateNode = innerChilds.get(6);
if (instructorNode instanceof TextNode && dateNode instanceof TextNode) {
eventName = secondCol.select("span.eventTitle").text();
eventInstructor = ((TextNode) instructorNode).text().trim();
eventDates = ((TextNode) dateNode).text().trim();
}
} else if (innerChilds.size() == 5) {
// Event without date
final Node instructorNode = innerChilds.get(4);
if (instructorNode instanceof TextNode) {
eventName = secondCol.select("span.eventTitle").text();
eventInstructor = ((TextNode) instructorNode).text().trim();
eventDates = "";
}
}
// NOTE(review): when none of the layouts above matched, the three values are still null
// here and are added as nulls - confirm the adapter tolerates null entries.
itemName.add(eventName);
itemInstructor.add(eventInstructor);
itemDate.add(eventDates);
isModule.add(false);
}
}
}
// Create the adapter to return
singleEventAdapter = new HighlightedThreeLinesAdapter(context, itemName, itemInstructor, itemDate, isModule);
}
return singleEventAdapter;
}
return null;
}Example 62
| Project: WebCollector-master File: ContentExtractor.java View source code |
/**
 * Recursively gathers text/tag statistics for {@code node} and its descendants.
 *
 * <p>For an {@link Element} the counters of all children are summed, the element itself is
 * counted as one tag, and the resulting {@code CountInfo} is stored in {@code infoMap} keyed by
 * the element. For a {@link TextNode} only the text length is recorded. Any other node type
 * yields a fresh, untouched {@code CountInfo}.
 *
 * @param node the node to analyse
 * @return the statistics for {@code node}
 */
protected CountInfo computeInfo(Node node) {
    // Leaf case: a text node contributes only its character count.
    if (node instanceof TextNode) {
        CountInfo leafInfo = new CountInfo();
        int length = ((TextNode) node).text().length();
        leafInfo.textCount = length;
        leafInfo.leafList.add(length);
        return leafInfo;
    }
    // Anything that is neither text nor element carries no statistics.
    if (!(node instanceof Element)) {
        return new CountInfo();
    }

    Element element = (Element) node;
    CountInfo total = new CountInfo();
    for (Node child : element.childNodes()) {
        CountInfo sub = computeInfo(child);
        total.textCount += sub.textCount;
        total.linkTextCount += sub.linkTextCount;
        total.tagCount += sub.tagCount;
        total.linkTagCount += sub.linkTagCount;
        total.leafList.addAll(sub.leafList);
        total.densitySum += sub.density;
        total.pCount += sub.pCount;
    }

    // Count the element itself.
    total.tagCount++;
    String name = element.tagName();
    if (name.equals("a")) {
        // Every character below an anchor is link text.
        total.linkTextCount = total.textCount;
        total.linkTagCount++;
    } else if (name.equals("p")) {
        total.pCount++;
    }

    // Density = non-link characters per non-link tag (0 when either side is 0).
    int nonLinkChars = total.textCount - total.linkTextCount;
    int nonLinkTags = total.tagCount - total.linkTagCount;
    total.density = (nonLinkChars == 0 || nonLinkTags == 0) ? 0 : (nonLinkChars + 0.0) / nonLinkTags;

    infoMap.put(element, total);
    return total;
}Example 63
| Project: yobi-master File: AutoLinkRenderer.java View source code |
/**
 * Rewrites {@code body}: in every element whose own text matches {@code pattern} (and that is
 * not ignored by {@code isIgnoreElement}), each text node is emptied and the result of
 * {@code convertLink} is inserted directly after it.
 *
 * @param pattern the pattern used both to find elements and to convert their text
 * @param toLink  passed through to {@code convertLink}
 * @return this renderer, with {@code body} replaced by the re-serialised document body
 */
private AutoLinkRenderer parse(Pattern pattern, ToLink toLink) {
    Document document = Jsoup.parse(body);
    document.outputSettings().prettyPrint(false);

    for (Element candidate : document.getElementsMatchingOwnText(pattern)) {
        if (!isIgnoreElement(candidate)) {
            for (TextNode textNode : candidate.textNodes()) {
                String converted = convertLink(textNode.text(), pattern, toLink);
                // Blank the original text and put the converted markup right behind it.
                textNode.text(StringUtils.EMPTY);
                textNode.after(converted);
            }
        }
    }

    this.body = document.body().html();
    return this;
}Example 64
| Project: asta4d-master File: Asta4DTagSupportHtmlTreeBuilder.java View source code |
/**
 * Appends the character data of {@code characterToken} to the current element.
 *
 * <p>Inside {@code script} and {@code style} elements the data becomes a {@link DataNode};
 * everywhere else it becomes a {@link TextNode}.
 *
 * @param characterToken the token whose data is inserted
 */
void insert(Token.Character characterToken) {
    String currentTag = currentElement().tagName();
    boolean rawData = currentTag.equals("script") || currentTag.equals("style");
    Node created = rawData
            ? new DataNode(characterToken.getData(), baseUri)
            : new TextNode(characterToken.getData(), baseUri);
    // Appended directly instead of via insertNode: these nodes are never fostered and a stack
    // is always present.
    currentElement().appendChild(created);
}Example 65
| Project: LTB-android-master File: LTCScraper.java View source code |
/**
 * Fetches the prediction page for {@code route} at {@code stopNumber} and extracts the arrival
 * predictions from the text nodes of all {@code div} elements.
 *
 * <p>The outcome is reported through {@code scrapeStatus}. On failure a single placeholder
 * {@code Prediction} describing the problem is added to the returned list instead.
 *
 * @param route        the route to query
 * @param stopNumber   the stop to query
 * @param scrapeStatus receives OK/FAILED plus problem details
 * @return the predictions found, or a list holding an error placeholder
 */
public ArrayList<Prediction> getPredictions(LTCRoute route, String stopNumber, ScrapeStatus scrapeStatus) {
    // usually there are three of them
    ArrayList<Prediction> found = new ArrayList<Prediction>(3);
    Resources resources = context.getResources();
    try {
        // Reference time: the current time with seconds and milliseconds zeroed.
        Calendar reference = Calendar.getInstance();
        reference.set(Calendar.SECOND, 0);
        reference.set(Calendar.MILLISECOND, 0);

        Document page = parseDocFromUri(proxyPredictionPath(route, stopNumber), ltcPredictionPath(route, stopNumber), INITIAL_FETCH_TIMEOUT);
        Elements divs = page.select("div");
        if (divs.isEmpty()) {
            throw new ScrapeException("LTC down?", ScrapeStatus.PROBLEM_IMMEDIATELY, true);
        }

        for (Element div : divs) {
            for (TextNode textNode : div.textNodes()) {
                String content = textNode.text();
                if (NO_BUS_PATTERN.matcher(content).find()) {
                    throw new ScrapeException(resources.getString(R.string.no_further), ScrapeStatus.PROBLEM_IF_ALL, false);
                }
                if (NO_INFO_PATTERN.matcher(content).find()) {
                    throw new ScrapeException(resources.getString(R.string.no_service), ScrapeStatus.PROBLEM_IF_ALL, false);
                }
                // One text node may contain several arrivals: group 1 = time, group 2 = destination.
                Matcher arrivals = ARRIVAL_PATTERN.matcher(content);
                while (arrivals.find()) {
                    String arrivalTime = arrivals.group(1);
                    String headingTo = arrivals.group(2);
                    found.add(new Prediction(route, arrivalTime, headingTo, reference));
                }
            }
        }

        if (found.isEmpty()) {
            throw new ScrapeException(resources.getString(R.string.no_bus), ScrapeStatus.PROBLEM_IF_ALL, true);
        }
        scrapeStatus.setStatus(ScrapeStatus.OK, ScrapeStatus.NOT_PROBLEM, null);
    } catch (ScrapeException e) {
        scrapeStatus.setStatus(ScrapeStatus.FAILED, e.problemType, e.getMessage());
        found.add(new Prediction(route, e.getMessage(), e.seriousProblem));
    } catch (SocketTimeoutException e) {
        scrapeStatus.setStatus(ScrapeStatus.FAILED, ScrapeStatus.PROBLEM_IMMEDIATELY, e.getMessage());
        found.add(new Prediction(context, route, R.string.times_timeout, true));
    } catch (IOException e) {
        scrapeStatus.setStatus(ScrapeStatus.FAILED, ScrapeStatus.PROBLEM_IMMEDIATELY, e.getMessage());
        found.add(new Prediction(context, route, R.string.times_fail, true));
    }
    return found;
}Example 66
| Project: opacclient-master File: SISIS.java View source code |
/**
 * Parses a search result page into a {@link SearchRequestResult}.
 *
 * <p>Side effects: resets and possibly sets the {@code identifier} field (taken from the first
 * {@code singleHit.do} link in the result table), sets {@code resultcount} to the number of
 * parsed rows, and may call {@code downloadCover} for rows with a {@code showCover.do} cover.
 *
 * @param html the raw HTML of the result page
 * @param page the page number; result numbers are computed as {@code 10 * (page - 1) + i}
 * @return the parsed results, or an empty result when the page header contains "keine Treffer"
 * @throws OpacErrorException if the page contains an {@code .error} or {@code .nohits} element
 * @throws SingleResultFound  if the result header contains "(1/1)" or " 1/1"
 */
public SearchRequestResult parse_search(String html, int page) throws OpacErrorException, SingleResultFound {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/searchfoo");

    if (doc.select(".error").size() > 0) {
        throw new OpacErrorException(doc.select(".error").text().trim());
    } else if (doc.select(".nohits").size() > 0) {
        throw new OpacErrorException(doc.select(".nohits").text().trim());
    } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    // Total number of hits, read from the box header; stays -1 when no known format matches.
    int results_total = -1;
    String resultnumstr = doc.select(".box-header h2").first().text();
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        throw new SingleResultFound();
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }

    Elements table = doc.select("table.data tbody tr");

    // Extract the "identifier" query parameter from the first singleHit.do link only.
    identifier = null;
    Elements links = doc.select("table.data a");
    boolean haslink = false;
    for (int i = 0; i < links.size(); i++) {
        Element node = links.get(i);
        if (node.hasAttr("href") && node.attr("href").contains("singleHit.do") && !haslink) {
            haslink = true;
            try {
                // Un-escape "&amp;" left in the href; the previous replace("&", "&") was a no-op.
                List<NameValuePair> anyurl = URLEncodedUtils.parse(
                        new URI(node.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                // Best effort: a malformed link only means no identifier is available.
                e.printStackTrace();
            }
        }
    }

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // --- media type from the row icon -------------------------------------------------
        if (tr.select("td img[title]").size() > 0) {
            Element icon = tr.select("td img").get(0);
            String title = icon.attr("title");
            String[] fparts = icon.attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN)
                    .replace(".jpg", "").replace(".gif", "").replace(".png", ""));
            MediaType default_by_title = defaulttypes.get(title);
            MediaType default_name = default_by_title != null ? default_by_title : default_by_fname;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    // No (valid) configured mapping for this file name: use the default.
                    sr.setType(default_name);
                }
            } else {
                sr.setType(default_name);
            }
        }

        // --- media type overrides based on keywords in the row text -----------------------
        String alltext = tr.text();
        if (alltext.contains("eAudio") || alltext.contains("eMusic")) {
            sr.setType(MediaType.MP3);
        } else if (alltext.contains("eVideo")) {
            sr.setType(MediaType.EVIDEO);
        } else if (alltext.contains("eBook")) {
            sr.setType(MediaType.EBOOK);
        } else if (alltext.contains("Munzinger")) {
            sr.setType(MediaType.EDOC);
        }

        // --- cover ------------------------------------------------------------------------
        if (tr.children().size() > 3 && tr.child(3).select("img[title*=cover]").size() == 1) {
            sr.setCover(tr.child(3).select("img[title*=cover]").attr("abs:src"));
            if (sr.getCover().contains("showCover.do")) {
                downloadCover(sr);
            }
        }

        // --- locate the cell (and the container inside it) holding the descriptive text ---
        Element middlething;
        if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) {
            middlething = tr.child(2);
        } else {
            middlething = tr.child(1);
        }
        List<Node> children = middlething.childNodes();
        if (middlething.select("div").not("#hlrightblock,.bestellfunktionen").size() == 1) {
            Element indiv = middlething.select("div").not("#hlrightblock,.bestellfunktionen").first();
            if (indiv.children().size() > 1) {
                children = indiv.childNodes();
            }
        } else if (middlething.select("span.titleData").size() == 1) {
            children = middlething.select("span.titleData").first().childNodes();
        }

        // --- flatten two levels of nodes into string tuples -------------------------------
        // Direct text nodes:   { "text", "", text }                                (length 3)
        // Children of elements: { parentTag, "text" | childTag, text, parentClass, parentStyle }
        //                                                                          (length 5)
        // Texts of three characters or fewer are dropped.
        List<String[]> strings = new ArrayList<>();
        for (Node node : children) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                Element parent = (Element) node;
                for (Node subnode : node.childNodes()) {
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { parent.tag().getName(), "text", text,
                                    parent.className(), node.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        String text = ((Element) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { parent.tag().getName(),
                                    ((Element) subnode).tag().getName(), text,
                                    parent.className(), node.attr("style") });
                        }
                    }
                }
            }
        }

        // --- preferred description source: <span class="Z3988"> ---------------------------
        StringBuilder description = null;
        if (tr.select("span.Z3988").size() == 1) {
            // Sometimes there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            List<NameValuePair> z3988data;
            boolean hastitle = false;
            try {
                description = new StringBuilder();
                z3988data = URLEncodedUtils.parse(
                        new URI("http://dummy/?" + tr.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    if (nv.getValue() != null && !nv.getValue().trim().equals("")) {
                        String key = nv.getName();
                        if ((key.equals("rft.btitle") || key.equals("rft.atitle")) && !hastitle) {
                            // Only the first title is used.
                            description.append("<b>").append(nv.getValue()).append("</b>");
                            hastitle = true;
                        } else if (key.equals("rft.au") || key.equals("rft.date")) {
                            description.append("<br />").append(nv.getValue());
                        }
                    }
                }
            } catch (URISyntaxException e) {
                // Unusable metadata: fall back to the heuristic description below.
                description = null;
            }
        }
        boolean described = false;
        if (description != null && description.length() > 0) {
            sr.setInnerhtml(description.toString());
            described = true;
        } else {
            description = new StringBuilder();
        }

        // --- heuristic description and availability status --------------------------------
        int k = 0;
        boolean yearfound = false;
        boolean titlefound = false;
        boolean sigfound = false; // never set to true in this method
        for (String[] part : strings) {
            if (!described) {
                if (part[0].equals("a") && (k == 0 || !titlefound)) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append("<b>").append(part[2]).append("</b>");
                    titlefound = true;
                } else if (part[2].matches("\\D*[0-9]{4}\\D*") && part[2].length() <= 10) {
                    yearfound = true;
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound) {
                    // The former "(YYYY)" regex branches (one of them an unreachable duplicate)
                    // appended exactly the same output, so they are folded into this branch.
                    description.append("<br />");
                    description.append(part[2]);
                } else if (k > 1 && k < 4 && !sigfound && part[0].equals("text")
                        && part[2].matches("^[A-Za-z0-9,\\- ]+$")) {
                    description.append("<br />");
                    description.append(part[2]);
                }
            }

            // NOTE(review): the tuples built above have length 3 or 5, so the length == 4
            // branch can never run here; possibly ">= 4" was intended - confirm before changing.
            if (part.length == 4) {
                if (part[0].equals("span") && part[3].equals("textgruen")) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (part[0].equals("span") && part[3].equals("textrot")) {
                    sr.setStatus(SearchResult.Status.RED);
                }
            } else if (part.length == 5) {
                if (part[4].contains("purple")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                }
            }

            if (sr.getStatus() == null) {
                String text = part[2];
                if ((text.contains("entliehen") && text.startsWith("Vormerkung ist leider nicht möglich"))
                        || text.contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (text.startsWith("entliehen")
                        || text.contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((text.startsWith("bestellbar") && !text.contains("nicht bestellbar"))
                        || (text.startsWith("vorbestellbar") && !text.contains("nicht vorbestellbar"))
                        || (text.startsWith("vormerkbar") && !text.contains("nicht vormerkbar"))
                        || (text.contains("heute zurückgebucht"))
                        || (text.contains("ausleihbar") && !text.contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    // Especially Onleihe.de ebooks are often marked
                    // green though they are not available.
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO)
                            || sr.getType().equals(MediaType.MP3)) {
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
            k++;
        }
        if (!described) {
            sr.setInnerhtml(description.toString());
        }

        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}Example 67
| Project: sitebricks-master File: HtmlTemplateCompiler.java View source code |
/**
 * Recursively walks the children of {@code node} and turns each one into the matching
 * sitebricks widget, collecting them in a single {@link WidgetChain}.
 *
 * <p>Compile errors raised while building text, comment/data or XML-declaration widgets are
 * recorded in {@code pc.errors} rather than thrown.
 */
@NotNull
private <N extends Node> WidgetChain walk(PageCompilingContext pc, N node) {
    WidgetChain chain = Chains.proceeding();
    for (Node current : node.childNodes()) {
        if (current instanceof Element) {
            final Element element = (Element) current;

            // remember the form if this is a form tag
            if (element.tagName().equals("form")) {
                pc.form = element;
            }

            // open a lexical scope when entering a repeat widget (decided from the previous node)
            final boolean popScope = lexicalClimb(pc, element);

            // post-order, depth-first: children first, then the element itself
            try {
                WidgetChain descendants = walk(pc, element);
                // turn the element plus its child tree into a Renderable
                chain.addWidget(widgetize(pc, element, descendants));
            } finally {
                lexicalDescend(pc, element, popScope);
            }
        } else if (current instanceof TextNode) {
            TextNode text = (TextNode) current;

            // open a lexical scope when entering a repeat widget (decided from the previous node)
            final boolean popScope = lexicalClimb(pc, text);

            try {
                Renderable textWidget = registry.textWidget(cleanHtml(current), pc.lexicalScopes.peek());
                if (text.hasAttr(ANNOTATION_KEY)) {
                    // annotated text: wrap the text widget in its own chain and make that the
                    // child of a new widget created for the annotation
                    WidgetChain wrapped = Chains.proceeding().addWidget(textWidget);
                    String widgetName = text.attr(ANNOTATION_KEY).toLowerCase();
                    Renderable annotationWidget = registry.newWidget(widgetName, text.attr(ANNOTATION_CONTENT), wrapped, pc.lexicalScopes.peek());
                    chain.addWidget(annotationWidget);
                } else {
                    // plain text: add the text widget as is
                    chain.addWidget(textWidget);
                }
            } catch (ExpressionCompileException e) {
                pc.errors.add(CompileError.in(node.outerHtml()).near(line(current)).causedBy(e));
            }

            if (popScope) {
                pc.lexicalScopes.pop();
            }
        } else if (current instanceof Comment || current instanceof DataNode) {
            // comments and data nodes are emitted as raw text widgets
            try {
                chain.addWidget(registry.textWidget(cleanHtml(current), pc.lexicalScopes.peek()));
            } catch (ExpressionCompileException e) {
                pc.errors.add(CompileError.in(node.outerHtml()).near(line(node)).causedBy(e));
            }
        } else if (current instanceof XmlDeclaration) {
            try {
                chain.addWidget(registry.xmlDirectiveWidget(((XmlDeclaration) current).getWholeDeclaration(), pc.lexicalScopes.peek()));
            } catch (ExpressionCompileException e) {
                pc.errors.add(CompileError.in(node.outerHtml()).near(line(node)).causedBy(e));
            }
        }
    }
    // the computed chain (empty when there were no convertible children)
    return chain;
}Example 68
| Project: MyTv-master File: TvMaoCrawler.java View source code |
/**
 * Parses the TV program table contained in {@code html}.
 *
 * <p>The date/weekday and the station name are read once from the page; every
 * {@code ul#pgrow li} entry with at least two child nodes then yields one {@code ProgramTable},
 * which is announced to all registered listeners and added to the result.
 *
 * @param html the page HTML
 * @return the parsed entries; parsing stops and the entries collected so far are returned as
 *         soon as an entry without a {@code span} child is encountered
 */
private List<ProgramTable> parseProgramTable(String html) {
    Document doc = Jsoup.parse(html);

    // "<date> <weekday>" from the page header; the current year is prepended to the date
    Elements dateElements = doc.select("div.pgmain div[class=\"mt10 clear\"] b:first-child");
    String[] dateAndWeekArray = dateElements.get(0).text().trim().split("\\s+");
    String date = Calendar.getInstance().get(Calendar.YEAR) + "-" + dateAndWeekArray[0];
    int week = weekStringToInt(dateAndWeekArray[1]);

    Elements stationElements = doc.select("aside[class=\"related-aside rt\"] section[class=\"aside-section clear\"] div.bar");
    String stationName = stationElements.get(0).text().trim();

    List<ProgramTable> resultList = new ArrayList<ProgramTable>();
    for (Element entry : doc.select("ul#pgrow li")) {
        List<Node> children = entry.childNodes();
        int size = children.size();
        if (size < 2) {
            continue;
        }

        // locate the air time: the first <span> child
        int cursor = 0;
        boolean foundAirTime = false;
        while (cursor < size) {
            Node candidate = children.get(cursor);
            if (candidate instanceof Element && "SPAN".equalsIgnoreCase(((Element) candidate).tagName())) {
                foundAirTime = true;
                break;
            }
            cursor++;
        }
        if (!foundAirTime) {
            logger.info("the program table of " + stationName + " at " + date + " does not exists.");
            return resultList;
        }
        String airTime = ((Element) children.get(cursor)).text().trim();
        cursor++;

        // collect the program name: text nodes up to and including the first <a>
        StringBuffer program = new StringBuffer();
        while (cursor < size) {
            Node candidate = children.get(cursor);
            if (candidate instanceof TextNode) {
                program.append(((TextNode) candidate).text().trim());
            } else if (candidate instanceof Element && "A".equalsIgnoreCase(((Element) candidate).tagName())) {
                program.append(((Element) candidate).text().trim());
                cursor++;
                break;
            }
            cursor++;
        }

        // a further text node may follow
        if (cursor < size - 1) {
            Node trailing = children.get(cursor);
            if (trailing instanceof TextNode) {
                program.append(((TextNode) trailing).text().trim());
            }
        }

        ProgramTable pt = new ProgramTable();
        pt.setAirDate(date);
        pt.setAirTime(date + " " + airTime);
        pt.setProgram(program.toString().trim());
        pt.setStationName(stationName);
        pt.setWeek(week);
        for (CrawlEventListener listener : listeners) {
            listener.itemFound(new ProgramTableFoundEvent(this, pt));
        }
        resultList.add(pt);
    }
    return resultList;
}Example 69
| Project: ScreenSlicer-master File: CommonUtil.java View source code |
/**
 * Sets the output charset of {@code doc} and replaces the content of every non-empty
 * {@code #text} node with the result of {@code HtmlCoder.decode} on that node's string form.
 *
 * <p>Failures on individual nodes are logged via {@code Log.exception} and do not abort the
 * traversal.
 *
 * @param doc   the document to process in place
 * @param ascii {@code true} to output as "ascii", {@code false} for "utf-8"
 * @return the same {@code doc} instance
 */
private static Element sanitize(Document doc, final boolean ascii) {
    doc.outputSettings().charset(ascii ? "ascii" : "utf-8");
    doc.traverse(new NodeVisitor() {
        @Override
        public void head(Node n, int d) {
            try {
                boolean isTextNode = n.nodeName().equals("#text");
                if (isTextNode && !CommonUtil.isEmpty(n.outerHtml())) {
                    TextNode textNode = (TextNode) n;
                    textNode.text(HtmlCoder.decode(n.toString()));
                }
            } catch (Throwable t) {
                Log.exception(t);
            }
        }

        @Override
        public void tail(Node n, int d) {
            // nothing to do when leaving a node
        }
    });
    return doc;
}Example 70
| Project: slicer-master File: CommonUtil.java View source code |
/**
 * Sets the output charset of {@code doc} and replaces the content of every non-empty
 * {@code #text} node with the result of {@code HtmlCoder.decode} on that node's string form.
 *
 * <p>Failures on individual nodes are logged via {@code Log.exception} and do not abort the
 * traversal.
 *
 * @param doc   the document to process in place
 * @param ascii {@code true} to output as "ascii", {@code false} for "utf-8"
 * @return the same {@code doc} instance
 */
private static Element sanitize(Document doc, final boolean ascii) {
    doc.outputSettings().charset(ascii ? "ascii" : "utf-8");
    doc.traverse(new NodeVisitor() {
        @Override
        public void head(Node n, int d) {
            try {
                boolean isTextNode = n.nodeName().equals("#text");
                if (isTextNode && !CommonUtil.isEmpty(n.outerHtml())) {
                    TextNode textNode = (TextNode) n;
                    textNode.text(HtmlCoder.decode(n.toString()));
                }
            } catch (Throwable t) {
                Log.exception(t);
            }
        }

        @Override
        public void tail(Node n, int d) {
            // nothing to do when leaving a node
        }
    });
    return doc;
}Example 71
| Project: GameRaven-master File: AllInOneV2.java View source code |
@SuppressLint("SetJavaScriptEnabled")
public void processContent(NetDesc desc, Document doc, String resUrl) {
if (BuildConfig.DEBUG)
wtl("GRAIO hNR fired, desc: " + desc.name());
swipeRefreshLayout.setEnabled(false);
if (searchIcon != null)
searchIcon.collapseActionView();
setAllMenuItemsExceptRefreshVisibility(false);
adapterRows.clear();
boolean isDefaultAcc = Session.getUser() != null && Session.getUser().equals(settings.getString("defaultAccount", HeaderSettings.NO_DEFAULT_ACCOUNT));
if (BuildConfig.DEBUG)
wtl("setting board, topic, message id to null");
boardID = null;
topicID = null;
messageIDForEditing = null;
Element tbody;
Element pj;
String headerTitle;
String firstPage = null;
String prevPage = null;
int[] pagesInfo = new int[] { 1, 1 };
String nextPage = null;
String lastPage = null;
String pagePrefix = null;
if (BuildConfig.DEBUG)
wtl("checking for board quick list");
Element boardsDropdown = null;
for (Element e : doc.select("ul.masthead_mygames_subnav")) {
if (e.previousElementSibling().ownText().equals("My Boards")) {
boardsDropdown = e;
break;
}
}
if (boardsDropdown != null) {
Elements dItems = boardsDropdown.getElementsByTag("a");
boardQuickListOptions = new String[dItems.size() + 1];
boardQuickListLinks = new String[dItems.size() + 1];
boardQuickListOptions[0] = "Go to Boards Page...";
int x = 1;
for (Element e : dItems) {
boardQuickListOptions[x] = e.text();
boardQuickListLinks[x] = e.attr("href");
x++;
}
}
contentList.setDividerHeight(Theming.convertDPtoPX(this, 1));
switch(desc) {
case BOARD_JUMPER:
case LOGIN_S2:
updateHeaderNoJumper("Board Jumper", NetDesc.BOARD_JUMPER);
setMenuItemVisibility(searchIcon, true);
processBoards(doc);
break;
case BOARD_LIST:
updateHeaderNoJumper(doc.getElementsByTag("th").get(4).text(), NetDesc.BOARD_LIST);
processBoards(doc);
break;
case NOTIFS_PAGE:
settings.edit().putLong("notifsLastCheck", System.currentTimeMillis()).apply();
tbody = doc.getElementsByTag("tbody").first();
headerTitle = Session.getUser() + "'s Notifications";
updateHeaderNoJumper(headerTitle, desc);
if (tbody != null) {
for (Element row : tbody.getElementsByTag("tr")) {
Elements cells = row.children();
// [title, url] [time] [read]
Element titleLinkElem = cells.get(0).children().first();
String title = titleLinkElem.text();
String link = titleLinkElem.attr("href");
String time = cells.get(1).text();
boolean isOld = false;
if (cells.get(2).text().equals("Read"))
isOld = true;
adapterRows.add(new NotifRowData(title, time, link, isOld));
}
} else {
adapterRows.add(new HeaderRowData("You have no notifications at this time."));
}
setMenuItemVisibility(clearUnreadNotifsIcon, true);
NotifierService.notifDismiss(this);
break;
case MENTIONS_PAGE:
tbody = doc.getElementsByTag("tbody").first();
headerTitle = Session.getUser() + "'s Mentions";
updateHeaderNoJumper(headerTitle, desc);
if (tbody != null) {
for (Element row : tbody.getElementsByTag("tr")) {
Elements cells = row.children();
// [topic] [board] [user] [time]
Element topicLinkElem = cells.get(0).children().first();
String topic = topicLinkElem.text();
String link = topicLinkElem.attr("href");
String board = cells.get(1).text();
String user = cells.get(2).text();
String time = cells.get(3).text();
adapterRows.add(new MentionRowData(topic, board, user, time, link));
}
} else {
adapterRows.add(new HeaderRowData("You have no mentions at this time."));
}
break;
case PM_INBOX:
case PM_OUTBOX:
tbody = doc.getElementsByTag("tbody").first();
boolean isInbox = false;
if (desc == NetDesc.PM_INBOX)
isInbox = true;
if (isInbox)
headerTitle = Session.getUser() + "'s PM Inbox";
else
headerTitle = Session.getUser() + "'s PM Outbox";
if (tbody != null) {
pj = doc.select("ul.paginate").first();
if (pj != null) {
pagesInfo = getPageJumperInfo(pj);
if (isInbox)
pagePrefix = "/pm/?page=";
else
pagePrefix = "/pm/sent?page=";
if (pagesInfo[0] > 1) {
firstPage = pagePrefix + 0;
prevPage = pagePrefix + (pagesInfo[0] - 2);
}
if (pagesInfo[0] != pagesInfo[1]) {
nextPage = pagePrefix + pagesInfo[0];
lastPage = pagePrefix + (pagesInfo[1] - 1);
}
}
updateHeader(headerTitle, firstPage, prevPage, pagesInfo[0], pagesInfo[1], nextPage, lastPage, pagePrefix, desc);
for (Element row : tbody.getElementsByTag("tr")) {
Elements cells = row.children();
// [icon] [sender] [subject] [time] [check]
boolean isOld = true;
if (cells.get(0).children().first().hasClass("fa-circle"))
isOld = false;
String sender = cells.get(1).text();
Element subjectLinkElem = cells.get(2).children().first();
String subject = subjectLinkElem.text();
String link = subjectLinkElem.attr("href");
String time = cells.get(3).text();
adapterRows.add(new PMRowData(subject, sender, time, link, isOld, isInbox));
}
} else {
updateHeaderNoJumper(headerTitle, desc);
adapterRows.add(new HeaderRowData("There are no private messages here at this time."));
}
fab.setVisibility(View.VISIBLE);
pMode = PostMode.NEW_PM;
if (isInbox)
setMenuItemVisibility(pmOutboxIcon, true);
else
setMenuItemVisibility(pmInboxIcon, true);
break;
case PM_INBOX_DETAIL:
case PM_OUTBOX_DETAIL:
String pmTitle = doc.select("h2.title").first().text();
String pmMessage = doc.select("div.body").first().outerHtml();
Element foot = doc.select("div.foot").first();
foot.child(1).remove();
String pmFoot = foot.outerHtml();
//Sent by: P4wn4g3 on 6/1/2013 2:15:55 PM
String footText = foot.text();
String sender = footText.substring(9, footText.indexOf(" on "));
updateHeaderNoJumper(pmTitle, desc);
if (desc == NetDesc.PM_INBOX_DETAIL) {
replyTo = sender;
if (!pmTitle.startsWith("Re: "))
replySubject = "Re: " + pmTitle;
else
replySubject = pmTitle;
setMenuItemVisibility(replyIcon, true);
}
adapterRows.add(new PMDetailRowData(sender, pmTitle, pmMessage + pmFoot));
break;
case AMP_LIST:
if (BuildConfig.DEBUG)
wtl("GRAIO hNR determined this is an amp response");
tbody = doc.getElementsByTag("tbody").first();
headerTitle = Session.getUser() + "'s Active Messages";
if (doc.select("ul.paginate").size() > 1) {
pj = doc.select("ul.paginate").get(1);
if (pj != null && !pj.hasClass("user") && !pj.hasClass("tsort")) {
pagesInfo = getPageJumperInfo(pj);
pagePrefix = buildAMPLink() + "&page=";
if (pagesInfo[0] > 1) {
firstPage = pagePrefix + 0;
prevPage = pagePrefix + (pagesInfo[0] - 2);
}
if (pagesInfo[0] != pagesInfo[1]) {
nextPage = pagePrefix + pagesInfo[0];
lastPage = pagePrefix + (pagesInfo[1] - 1);
}
}
}
updateHeader(headerTitle, firstPage, prevPage, pagesInfo[0], pagesInfo[1], nextPage, lastPage, pagePrefix, NetDesc.AMP_LIST);
if (!tbody.children().isEmpty()) {
for (Element row : tbody.children()) {
// [board] [read status] [title] [msg] [last post] [your last post]
Elements cells = row.children();
String board = cells.get(0).text();
Element titleLinkElem = cells.get(2).child(0);
String title = titleLinkElem.text();
String link = titleLinkElem.attr("href");
String mCount = cells.get(3).textNodes().get(0).text().trim();
Element lPostLinkElem = cells.get(4).child(1);
String lPost = lPostLinkElem.text();
String lPostLink = lPostLinkElem.attr("href");
ReadStatus status = ReadStatus.UNREAD;
String tImg = cells.get(1).child(0).className();
if (tImg.endsWith("_read"))
status = ReadStatus.READ;
else if (tImg.endsWith("_unread")) {
status = ReadStatus.NEW_POST;
lPostLink = cells.get(1).child(0).attr("href");
}
adapterRows.add(new AMPRowData(title, board, lPost, mCount, link, lPostLink, status));
}
} else {
adapterRows.add(new HeaderRowData("You have no active messages at this time."));
}
if (BuildConfig.DEBUG)
wtl("amp response block finished");
break;
case TRACKED_TOPICS:
headerTitle = Session.getUser() + "'s Tracked Topics";
updateHeaderNoJumper(headerTitle, desc);
tbody = doc.getElementsByTag("tbody").first();
if (tbody != null) {
for (Element row : tbody.children()) {
// [remove] [title] [board name] [msgs] [last [pst]
Elements cells = row.children();
int rsMod = 0;
if (cells.size() == 6)
rsMod = 1;
String removeLink = cells.get(0).child(0).attr("href");
String topicLink = cells.get(1 + rsMod).child(0).attr("href");
String topicText = cells.get(1 + rsMod).text();
String board = cells.get(2 + rsMod).text();
String msgs = cells.get(3 + rsMod).text();
String lPostLink = cells.get(4 + rsMod).child(0).attr("href");
String lPostText = cells.get(4 + rsMod).text();
ReadStatus status = ReadStatus.UNREAD;
if (rsMod == 1) {
String tImg = cells.get(1).child(0).className();
if (tImg.endsWith("_read"))
status = ReadStatus.READ;
else if (tImg.endsWith("_unread"))
status = ReadStatus.NEW_POST;
}
adapterRows.add(new TrackedTopicRowData(board, topicText, lPostText, msgs, topicLink, removeLink, lPostLink, status));
}
} else {
adapterRows.add(new HeaderRowData("You have no tracked topics at this time."));
}
break;
case BOARD:
if (BuildConfig.DEBUG)
wtl("GRAIO hNR determined this is a board response");
if (BuildConfig.DEBUG)
wtl("setting board id");
boardID = parseBoardID(resUrl);
boolean isSplitList = false;
if (doc.getElementsByTag("th").first() != null) {
if (doc.getElementsByTag("th").first().text().equals("Board Title")) {
if (BuildConfig.DEBUG)
wtl("is actually a split board list");
updateHeaderNoJumper(doc.select("h1.page-title").first().text(), NetDesc.BOARD);
processBoards(doc);
isSplitList = true;
}
}
if (!isSplitList) {
String searchQuery = EMPTY_STRING;
String searchPJAddition = EMPTY_STRING;
if (resUrl.contains("search=")) {
if (BuildConfig.DEBUG)
wtl("board search url: " + resUrl);
searchQuery = resUrl.substring(resUrl.indexOf("search=") + 7);
int i = searchQuery.indexOf('&');
if (i != -1)
searchQuery = searchQuery.replace(searchQuery.substring(i), EMPTY_STRING);
searchPJAddition = "&search=" + searchQuery;
try {
searchQuery = URLDecoder.decode(searchQuery, DocumentParser.CHARSET_NAME);
} catch (UnsupportedEncodingException e) {
throw new AssertionError(DocumentParser.CHARSET_NAME + " is unknown");
}
}
Element headerElem = doc.getElementsByClass("page-title").first();
if (headerElem != null)
headerTitle = headerElem.text();
else
headerTitle = "GFAQs Cache Error, Board Title Not Found";
if (searchQuery.length() > 0)
headerTitle += " (search: " + searchQuery + ")";
if (doc.select("ul.paginate").size() > 1) {
pj = doc.select("ul.paginate").get(1);
if (pj != null && !pj.hasClass("user")) {
pagesInfo = getPageJumperInfo(pj);
pagePrefix = "boards/" + boardID + "?page=";
if (pagesInfo[0] > 1) {
firstPage = pagePrefix + 0 + searchPJAddition;
prevPage = pagePrefix + (pagesInfo[0] - 2) + searchPJAddition;
}
if (pagesInfo[0] != pagesInfo[1]) {
nextPage = pagePrefix + pagesInfo[0] + searchPJAddition;
lastPage = pagePrefix + (pagesInfo[1] - 1) + searchPJAddition;
if (pagesInfo[0] > pagesInfo[1]) {
session.forceNoHistoryAddition();
session.forceSkipAIOCleanup();
Crouton.showText(this, "Page count higher than page amount, going to last page...", Theming.croutonStyle());
session.get(NetDesc.BOARD, lastPage);
return;
}
}
}
}
updateHeader(headerTitle, firstPage, prevPage, pagesInfo[0], pagesInfo[1], nextPage, lastPage, pagePrefix + searchPJAddition, NetDesc.BOARD);
setMenuItemVisibility(searchIcon, true);
if (Session.isLoggedIn()) {
Element favbtn = doc.getElementsByClass("user").first().getElementsByAttributeValueStarting("onclick", "post_click").first();
if (favbtn != null) {
String favtext = favbtn.text().toLowerCase();
String onclick = favbtn.attr("onclick");
int endPoint = onclick.lastIndexOf('\'');
int startPoint = onclick.lastIndexOf('\'', endPoint - 1) + 1;
favKey = onclick.substring(startPoint, endPoint);
fMode = FavMode.ON_BOARD;
if (favtext.contains("add to favorites"))
setMenuItemVisibility(addFavIcon, true);
else if (favtext.contains("remove favorite"))
setMenuItemVisibility(remFavIcon, true);
}
updatePostingRights(doc, false);
}
Element splitList = doc.select("p:contains(this is a split board)").first();
if (splitList != null) {
String splitListLink = splitList.child(0).attr("href");
adapterRows.add(new BoardRowData("This is a Split Board.", "Click here to return to the Split List.", null, null, null, splitListLink, BoardType.SPLIT));
}
Element table = doc.select("table.board").first();
if (table != null && !table.select("td").first().hasAttr("colspan")) {
table.getElementsByTag("col").get(2).remove();
table.getElementsByTag("th").get(2).remove();
table.getElementsByTag("col").get(0).remove();
table.getElementsByTag("th").get(0).remove();
if (BuildConfig.DEBUG)
wtl("board row parsing start");
boolean skipFirst = true;
Set<String> hlUsers = hlDB.getHighlightedUsers().keySet();
for (Element row : table.getElementsByTag("tr")) {
if (!skipFirst) {
Elements cells = row.getElementsByTag("td");
// cells = [image] [title] [author] [post count] [last post]
String tImg = cells.get(0).child(0).className();
Element titleLinkElem = cells.get(1).child(0);
String title = titleLinkElem.text();
String tUrl = titleLinkElem.attr("href");
String tc = cells.get(2).text();
Element lPostLinkElem = cells.get(4).child(0);
String lastPost = lPostLinkElem.text();
String lpUrl = lPostLinkElem.attr("href");
String mCount = cells.get(3).text();
TopicType type = TopicType.NORMAL;
if (tImg.contains("poll"))
type = TopicType.POLL;
else if (tImg.contains("sticky"))
type = TopicType.PINNED;
else if (tImg.contains("closed"))
type = TopicType.LOCKED;
else if (tImg.contains("archived"))
type = TopicType.ARCHIVED;
if (BuildConfig.DEBUG)
wtl(tImg + ", " + type.name());
ReadStatus status = ReadStatus.UNREAD;
if (tImg.endsWith("_read"))
status = ReadStatus.READ;
else if (tImg.endsWith("_unread")) {
status = ReadStatus.NEW_POST;
lpUrl = cells.get(0).child(0).attr("href");
}
int hlColor = 0;
if (hlUsers.contains(tc.toLowerCase(Locale.US))) {
HighlightedUser hUser = hlDB.getHighlightedUsers().get(tc.toLowerCase(Locale.US));
hlColor = hUser.getColor();
tc += " (" + hUser.getLabel() + ")";
}
adapterRows.add(new TopicRowData(title, tc, lastPost, mCount, tUrl, lpUrl, type, status, hlColor));
} else
skipFirst = false;
}
if (BuildConfig.DEBUG)
wtl("board row parsing end");
} else {
adapterRows.add(new HeaderRowData("There are no topics at this time."));
}
}
if (BuildConfig.DEBUG)
wtl("board response block finished");
break;
case TOPIC:
contentList.setDividerHeight(0);
boardID = parseBoardID(resUrl);
topicID = parseTopicID(resUrl);
tlUrl = "boards/" + boardID;
if (BuildConfig.DEBUG)
wtl(tlUrl);
setMenuItemVisibility(topicListIcon, true);
Element headerElem = doc.getElementsByClass("title").first();
if (headerElem != null)
headerTitle = headerElem.text();
else
headerTitle = "GFAQs Cache Error, Title Not Found";
if (headerTitle.equals("Log In to GameFAQs")) {
headerElem = doc.getElementsByClass("title").get(1);
if (headerElem != null)
headerTitle = headerElem.text();
}
if (doc.select("ul.paginate").size() > 1) {
pj = doc.select("ul.paginate").get(1);
if (pj != null && !pj.hasClass("user")) {
pagesInfo = getPageJumperInfo(pj);
pagePrefix = "boards/" + boardID + "/" + topicID + "?page=";
if (pagesInfo[0] > 1) {
firstPage = pagePrefix + 0;
prevPage = pagePrefix + (pagesInfo[0] - 2);
}
if (pagesInfo[0] != pagesInfo[1]) {
nextPage = pagePrefix + pagesInfo[0];
lastPage = pagePrefix + (pagesInfo[1] - 1);
if (pagesInfo[0] > pagesInfo[1]) {
session.forceNoHistoryAddition();
session.forceSkipAIOCleanup();
Crouton.showText(this, "Page count higher than page amount, going to last page...", Theming.croutonStyle());
session.get(NetDesc.TOPIC, lastPage);
return;
}
}
}
}
updateHeader(headerTitle, firstPage, prevPage, pagesInfo[0], pagesInfo[1], nextPage, lastPage, pagePrefix, NetDesc.TOPIC);
if (Session.isLoggedIn()) {
Element favbtn = doc.getElementsByClass("user").first().getElementsByAttributeValueStarting("onclick", "post_click").first();
if (favbtn != null) {
String favtext = favbtn.text().toLowerCase();
String onclick = favbtn.attr("onclick");
int endPoint = onclick.lastIndexOf('\'');
int startPoint = onclick.lastIndexOf('\'', endPoint - 1) + 1;
favKey = onclick.substring(startPoint, endPoint);
fMode = FavMode.ON_TOPIC;
if (favtext.contains("track topic"))
setMenuItemVisibility(addFavIcon, true);
else if (favtext.contains("stop tracking"))
setMenuItemVisibility(remFavIcon, true);
}
updatePostingRights(doc, true);
}
String goToThisPost = null;
if (goToUrlDefinedPost) {
if (resUrl.indexOf('#') != -1) {
goToThisPost = resUrl.substring(resUrl.indexOf('#'));
} else {
// goToUrlDefinedPost is true when there is no url defined post, oops
goToUrlDefinedPost = false;
}
}
Elements rows = doc.select("table.board").first().getElementsByTag("tr");
int rowCount = rows.size();
int msgIndex = 0;
Set<String> hlUsers = hlDB.getHighlightedUsers().keySet();
for (int x = 0; x < rowCount; x++) {
Element row = rows.get(x);
if (row.select("div.msg_deleted").isEmpty()) {
String user;
String postNum;
String postTime;
String mID = null;
String userTitles = EMPTY_STRING;
Element msgBody;
boolean canReport = false, canDelete = false, canEdit = false, canQuote = false;
Element infoBox = row.select("div.msg_infobox").first();
user = infoBox.getElementsByTag("b").first().text();
Element userInfo = infoBox.select("span.user_info").first();
if (userInfo != null)
userTitles = " " + userInfo.text();
Element userTag = infoBox.select("span.tag").first();
if (userTag != null)
userTitles += " (" + userTag.text() + ")";
postTime = infoBox.select("span.post_time").first().text();
Element number = infoBox.select("span.message_num").first();
postNum = number.text();
if (!number.children().isEmpty()) {
mID = parseMessageID(number.child(0).attr("href"));
}
msgBody = row.select("div.msg_body").first();
Element msgBelow = row.select("div.msg_below").first();
Element edited = msgBelow.select("span.edited").first();
if (edited != null)
userTitles += " (edited)";
Element belowOptions = msgBelow.select("span.options").first();
if (belowOptions != null) {
String options = belowOptions.text();
if (options.contains("report"))
canReport = true;
if (options.contains("delete"))
canDelete = true;
if (options.contains("edit"))
canEdit = true;
if (options.contains("quote"))
canQuote = true;
}
int hlColor = 0;
if (hlUsers.contains(user.toLowerCase(Locale.US))) {
HighlightedUser hUser = hlDB.getHighlightedUsers().get(user.toLowerCase(Locale.US));
hlColor = hUser.getColor();
userTitles += " (" + hUser.getLabel() + ")";
}
if (goToUrlDefinedPost) {
if (postNum.equals(goToThisPost))
goToThisIndex = msgIndex;
}
String avatarUrl = row.getElementsByClass("imgboxart").first().attr("src");
if (BuildConfig.DEBUG)
wtl("creating messagerowdata object");
adapterRows.add(new MessageRowData(user, userTitles, avatarUrl, postNum, postTime, msgBody, boardID, topicID, mID, hlColor, canReport, canDelete, canEdit, canQuote));
} else {
String postNum = row.select("span.message_num").first().text();
if (goToUrlDefinedPost) {
if (postNum.equals(goToThisPost))
goToThisIndex = msgIndex;
}
adapterRows.add(new MessageRowData(true, postNum));
}
msgIndex++;
}
break;
case MESSAGE_DETAIL:
updateHeaderNoJumper("Message Detail", NetDesc.MESSAGE_DETAIL);
boardID = parseBoardID(resUrl);
topicID = parseTopicID(resUrl);
String mID = parseMessageID(resUrl);
Elements msgRows = doc.select("td.msg");
adapterRows.add(new HeaderRowData("Current Version"));
MessageRowData msg;
int msgRowCount = msgRows.size();
for (int x = 0; x < msgRowCount; x++) {
if (x == 1)
adapterRows.add(new HeaderRowData("Previous Version(s)"));
Element currRow = msgRows.get(x);
Element msgInfobox = currRow.select("div.msg_infobox").first();
Element msgBody = currRow.select("div.msg_body").first();
String user = msgInfobox.getElementsByTag("b").first().text();
String postTime = msgInfobox.select("span.post_time").first().text();
msg = new MessageRowData(user, EMPTY_STRING, EMPTY_STRING, "#" + (msgRowCount - x), postTime, msgBody, boardID, topicID, mID, 0, false, false, false, false);
msg.disableTopClick();
adapterRows.add(msg);
}
break;
case USER_TAG:
if (BuildConfig.DEBUG)
wtl("starting check for user tag success");
Element error = doc.getElementsByClass("error").first();
if (error == null) {
Crouton.showText(this, "User tag updated successfully.", Theming.croutonStyle());
} else {
AlertDialog.Builder b = new AlertDialog.Builder(this);
b.setTitle("There was an error tagging the user...");
b.setMessage("Error message from GameFAQs:\n\n" + error.text());
b.setPositiveButton("OK", null);
b.show();
}
case USER_DETAIL:
if (BuildConfig.DEBUG)
wtl("starting user detail processing");
tbody = doc.select("table.board").first().getElementsByTag("tbody").first();
String name = null;
String ID = null;
String level = null;
String creation = null;
String lVisit = null;
String sig = null;
String karma = null;
String AMP = null;
String tagKey = null;
String tagText = null;
for (Element row : tbody.children()) {
String label = row.child(0).text().toLowerCase(Locale.US);
if (BuildConfig.DEBUG)
wtl("user detail row label: " + label);
switch(label) {
case "user name":
name = row.child(1).text();
break;
case "user id":
ID = row.child(1).text();
break;
case "board user level":
level = row.child(1).html();
if (BuildConfig.DEBUG)
wtl("set level: " + level);
break;
case "account created":
creation = row.child(1).text();
break;
case "last visit":
lVisit = row.child(1).text();
break;
case "signature":
sig = row.child(1).html();
break;
case "karma":
karma = row.child(1).text();
break;
case "active messages posted":
AMP = row.child(1).text();
break;
}
}
if (Session.isLoggedIn()) {
Element pmIcon = doc.select("i.fa-envelope").last();
if (pmIcon != null && pmIcon.attr("title").startsWith("Send a PM to"))
setMenuItemVisibility(sendUserPMIcon, true);
setMenuItemVisibility(tagUserIcon, true);
tagKey = doc.getElementsByAttributeValue("name", "key").attr("value");
tagText = doc.getElementsByAttributeValue("name", "tag_text").attr("value");
if (tagText == null)
tagText = "";
}
updateHeaderNoJumper(name + "'s Details", NetDesc.USER_DETAIL);
userDetailData = new UserDetailRowData(name, ID, level, creation, lVisit, sig, karma, AMP, tagKey, tagText, resUrl);
adapterRows.add(userDetailData);
break;
case GAME_SEARCH:
if (BuildConfig.DEBUG)
wtl("GRAIO hNR determined this is a game search response");
if (BuildConfig.DEBUG)
wtl("game search url: " + resUrl);
String searchQuery = resUrl.substring(resUrl.indexOf("game=") + 5);
int i = searchQuery.indexOf("&");
if (i != -1)
searchQuery = searchQuery.replace(searchQuery.substring(i), EMPTY_STRING);
int pageIndex = resUrl.indexOf("page=");
if (pageIndex != -1) {
String currPage = resUrl.substring(pageIndex + 5);
i = currPage.indexOf("&");
if (i != -1)
currPage = currPage.replace(currPage.substring(i), EMPTY_STRING);
pagesInfo[0] = Integer.parseInt(currPage) + 1;
} else {
pagesInfo[0] = 1;
}
if (pagesInfo[0] > 1) {
firstPage = "/search/index.html?game=" + searchQuery + "&page=0";
prevPage = "/search/index.html?game=" + searchQuery + "&page=" + (pagesInfo[0] - 2);
}
if (!doc.getElementsByClass("icon-angle-right").isEmpty()) {
nextPage = "/search/index.html?game=" + searchQuery + "&page=" + (pagesInfo[0]);
}
try {
headerTitle = "Searching games: " + URLDecoder.decode(searchQuery, DocumentParser.CHARSET_NAME) + EMPTY_STRING;
} catch (UnsupportedEncodingException e) {
throw new AssertionError(DocumentParser.CHARSET_NAME + " is unknown");
}
updateHeader(headerTitle, firstPage, prevPage, pagesInfo[0], -1, nextPage, lastPage, pagePrefix, NetDesc.GAME_SEARCH);
setMenuItemVisibility(searchIcon, true);
Elements gameSearchTables = doc.select("table.results");
int tCount = gameSearchTables.size();
int tCounter = 0;
if (!gameSearchTables.isEmpty()) {
for (Element table : gameSearchTables) {
tCounter++;
if (tCounter < tCount)
adapterRows.add(new HeaderRowData("Best Matches"));
else
adapterRows.add(new HeaderRowData("Good Matches"));
String prevPlatform = EMPTY_STRING;
if (BuildConfig.DEBUG)
wtl("board row parsing start");
for (Element row : table.getElementsByTag("tr")) {
if (row.parent().tagName().equals("tbody")) {
Elements cells = row.getElementsByTag("td");
// cells = [platform] [title] [faqs] [codes] [saves] [revs] [mygames] [q&a] [pics] [vids] [board]
String platform = cells.get(0).text();
String bName = cells.get(1).text();
String bUrl = cells.get(9).child(0).attr("href");
if (platform.codePointAt(0) == (' ')) {
platform = prevPlatform;
} else {
prevPlatform = platform;
}
adapterRows.add(new GameSearchRowData(bName, platform, bUrl));
}
}
if (BuildConfig.DEBUG)
wtl("board row parsing end");
}
} else {
adapterRows.add(new HeaderRowData("No results."));
}
if (BuildConfig.DEBUG)
wtl("game search response block finished");
break;
default:
if (BuildConfig.DEBUG)
wtl("GRAIO hNR determined response type is unhandled");
getSupportActionBar().setTitle("Page unhandled - " + resUrl);
break;
}
Element pmInboxLink = doc.select("i.fa-envelope").first();
String pmButtonLabel = getString(R.string.pm_inbox);
if (pmInboxLink != null) {
pmButtonLabel += " " + ((TextNode) pmInboxLink.nextSibling()).text();
}
dwrPMInboxItem.setTitle(pmButtonLabel);
Element notifsObject = doc.select("span.notifications").first();
notifsAdapter.clear();
notifsLinks.clear();
notifsLinks.add("filler");
String count = "0";
if (notifsObject != null) {
count = notifsObject.child(0).text();
if (count.equals("1"))
count = count + " " + getString(R.string.notification);
else
count = count + " " + getString(R.string.notifications);
notifsAdapter.add(count);
Elements notifsLines = notifsObject.getElementsByTag("li");
notifsLines.remove(notifsLines.size() - 1);
for (Element e : notifsLines) {
notifsAdapter.add(e.text());
notifsLinks.add(e.select("a").first().attr("href"));
}
notifsAdapter.add("View All");
notifsLinks.add(NOTIFS_PAGE_LINK);
notifsAdapter.add("Clear All");
notifsLinks.add(NOTIFS_CLEAR_LINK);
setMenuItemVisibility(unreadNotifsIcon, true);
} else {
notifsAdapter.add(count + " " + getString(R.string.notifications));
notifsAdapter.add("View All");
notifsLinks.add(NOTIFS_PAGE_LINK);
setMenuItemVisibility(unreadNotifsIcon, false);
}
notifsAdapter.notifyDataSetChanged();
swipeRefreshLayout.setEnabled(settings.getBoolean("enablePTR", false));
viewAdapter.notifyDataSetChanged();
if (consumeGoToUrlDefinedPost() && !Session.applySavedScroll) {
contentList.post(new Runnable() {
@Override
public void run() {
contentList.setSelection(goToThisIndex);
}
});
} else if (Session.applySavedScroll) {
contentList.post(new Runnable() {
@Override
public void run() {
contentList.setSelectionFromTop(Session.savedScrollVal[0], Session.savedScrollVal[1]);
Session.applySavedScroll = false;
}
});
} else {
contentList.post(new Runnable() {
@Override
public void run() {
contentList.setSelectionAfterHeaderView();
}
});
}
if (swipeRefreshLayout.isRefreshing())
swipeRefreshLayout.setRefreshing(false);
if (BuildConfig.DEBUG)
wtl("GRAIO hNR finishing");
}Example 72
| Project: structr-master File: Importer.java View source code |
/**
 * Recursively converts the child nodes of a parsed jsoup node into Structr DOM nodes.
 *
 * <p>For every child of {@code startNode} this creates a comment, content, template, shared-component
 * or element node, copies id, class and attribute values onto it, applies pending comment
 * instructions, attaches it to {@code parent} (when non-null) and then descends into the child's
 * own children.</p>
 *
 * @param startNode           parsed node whose child nodes are imported
 * @param parent              DOM node that receives the new nodes; may be {@code null} to prevent a direct child relationship
 * @param page                page used to create comment, text and element nodes
 * @param removeHashAttribute whether the data-structr-hash attribute is removed from source elements
 * @param depth               current recursion depth, incremented on each descent
 * @return the first non-comment node created at this level, or {@code null} if none was created
 * @throws FrameworkException declared by the signature; propagated from the Structr calls made here
 */
private DOMNode createChildNodes(final Node startNode, final DOMNode parent, final Page page, final boolean removeHashAttribute, final int depth) throws FrameworkException {
    DOMNode rootElement = null;
    // NOTE(review): res is declared outside the loop and never reset, so a file downloaded for one
    // child is also set as linkable on every later sibling created in this call - confirm intended.
    Linkable res = null;
    String instructions = null;
    final List<Node> children = startNode.childNodes();
    for (Node node : children) {
        String tag = node.nodeName();
        // clean tag, remove non-word characters except : # . _ and -
        // The hyphen has to be the last character of the class: written as ".-_" it forms a range
        // that strips '-' from tag names and lets characters such as '/', '<' and '@' through.
        if (tag != null) {
            tag = tag.replaceAll("[^a-zA-Z0-9#:._-]+", "");
        }
        String type = CaseHelper.toUpperCamelCase(tag);
        String comment = null;
        String content = null;
        String id = null;
        StringBuilder classString = new StringBuilder();
        boolean isNewTemplateOrComponent = false;
        if (ArrayUtils.contains(ignoreElementNames, type)) {
            continue;
        }
        if (node instanceof Element) {
            Element el = ((Element) node);
            Set<String> classes = el.classNames();
            for (String cls : classes) {
                classString.append(cls).append(" ");
            }
            id = el.id();
            // do not download files when called from DeployCommand!
            if (!isDeployment) {
                String downloadAddressAttr = (ArrayUtils.contains(srcElements, tag) ? "src" : ArrayUtils.contains(hrefElements, tag) ? "href" : null);
                if (downloadAddressAttr != null && StringUtils.isNotBlank(node.attr(downloadAddressAttr))) {
                    String downloadAddress = node.attr(downloadAddressAttr);
                    res = downloadFile(downloadAddress, originalUrl);
                }
            }
            if (removeHashAttribute) {
                // Remove data-structr-hash attribute
                node.removeAttr(DOMNode.dataHashProperty.jsonName());
            }
        }
        // Data and comment nodes: Trim the text and put it into the "content" field without changes
        if (type.equals("#comment")) {
            comment = ((Comment) node).getData();
            tag = "";
            // Don't add content node for whitespace
            if (StringUtils.isBlank(comment)) {
                continue;
            }
            // store for later use
            commentSource.append(comment).append("\n");
            // check if comment contains instructions
            if (commentHandler != null && commentHandler.containsInstructions(comment)) {
                if (instructions != null) {
                    // unhandled instructions from previous iteration => empty content element
                    createEmptyContentNode(page, parent, commentHandler, instructions);
                }
                instructions = comment;
                continue;
            }
        } else if (type.equals("#data")) {
            tag = "";
            content = ((DataNode) node).getWholeData();
            // Don't add content node for whitespace
            if (StringUtils.isBlank(content)) {
                continue;
            }
        } else if (type.equals("#text")) {
            // Text-only nodes: Trim the text and put it into the "content" field
            tag = "";
            if (isDeployment) {
                content = trimTrailingNewline(((TextNode) node).getWholeText());
                if (content == null || content.length() == 0) {
                    continue;
                }
            } else {
                content = trimTrailingNewline(((TextNode) node).text());
                if (StringUtils.isBlank(content)) {
                    continue;
                }
            }
        }
        org.structr.web.entity.dom.DOMNode newNode = null;
        // create node
        if (StringUtils.isBlank(tag)) {
            // create comment or content node
            if (!StringUtils.isBlank(comment)) {
                newNode = (DOMNode) page.createComment(comment);
                newNode.setProperty(org.structr.web.entity.dom.Comment.contentType, "text/html");
            } else {
                newNode = (Content) page.createTextNode(content);
            }
        } else if ("structr:template".equals(tag)) {
            final String src = node.attr("src");
            if (src != null) {
                DOMNode template = null;
                if (DeployCommand.isUuid(src)) {
                    template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class).and(GraphObject.id, src).getFirst();
                    if (template == null) {
                        System.out.println("##################################### template with UUID " + src + " not found, this is a known bug");
                    }
                } else {
                    template = Importer.findSharedComponentByName(src);
                    if (template == null) {
                        template = Importer.findTemplateByName(src);
                        if (template == null) {
                            template = createNewTemplateNode(parent, node.childNodes());
                            isNewTemplateOrComponent = true;
                        }
                    }
                }
                if (template != null) {
                    newNode = template;
                    if (template.isSharedComponent()) {
                        newNode = (DOMNode) template.cloneNode(false);
                        newNode.setProperty(DOMNode.sharedComponent, template);
                        newNode.setProperty(DOMNode.ownerDocument, page);
                    } else if (page != null) {
                        newNode.setProperty(DOMNode.ownerDocument, page);
                    }
                } else {
                    logger.warn("Unable to find template or shared component {}, template ignored!", src);
                }
            } else {
                logger.warn("Invalid template definition, missing src attribute!");
            }
        } else if ("structr:component".equals(tag)) {
            final String src = node.attr("src");
            if (src != null) {
                DOMNode component = null;
                if (DeployCommand.isUuid(src)) {
                    component = app.nodeQuery(DOMNode.class).and(GraphObject.id, src).getFirst();
                } else {
                    component = Importer.findSharedComponentByName(src);
                }
                if (component == null) {
                    component = createSharedComponent(node);
                }
                isNewTemplateOrComponent = true;
                if (component != null) {
                    newNode = (DOMNode) component.cloneNode(false);
                    newNode.setProperty(DOMNode.sharedComponent, component);
                    newNode.setProperty(DOMNode.ownerDocument, page);
                } else {
                    logger.warn("Unable to find shared component {} - ignored!", src);
                }
            } else {
                logger.warn("Invalid component definition, missing src attribute!");
            }
        } else {
            newNode = (org.structr.web.entity.dom.DOMElement) page.createElement(tag, true);
            if (newNode == null) {
                newNode = createNewHTMLTemplateNodeForUnsupportedTag(parent, node);
                isNewTemplateOrComponent = true;
            }
        }
        if (newNode != null) {
            // save root element for later use
            if (rootElement == null && !(newNode instanceof org.structr.web.entity.dom.Comment)) {
                rootElement = newNode;
            }
            newNode.setProperty(AbstractNode.visibleToPublicUsers, publicVisible);
            newNode.setProperty(AbstractNode.visibleToAuthenticatedUsers, authVisible);
            if (res != null) {
                newNode.setProperty(LinkSource.linkable, res);
            }
            // "id" attribute: Put it into the "_html_id" field
            if (StringUtils.isNotBlank(id)) {
                newNode.setProperty(DOMElement._id, id);
            }
            if (StringUtils.isNotBlank(classString.toString())) {
                newNode.setProperty(DOMElement._class, StringUtils.trim(classString.toString()));
            }
            for (Attribute nodeAttr : node.attributes()) {
                final String key = nodeAttr.getKey();
                // Don't add text attribute as _html_text because the text is already contained in the 'content' attribute
                if (!key.equals("text")) {
                    final String value = nodeAttr.getValue();
                    if (key.startsWith("data-")) {
                        if (key.startsWith(DATA_META_PREFIX)) {
                            // convert data-structr-meta-* attributes to local camel case properties on the node,
                            int l = DATA_META_PREFIX.length();
                            String upperCaseKey = WordUtils.capitalize(key.substring(l), new char[] { '-' }).replaceAll("-", "");
                            String camelCaseKey = key.substring(l, l + 1).concat(upperCaseKey.substring(1));
                            if (value != null) {
                                // store value using actual input converter
                                final PropertyKey actualKey = StructrApp.getConfiguration().getPropertyKeyForJSONName(newNode.getClass(), camelCaseKey, false);
                                if (actualKey != null) {
                                    final PropertyConverter converter = actualKey.inputConverter(securityContext);
                                    if (converter != null) {
                                        final Object convertedValue = converter.convert(value);
                                        newNode.setProperty(actualKey, convertedValue);
                                    } else {
                                        newNode.setProperty(actualKey, value);
                                    }
                                } else {
                                    logger.warn("Unknown meta property key {}, ignoring.", camelCaseKey);
                                }
                            }
                        } else if (key.startsWith(DATA_STRUCTR_PREFIX)) {
                            // don't convert data-structr-* attributes as they are internal
                            final PropertyKey propertyKey = config.getPropertyKeyForJSONName(newNode.getClass(), key);
                            if (propertyKey != null) {
                                final PropertyConverter inputConverter = propertyKey.inputConverter(securityContext);
                                if (value != null && inputConverter != null) {
                                    newNode.setProperty(propertyKey, propertyKey.inputConverter(securityContext).convert(value));
                                } else {
                                    newNode.setProperty(propertyKey, value);
                                }
                            }
                        } else {
                            // store data-* attributes in node
                            final PropertyKey propertyKey = new StringProperty(key);
                            if (value != null) {
                                newNode.setProperty(propertyKey, value);
                            }
                        }
                    } else {
                        boolean notBlank = StringUtils.isNotBlank(value);
                        boolean isAnchor = notBlank && value.startsWith("#");
                        boolean isLocal = notBlank && !value.startsWith("http");
                        boolean isActive = notBlank && value.contains("${");
                        boolean isStructrLib = notBlank && value.startsWith("/structr/js/");
                        if ("link".equals(tag) && "href".equals(key) && isLocal && !isActive && !isDeployment) {
                            newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)), "${link.path}?${link.version}");
                        } else if (("href".equals(key) || "src".equals(key)) && isLocal && !isActive && !isAnchor && !isStructrLib && !isDeployment) {
                            newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)), "${link.path}");
                        } else {
                            newNode.setProperty(new StringProperty(PropertyView.Html.concat(key)), value);
                        }
                    }
                }
            }
            final StringProperty typeKey = new StringProperty(PropertyView.Html.concat("type"));
            if ("script".equals(tag)) {
                final String contentType = newNode.getProperty(typeKey);
                if (contentType == null) {
                    // Set default type of script tag to "text/javascript" to ensure inline JS gets imported properly
                    newNode.setProperty(typeKey, "text/javascript");
                } else if (contentType.equals("application/schema+json")) {
                    for (final Node scriptContentNode : node.childNodes()) {
                        final String source = scriptContentNode.toString();
                        // Import schema JSON
                        SchemaJsonImporter.importSchemaJson(source);
                    }
                } else if (contentType.equals("application/x-cypher")) {
                    for (final Node scriptContentNode : node.childNodes()) {
                        final String source = scriptContentNode.toString();
                        // import Cypher queries from script source
                        final GraphGistImporter importer = app.command(GraphGistImporter.class);
                        final List<String> sources = new ArrayList<>();
                        sources.add(source);
                        importer.importCypher(sources);
                    }
                    // the script was consumed here: skip attaching the node and descending into it
                    continue;
                } else if (contentType.equals("application/x-structr-script")) {
                    for (final Node scriptContentNode : node.childNodes()) {
                        final String source = scriptContentNode.toString();
                        Actions.execute(securityContext, null, source, null);
                    }
                    continue;
                } else if (contentType.equals("application/x-structr-javascript")) {
                    for (final Node scriptContentNode : node.childNodes()) {
                        final String source = scriptContentNode.toString();
                        Actions.execute(securityContext, null, source, null);
                    }
                    continue;
                }
            }
            if (instructions != null) {
                if (instructions.contains("@structr:content") && !(newNode instanceof Content)) {
                    // unhandled instructions from previous iteration => empty content element
                    createEmptyContentNode(page, parent, commentHandler, instructions);
                } else {
                    // apply instructions to new DOM element
                    if (commentHandler != null) {
                        commentHandler.handleComment(page, newNode, instructions, true);
                    }
                }
                instructions = null;
            }
            // allow parent to be null to prevent direct child relationship
            if (parent != null) {
                // special handling for <head> elements
                if (newNode instanceof Head && parent instanceof Body) {
                    final org.w3c.dom.Node html = parent.getParentNode();
                    html.insertBefore(newNode, parent);
                } else {
                    parent.appendChild(newNode);
                }
            }
            // Step down and process child nodes except for newly created templates
            if (!isNewTemplateOrComponent) {
                createChildNodes(node, newNode, page, removeHashAttribute, depth + 1);
            }
        }
    }
    // reset instructions when leaving a level
    if (instructions != null) {
        createEmptyContentNode(page, parent, commentHandler, instructions);
        instructions = null;
    }
    return rootElement;
}Example 73
| Project: Diary.Ru-Client-master File: NetworkService.java View source code |
/**
 * Applies modifications to every diary page that gets loaded.
 * Page tweaks requested by users are made here.
 *
 * @param resultPage the page to modify
 */
private void mutateContent(Document resultPage) {
    // the page gets our own style sheet
    String theme = mPreferences.getString("app.theme", "red");
    resultPage.head().append("<link rel=\"stylesheet\" href=\"file:///android_asset/css/" + theme + ".css\" type=\"text/css\" media=\"all\" title=\"Стандарт\"/>");
    // the repost button points to the proper link
    Elements shareLinks = resultPage.select(".postLinks li[class^=quote]");
    for (Element shareLi : shareLinks) {
        // child(0) indexes element children only, so test those; childNodeSize() also counts
        // text nodes and would let child(0) throw on an <li> that holds nothing but text
        if (shareLi.children().isEmpty())
            continue;
        Element repostLink = shareLi.child(0);
        Element diaryRepost = shareLi.select("div a[href*=newpost]").first();
        if (diaryRepost != null)
            repostLink.attr("href", diaryRepost.attr("href"));
    }
    // text instead of the edit buttons
    if (mUseTextInsteadOfImages) {
        Elements postActionImages = resultPage.select("ul.postActionLinks img");
        for (Element img : postActionImages) {
            // convert to text
            if (img.hasAttr("title")) {
                Node text = new TextNode(img.attr("title"), resultPage.baseUri());
                img.replaceWith(text);
            }
        }
    }
    // JS fix-up
    Elements jsElems = resultPage.getElementsByAttribute("onclick");
    for (Element js : jsElems) {
        String link = js.attr("href");
        if (!link.contains("#more") && !link.contains("subscribe") && !link.contains("showresult") && !link.contains("up&signature=") && !link.contains("down&signature=") && !link.contains("tag_showedit"))
            // Kill all javascript except MORE, raising/lowering posts, poll results and subscription
            js.removeAttr("onclick");
    }
    // swap images when auto-loading is switched off
    if (!mLoadImages) {
        Elements images = resultPage.select("img[src^=http], a:has(img)");
        for (Element current : images) {
            if (current.tagName().equals("img")) {
                String src = current.attr("src");
                if (!src.contains("diary.ru") && !current.parent().className().equals("avatar") && !src.startsWith("/")) {
                    // every image on the page that does not match the criteria is replaced with a button that loads it on click
                    String jsButton = "<input type='image' src='file:///android_asset/images/load_image.png' onclick='return handleIMGDown(this, \"" + src + "\")' />";
                    current.after(jsButton);
                    current.remove();
                }
            }
            if (current.tagName().equals("a")) {
                String src = current.getElementsByTag("img").attr("src");
                if (!src.contains("diary.ru") && !current.parent().className().equals("avatar") && !src.startsWith("/")) {
                    // every image on the page that does not match the criteria is replaced with a button that loads it on click
                    String jsButton = "<input type='image' src='file:///android_asset/images/load_image.png' onclick='return handleADown(this, \"" + current.attr("href") + "\", \"" + src + "\")' />";
                    current.after(jsButton);
                    current.remove();
                }
            }
        }
    }
    // include the javascript
    resultPage.body().append(Utils.javascriptContent);
    // the signature must be visible to the JS methods
    resultPage.body().append("<script>var signature = '" + UserData.getInstance().getSignature() + "';</script>");
}Example 74
| Project: hn-android-master File: BaseHTMLParser.java View source code |
/**
 * Returns the text of the first direct child node of {@code element} that is a {@link TextNode}.
 *
 * @param element element whose child nodes are scanned; may be {@code null}
 * @return that child's text, or an empty string when {@code element} is {@code null}
 *         or none of its child nodes is a text node
 */
public static String getFirstTextValueInElementChildren(Element element) {
    if (element != null) {
        for (org.jsoup.nodes.Node child : element.childNodes()) {
            if (child instanceof TextNode) {
                TextNode textChild = (TextNode) child;
                return textChild.text();
            }
        }
    }
    return "";
}Example 75
| Project: webmagic-master File: CssSelector.java View source code |
/**
 * Concatenates the text of all direct {@link TextNode} children of {@code element};
 * children of any other node type are skipped.
 *
 * @param element element whose child nodes are scanned
 * @return the joined text of the direct text-node children (empty if there are none)
 */
protected String getText(Element element) {
    final StringBuilder ownText = new StringBuilder();
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        ownText.append(((TextNode) child).text());
    }
    return ownText.toString();
}Example 76
| Project: karma-exchange-master File: HtmlUtil.java View source code |
/**
 * Hit when the node is first seen: appends the text of a text node,
 * or a bullet prefix when the node is named "li".
 *
 * @param node  node being visited
 * @param depth depth of the node (not used here)
 */
public void head(Node node, int depth) {
    final String nodeName = node.nodeName();
    if (node instanceof TextNode) {
        // All user-readable text in the DOM is carried by TextNodes.
        final TextNode textNode = (TextNode) node;
        append(textNode.text());
        return;
    }
    if (nodeName.equals("li")) {
        append("\n * ");
    }
}Example 77
| Project: nate-master File: JsoupBackedNateDocumentFragment.java View source code |
/**
 * Replaces the pseudo root with a freshly created one that holds a single text node.
 *
 * @param text text placed into the new text node
 */
@Override
public void setTextContent(String text) {
    this.pseudoRoot = createPseudoRootElement();
    final TextNode textNode = new TextNode(text, JsoupBackedNateDocumentFactory.BASE_URI);
    this.pseudoRoot.appendChild(textNode);
}Example 78
| Project: opensearchserver-master File: JSoupHtmlNode.java View source code |
/**
 * Returns the text of the wrapped node, which is cast to {@link TextNode} without a type check.
 *
 * @return the text of the wrapped text node
 */
@Override
public String getText() {
    return ((TextNode) node).text();
}Example 79
| Project: FitGoodies-master File: FitCell.java View source code |
/**
 * Wraps {@code message} in a {@link TextNode} and hands that node's outer HTML to
 * {@code rawInfo}; a {@code null} message is ignored.
 *
 * @param message plain-text message, may be {@code null}
 */
public void info(String message) {
    if (message != null) {
        final TextNode messageNode = new TextNode(message, td.baseUri());
        rawInfo(messageNode.outerHtml());
    }
}Example 80
| Project: StartupNews-master File: BaseHTMLParser.java View source code |
/**
 * Looks up the first direct child node of {@code element} that is a {@link TextNode}
 * and returns its text.
 *
 * @param element element whose child nodes are scanned; may be {@code null}
 * @return the text of the first text-node child, or an empty string when {@code element}
 *         is {@code null} or has no such child
 */
public static String getFirstTextValueInElementChildren(Element element) {
    String firstText = "";
    if (element != null) {
        for (org.jsoup.nodes.Node child : element.childNodes()) {
            if (!(child instanceof TextNode)) {
                continue;
            }
            firstText = ((TextNode) child).text();
            break;
        }
    }
    return firstText;
}Example 81
| Project: FudanBBS-master File: HtmlToPlainText.java View source code |
/**
 * Hit when the node is first seen. Text nodes contribute their text;
 * a node named "li" contributes a bullet prefix on a new line.
 *
 * @param node  node being visited
 * @param depth depth of the node (not used here)
 */
public void head(Node node, int depth) {
    final String tagName = node.nodeName();
    final boolean isTextNode = node instanceof TextNode;
    if (isTextNode) {
        // TextNodes are where all user-readable text of the DOM lives.
        append(((TextNode) node).text());
    } else if (tagName.equals("li")) {
        append("\n * ");
    }
}Example 82
| Project: jenkinsmobi-api-master File: GoogleSsoHandler.java View source code |
/**
 * Returns the trimmed whole text of the first direct {@link TextNode} child of {@code errorDiv}.
 *
 * @param errorDiv element whose child nodes are scanned
 * @return the trimmed text of the first text-node child, or an empty string if there is none
 */
private String getDivText(final Element errorDiv) {
    for (final Node candidate : errorDiv.childNodes()) {
        if (!(candidate instanceof TextNode)) {
            continue;
        }
        final String wholeText = ((TextNode) candidate).getWholeText();
        return wholeText.trim();
    }
    return "";
}Example 83
| Project: tika-wrapper-master File: HtmlToPlaintTextSimple.java View source code |
/**
 * Hit when the node is first seen: text nodes have their text appended,
 * nodes named "li" produce a bullet prefix.
 *
 * @param node  node being visited
 * @param depth depth of the node (not used here)
 */
public void head(Node node, int depth) {
    final String elementName = node.nodeName();
    if (!(node instanceof TextNode)) {
        if (elementName.equals("li")) {
            append("\n * ");
        }
        return;
    }
    // Every piece of user-readable text in the DOM sits in a TextNode.
    append(((TextNode) node).text());
}Example 84
| Project: validadorAcessibilidade-master File: HtmlToPlainText.java View source code |
/**
 * Hit when the node is first seen. Appends the node's text if it is a text node,
 * otherwise a bullet prefix if the node is named "li".
 *
 * @param node  node being visited
 * @param depth depth of the node (not used here)
 */
public void head(Node node, int depth) {
    final String currentName = node.nodeName();
    if (node instanceof TextNode) {
        // User-readable DOM text is always held by TextNodes.
        final String visibleText = ((TextNode) node).text();
        append(visibleText);
    } else if (currentName.equals("li")) {
        append("\n * ");
    }
}Example 85
| Project: zafu_jwc-master File: HtmlToPlainText.java View source code |
/**
 * Hit when the node is first seen: emits text-node text, or a list bullet for a node named "li".
 *
 * @param node  node being visited
 * @param depth depth of the node (not used here)
 */
public void head(Node node, int depth) {
    final String name = node.nodeName();
    if (node instanceof TextNode) {
        // The DOM keeps all user-readable text in TextNodes.
        final TextNode textual = (TextNode) node;
        append(textual.text());
        return;
    }
    final boolean isListItem = name.equals("li");
    if (isListItem) {
        append("\n * ");
    }
}Example 86
| Project: act-master File: PatentDocument.java View source code |
/**
 * Appends the text of a visited {@link TextNode} to the segment builder;
 * nodes of any other type and empty texts are skipped.
 *
 * @param node node being visited
 * @param i    depth of the node (not used here)
 */
@Override
public void head(org.jsoup.nodes.Node node, int i) {
    // This borrows a page from HtmlToPlainText's book.
    if (!(node instanceof TextNode)) {
        return;
    }
    final String text = ((TextNode) node).text();
    if (text != null && !text.isEmpty()) {
        // reuse the text computed above instead of calling text() on the node a second time
        segmentBuilder.append(text);
    }
}