Java Examples for org.jsoup.nodes.Node

The following Java examples will help you understand the usage of org.jsoup.nodes.Node. These source code samples are taken from different open source projects.

Example 1
Project: mechanize-master  File: JsoupNodeHelper.java View source code
/**
 * Computes the position of {@code node} within the (optionally type-filtered) children of its parent.
 *
 * @param node   the node to locate
 * @param byType when {@code true} the children are looked up using the node's own name (via
 *               {@code getName}); otherwise {@code Selector.UNIVERSAL_TAG} is used
 * @return an {@code Index} built from the node's position in that child list and the list's size;
 *         a node without a parent is looked up in an empty list
 */
@Override
public Index getIndexInParent(final Node node, final boolean byType) {
    final String type;
    if (byType) {
        type = getName(node);
    } else {
        type = Selector.UNIVERSAL_TAG;
    }

    final Node parent = node.parent();
    final List<? extends Node> siblings;
    if (parent != null) {
        siblings = getChildNodes(parent, type);
    } else {
        // No parent: there is nothing to search in.
        siblings = Collections.emptyList();
    }
    return new Index(siblings.indexOf(node), siblings.size());
}
Example 2
Project: serverside-elements-master  File: RootImpl.java View source code
/**
 * Builds a JSON command array and appends it to {@code pendingCommands}, then marks the owner dirty.
 * <p>
 * The array starts with the command name, is followed by the numeric id of {@code target} when a
 * target is given, and ends with the supplied parameters in order.
 *
 * @param name   the command name, stored at index 0
 * @param target the node the command refers to, or {@code null} for none; a non-null target must
 *               belong to this root (checked by assertion)
 * @param params additional values appended after the name (and target id, if any)
 */
private void addCommand(String name, Node target, JsonValue... params) {
    assert target == null || target.getRoot() == this;

    JsonArray command = Json.createArray();
    command.set(0, name);
    if (target != null) {
        command.set(1, nodeToId.get(target).doubleValue());
    }
    // Each parameter goes to the current end of the array.
    for (JsonValue param : params) {
        command.set(command.length(), param);
    }

    pendingCommands.set(pendingCommands.length(), command);
    owner.markAsDirty();
}
Example 3
Project: metricminer2-master  File: HtmlNodeVisitor.java View source code
/**
 * Reflectively dispatches {@code node} to the visitor method found for its class.
 * <p>
 * Nothing happens when no method is found or when the method found does not return {@code void}.
 *
 * @param node the node to dispatch
 * @throws RuntimeException wrapping any exception raised while invoking the visitor method
 */
private void visitNodeByClass(Node node) {
    final Method handler = findVisitorMethodForNodeClass(node);
    if (handler == null || !handler.getReturnType().equals(void.class)) {
        return;
    }
    try {
        handler.invoke(this, node);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example 4
Project: moulder-j-master  File: SubMoulder.java View source code
/**
 * Applies every registered templator configuration to a copy of {@code element}.
 * <p>
 * The copy is placed in a fresh document created with the element's base URI. For each configuration,
 * every element matching its selector is run through the configured moulders and then replaced by
 * the outer HTML of the resulting nodes.
 *
 * @param element the element to process; a copy is made via {@code JsoupHelper.copy}
 * @return the child nodes of the document that holds the processed copy
 */
public List<Node> process(Element element) {
    final Document holder = new Document(element.baseUri());
    final Element copy = JsoupHelper.copy(element);
    holder.appendChild(copy);

    for (TemplatorConfig config : registry.getConfig()) {
        for (Element match : copy.select(config.selector)) {
            final List<Node> input = Arrays.<Node>asList(match);
            final Collection<Node> replacements = MouldersApplier.applyMoulders(config.templators, input);
            // Insert the moulded output in front of the match, then drop the match itself.
            for (Node replacement : replacements) {
                match.before(replacement.outerHtml());
            }
            match.remove();
        }
    }
    return holder.childNodes();
}
Example 5
Project: CN1ML-NetbeansModule-master  File: ElementsTest.java View source code
/**
 * Traverses all {@code div} elements of a small document and checks that the visitor's
 * {@code head}/{@code tail} callbacks fire in the expected order for every node.
 */
@Test
public void traverse() {
    final StringBuilder accum = new StringBuilder();
    Document doc = Jsoup.parse("<div><p>Hello</p></div><div>There</div>");

    doc.select("div").traverse(new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {
            // opening marker for the node being entered
            accum.append("<").append(node.nodeName()).append(">");
        }

        @Override
        public void tail(Node node, int depth) {
            // closing marker for the node being left
            accum.append("</").append(node.nodeName()).append(">");
        }
    });

    final String expected = "<div><p><#text></#text></p></div><div><#text></#text></div>";
    assertEquals(expected, accum.toString());
}
Example 6
Project: Crud2Go-master  File: LoadingIndicatorBootstrapListener.java View source code
/**
 * Adds a loading indicator to every fragment node identified as the main div, provided a
 * non-empty message is available for the response.
 *
 * @param response the bootstrap fragment response whose nodes are inspected
 */
@Override
public void modifyBootstrapFragment(BootstrapFragmentResponse response) {
    final String message = getMessage(response);
    if (Strings.isNullOrEmpty(message)) {
        return;
    }
    for (Node candidate : response.getFragmentNodes()) {
        if (isMainDiv(candidate)) {
            addLoadingIndicator((Element) candidate, message);
        }
    }
}
Example 7
Project: FudanBBS-master  File: Parser.java View source code
/**
 * Parses an HTML fragment and attaches the resulting nodes to the {@code body} of a new shell Document.
 *
 * @param bodyHtml the HTML fragment to parse
 * @param baseUri  base URI of the document (i.e. original fetch location), used to resolve relative URLs
 * @return a Document with an empty head and the parsed fragment inside its body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    final Document shell = Document.createShell(baseUri);
    final Element body = shell.body();
    final List<Node> parsed = parseFragment(bodyHtml, body, baseUri);

    // Work on a snapshot: the node list gets modified when nodes are re-parented.
    final Node[] snapshot = parsed.toArray(new Node[parsed.size()]);
    for (int i = 0; i < snapshot.length; i++) {
        body.appendChild(snapshot[i]);
    }
    return shell;
}
Example 8
Project: jsoup-master  File: ElementsTest.java View source code
/**
 * Verifies that traversing the selected {@code div} elements visits each node depth-first,
 * calling {@code head} on entry and {@code tail} on exit.
 */
@Test
public void traverse() {
    Document doc = Jsoup.parse("<div><p>Hello</p></div><div>There</div>");
    final StringBuilder accum = new StringBuilder();

    final NodeVisitor recorder = new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {
            accum.append("<");
            accum.append(node.nodeName());
            accum.append(">");
        }

        @Override
        public void tail(Node node, int depth) {
            accum.append("</");
            accum.append(node.nodeName());
            accum.append(">");
        }
    };
    doc.select("div").traverse(recorder);

    assertEquals("<div><p><#text></#text></p></div><div><#text></#text></div>", accum.toString());
}
Example 9
Project: validadorAcessibilidade-master  File: Parser.java View source code
/**
 * Builds a shell Document and fills its {@code body} with the nodes parsed from an HTML fragment.
 *
 * @param bodyHtml the HTML fragment to parse
 * @param baseUri  base URI of the document (i.e. original fetch location), used to resolve relative URLs
 * @return a Document with an empty head and the parsed fragment inside its body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    final Document result = Document.createShell(baseUri);
    final Element target = result.body();
    final List<Node> fragment = parseFragment(bodyHtml, target, baseUri);

    // Copy to an array before appending: the node list gets modified when re-parented.
    final Node[] pending = fragment.toArray(new Node[fragment.size()]);
    int index = 0;
    while (index < pending.length) {
        target.appendChild(pending[index]);
        index++;
    }
    return result;
}
Example 10
Project: zafu_jwc-master  File: Parser.java View source code
/**
 * Parses {@code bodyHtml} as a fragment and moves the parsed nodes into the {@code body} of a
 * freshly created shell Document.
 *
 * @param bodyHtml the HTML fragment to parse
 * @param baseUri  base URI of the document (i.e. original fetch location), used to resolve relative URLs
 * @return a Document with an empty head and the parsed fragment inside its body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    final Document document = Document.createShell(baseUri);
    final Element bodyElement = document.body();

    final List<Node> parsedNodes = parseFragment(bodyHtml, bodyElement, baseUri);
    // Iterate over an array copy because the node list gets modified when re-parented.
    final Node[] detached = new Node[parsedNodes.size()];
    parsedNodes.toArray(detached);
    for (Node parsedNode : detached) {
        bodyElement.appendChild(parsedNode);
    }
    return document;
}
Example 11
Project: Vega-master  File: NodeImpl.java View source code
/**
 * Wraps a jsoup node in the matching {@code NodeImpl} subtype.
 *
 * @param node          the jsoup node to wrap; may be {@code null}
 * @param ownerDocument the document passed on to the created wrapper
 * @return {@code null} for a {@code null} node; otherwise a wrapper chosen by the node's runtime
 *         type (element, text, comment, data), falling back to a plain {@code NodeImpl}
 */
static NodeImpl createFromJsoupNode(org.jsoup.nodes.Node node, Document ownerDocument) {
    if (node == null) {
        return null;
    }
    if (node instanceof org.jsoup.nodes.Element) {
        return HTMLElementImpl.create((Element) node, ownerDocument);
    }
    if (node instanceof org.jsoup.nodes.TextNode) {
        return new TextImpl((org.jsoup.nodes.TextNode) node, ownerDocument);
    }
    if (node instanceof org.jsoup.nodes.Comment) {
        return new CommentImpl((org.jsoup.nodes.Comment) node, ownerDocument);
    }
    if (node instanceof org.jsoup.nodes.DataNode) {
        final DataNode data = (DataNode) node;
        return new CharacterDataImpl(data, data.getWholeData(), ownerDocument);
    }
    // Any other node type gets the generic wrapper.
    return new NodeImpl(node, ownerDocument);
}
Example 12
Project: alfresco-apache-storm-demo-master  File: JSoupDOMBuilder.java View source code
/**
 * Recursively copies the content of a jsoup node into a W3C {@link Node}.
 * <p>
 * Documents are flattened into {@code out}, elements are re-created with their attributes and
 * children, and text nodes are copied unless {@code out} is a W3C document. Other node types
 * are ignored.
 *
 * @param node the jsoup node containing the content to copy
 * @param out  the W3C {@link Node} that receives the copied content
 * @param doc  the W3C document used to create elements and text nodes
 * @param ns   prefix-to-namespace map; {@code xmlns:*} declarations found on elements are added to it
 */
public static void createDOM(org.jsoup.nodes.Node node, Node out, Document doc, Map<String, String> ns) {
    if (node instanceof org.jsoup.nodes.Document) {
        final org.jsoup.nodes.Document source = (org.jsoup.nodes.Document) node;
        for (org.jsoup.nodes.Node child : source.childNodes()) {
            createDOM(child, out, doc, ns);
        }
        return;
    }

    if (node instanceof org.jsoup.nodes.Element) {
        final org.jsoup.nodes.Element source = (org.jsoup.nodes.Element) node;
        final org.w3c.dom.Element target = doc.createElement(source.tagName());
        out.appendChild(target);

        for (org.jsoup.nodes.Attribute attribute : source.attributes()) {
            String name = attribute.getKey();
            if (name.equals("xmlns")) {
                // omit xhtml namespace
                continue;
            }
            final String prefix = getNSPrefix(name);
            if (prefix != null) {
                if (prefix.equals("xmlns")) {
                    // remember the declared prefix for later attributes
                    ns.put(getLocalName(name), attribute.getValue());
                } else if (!prefix.equals("xml") && ns.get(prefix) == null) {
                    // fix attribute names looking like qnames
                    name = name.replace(':', '_');
                }
            }
            target.setAttribute(name, attribute.getValue());
        }

        for (org.jsoup.nodes.Node child : source.childNodes()) {
            createDOM(child, target, doc, ns);
        }
        return;
    }

    if (node instanceof org.jsoup.nodes.TextNode && !(out instanceof Document)) {
        final String text = ((org.jsoup.nodes.TextNode) node).text();
        out.appendChild(doc.createTextNode(text));
    }
}
Example 13
Project: stanbol-master  File: DOMBuilder.java View source code
/**
 * Internal helper that mirrors a jsoup node (and its subtree) into a W3C {@link Node}.
 * <p>
 * A jsoup document contributes its children directly to {@code out}; an element is re-created via
 * {@code doc} with its attributes and children; a text node is copied unless {@code out} is a W3C
 * document. Any other node type is skipped.
 *
 * @param node the jsoup node containing the content to copy
 * @param out  the W3C {@link Node} that receives the DOM content
 * @param doc  the W3C document used as factory for elements and text nodes
 * @param ns   prefix-to-namespace map, extended with the {@code xmlns:*} declarations encountered
 */
private static void createDOM(org.jsoup.nodes.Node node, Node out, Document doc, Map<String, String> ns) {
    if (node instanceof org.jsoup.nodes.Document) {
        for (org.jsoup.nodes.Node child : ((org.jsoup.nodes.Document) node).childNodes()) {
            createDOM(child, out, doc, ns);
        }
    } else if (node instanceof org.jsoup.nodes.Element) {
        final org.jsoup.nodes.Element jsoupElement = (org.jsoup.nodes.Element) node;
        final org.w3c.dom.Element domElement = doc.createElement(jsoupElement.tagName());
        out.appendChild(domElement);

        for (org.jsoup.nodes.Attribute attr : jsoupElement.attributes()) {
            final String value = attr.getValue();
            String key = attr.getKey();
            //omit xhtml namespace
            if (key.equals("xmlns")) {
                continue;
            }
            final String prefix = getNSPrefix(key);
            final boolean prefixed = prefix != null;
            if (prefixed && prefix.equals("xmlns")) {
                ns.put(getLocalName(key), value);
            } else if (prefixed && !prefix.equals("xml") && ns.get(prefix) == null) {
                //fix attribute names looking like qnames
                key = key.replace(':', '_');
            }
            domElement.setAttribute(key, value);
        }

        for (org.jsoup.nodes.Node child : jsoupElement.childNodes()) {
            createDOM(child, domElement, doc, ns);
        }
    } else if (node instanceof org.jsoup.nodes.TextNode) {
        if (out instanceof Document) {
            // text directly under the document node is not copied
            return;
        }
        out.appendChild(doc.createTextNode(((org.jsoup.nodes.TextNode) node).text()));
    }
}
Example 14
Project: tori-master  File: DOMBuilder.java View source code
/**
 * Internal helper that copies the content of a jsoup node into a W3C {@link Node}, recursing
 * through the jsoup subtree.
 * <p>
 * Documents pass their children straight to {@code out}, elements are rebuilt (attributes first,
 * then children), and text nodes are copied unless {@code out} is a W3C document. Other node
 * types are left out.
 *
 * @param node
 *            the jsoup node containing the content to copy
 * @param out
 *            the W3C {@link Node} that receives the DOM content
 * @param doc
 *            the W3C document used to create elements and text nodes
 * @param ns
 *            prefix-to-namespace map; {@code xmlns:*} declarations found on elements are stored in it
 */
private static void createDOM(final org.jsoup.nodes.Node node, final Node out, final Document doc, final Map<String, String> ns) {
    if (node instanceof org.jsoup.nodes.Document) {
        final org.jsoup.nodes.Document jsoupDoc = (org.jsoup.nodes.Document) node;
        for (org.jsoup.nodes.Node child : jsoupDoc.childNodes()) {
            createDOM(child, out, doc, ns);
        }
        return;
    }

    if (node instanceof org.jsoup.nodes.Element) {
        final org.jsoup.nodes.Element original = (org.jsoup.nodes.Element) node;
        final org.w3c.dom.Element created = doc.createElement(original.tagName());
        out.appendChild(created);

        final org.jsoup.nodes.Attributes attributes = original.attributes();
        for (org.jsoup.nodes.Attribute attribute : attributes) {
            String attributeName = attribute.getKey();
            if (attributeName.equals("xmlns")) {
                // omit xhtml namespace
                continue;
            }

            final String nsPrefix = getNSPrefix(attributeName);
            if (nsPrefix != null) {
                if (nsPrefix.equals("xmlns")) {
                    ns.put(getLocalName(attributeName), attribute.getValue());
                } else if (!nsPrefix.equals("xml")) {
                    final boolean unknownPrefix = ns.get(nsPrefix) == null;
                    if (unknownPrefix) {
                        // fix attribute names looking like qnames
                        attributeName = attributeName.replace(':', '_');
                    }
                }
            }
            created.setAttribute(attributeName, attribute.getValue());
        }

        for (org.jsoup.nodes.Node child : original.childNodes()) {
            createDOM(child, created, doc, ns);
        }
        return;
    }

    if (node instanceof org.jsoup.nodes.TextNode) {
        final org.jsoup.nodes.TextNode textNode = (org.jsoup.nodes.TextNode) node;
        final boolean underDocument = out instanceof Document;
        if (!underDocument) {
            out.appendChild(doc.createTextNode(textNode.text()));
        }
    }
}
Example 15
Project: sisob-academic-data-extractor-master  File: cUtils.java View source code
/**
 * Returns the longest common prefix (or suffix) of the string forms of two nodes.
 * <p>
 * Both nodes are rendered with {@code toString()} and compared character by character, from the
 * start when {@code bReverse} is {@code false} and from the end when it is {@code true}. The
 * comparison stops at the first mismatch or when the shorter string is exhausted.
 *
 * @param bReverse {@code true} to compute the common suffix, {@code false} for the common prefix
 * @param lst1     first node to compare
 * @param lst2     second node to compare
 * @return the shared leading (or trailing) text; empty when the first compared characters differ
 *         or either string is empty
 */
public static String lookPatternByText(boolean bReverse, org.jsoup.nodes.Node lst1, org.jsoup.nodes.Node lst2) {
    final String s1 = lst1.toString();
    final String s2 = lst2.toString();

    // Never look past the end of the shorter string: previously only s1's length bounded the
    // scan, so a shorter s2 that fully matched caused a StringIndexOutOfBoundsException.
    final int limit = Math.min(s1.length(), s2.length());

    int matched = 0;
    while (matched < limit) {
        final int index1 = bReverse ? s1.length() - 1 - matched : matched;
        final int index2 = bReverse ? s2.length() - 1 - matched : matched;
        if (s1.charAt(index1) != s2.charAt(index2)) {
            break;
        }
        matched++;
    }

    // The matched run is a contiguous slice of s1, so take it directly instead of
    // rebuilding it one character at a time.
    return bReverse ? s1.substring(s1.length() - matched) : s1.substring(0, matched);
}
Example 16
Project: jresponder-master  File: TextUtil.java View source code
/* ====================================================================== */
/**
 * Returns the text of a cell, keeping whitespace formatting when possible.
 *
 * @param cell element that contains whitespace formatting
 * @return the whole (unnormalised) text of the cell's first child when that child is a text node
 *         and yields a non-null value; otherwise the result of {@code cell.text()}
 */
public String getWholeText(Element cell) {
    final List<Node> children = cell.childNodes();
    if (!children.isEmpty()) {
        final Node first = children.get(0);
        if (first instanceof TextNode) {
            final String whole = ((TextNode) first).getWholeText();
            if (whole != null) {
                return whole;
            }
        }
    }
    // Fall back to the element's own text.
    return cell.text();
}
Example 17
Project: mbox_tools-master  File: HTMLStripUtil.java View source code
/**
 * Collects the text of every text node visited, separated by single spaces.
 * <p>
 * Non-breaking spaces are converted to regular spaces and the text is trimmed; text that is
 * empty after trimming is skipped. Nodes that are not text nodes are ignored.
 *
 * @param node  the node being entered
 * @param depth depth of the node in the traversal (unused)
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    // Convert non-breaking spaces (U+00A0) to regular spaces. The character is written as an
    // explicit escape so it cannot be silently turned into a plain space by an editor.
    final String text = ((TextNode) node).text().replace('\u00A0', ' ').trim();
    if (text.isEmpty()) {
        return;
    }
    buffer.append(text);
    // The text was just trimmed, so it can never end with a space and a separator is always
    // needed. The last text gets the extra space appended too, but we remove it later.
    buffer.append(" ");
}
Example 18
Project: mylyn.docs-master  File: WhitespaceCleanupProcessor.java View source code
/**
 * Moves leading and trailing whitespace text out of the elements that contain it.
 * <p>
 * Every element returned by {@code body.getAllElements()} that is not whitespace-preserving
 * (per {@code Html.isWhitespacePreserve}) is processed in two passes: first its first child, then
 * its last child. When the inspected child is a {@link TextNode}, the part of it selected by the
 * whitespace index helpers is split off if necessary and re-attached next to the element, at the
 * position chosen by {@code computeBeforeTarget} / {@code computeAfterTarget}. Parents whose
 * children were touched are collected and passed to {@code normalizeTextNodes} before the next
 * element is handled.
 *
 * @param body the element whose elements are processed
 */
private void moveLeadingOrTrailingSpaceOutOfElements(Element body) {
    // parents whose child lists were changed while handling the current element
    Set<Node> affectedParents = new HashSet<Node>();
    for (Element element : body.getAllElements()) {
        if (!Html.isWhitespacePreserve(element)) {
            normalizeTextNodes(element);
            List<Node> children = element.childNodes();
            if (!children.isEmpty()) {
                // pass 1: leading text of the element
                Node firstChild = children.get(0);
                if (firstChild instanceof TextNode) {
                    TextNode textNode = (TextNode) firstChild;
                    String text = textNode.getWholeText();
                    // NOTE(review): presumably -1 means "no non-whitespace character at all" and
                    // 0 means "no leading whitespace" - confirm against firstIndexOfNonWhitespace
                    int nonWhitespaceIndex = firstIndexOfNonWhitespace(text);
                    if (nonWhitespaceIndex > 0) {
                        // record the parent before the node is detached from it
                        affectedParents.add(textNode.parent());
                        // split
                        // NOTE(review): assumes textNode keeps the part before the split offset
                        // and the remainder stays behind in the element - confirm (jsoup splitText)
                        textNode.splitText(nonWhitespaceIndex);
                        // move outside
                        textNode.remove();
                        computeBeforeTarget(element).before(textNode);
                        // record the new parent as well
                        affectedParents.add(textNode.parent());
                    } else if (nonWhitespaceIndex == -1) {
                        // the whole node is moved; note it goes after the element, not before
                        // move outside
                        textNode.remove();
                        computeAfterTarget(element).after(textNode);
                        affectedParents.add(textNode.parent());
                    }
                }
                normalizeEmptySpaceBetweenNodes(element);
                // re-read the children: the list may have changed during pass 1
                children = element.childNodes();
                if (!children.isEmpty()) {
                    // pass 2: trailing text of the element
                    Node lastChild = children.get(children.size() - 1);
                    if (lastChild instanceof TextNode) {
                        TextNode textNode = (TextNode) lastChild;
                        String text = textNode.getWholeText();
                        int lastNonWhitespaceIndex = lastIndexOfNonWhitespace(text);
                        if (lastNonWhitespaceIndex < 0) {
                            // move outside
                            textNode.remove();
                            computeAfterTarget(element).after(textNode);
                            affectedParents.add(textNode.parent());
                        } else if (lastNonWhitespaceIndex < (text.length() - 1)) {
                            // there is text after the last non-whitespace character
                            affectedParents.add(textNode.parent());
                            // split
                            textNode.splitText(lastNonWhitespaceIndex + 1);
                            // move outside
                            // the node moved is the sibling following the original text node,
                            // i.e. the tail produced by the split above
                            textNode = (TextNode) textNode.nextSibling();
                            textNode.remove();
                            computeAfterTarget(element).after(textNode);
                            affectedParents.add(textNode.parent());
                        }
                    }
                }
            }
            if (!affectedParents.isEmpty()) {
                // re-normalize every touched parent, then start clean for the next element
                for (Node parent : affectedParents) {
                    if (parent instanceof Element) {
                        normalizeTextNodes((Element) parent);
                    }
                }
                affectedParents.clear();
            }
        }
    }
}
Example 19
Project: org.eclipse.mylyn.docs-master  File: WhitespaceCleanupProcessor.java View source code
/**
 * Relocates whitespace found at the start or end of an element's text so that it sits outside
 * the element.
 * <p>
 * Each element from {@code body.getAllElements()} for which {@code Html.isWhitespacePreserve}
 * returns {@code false} is normalized and then examined twice: once for its first child and once
 * for its last child. If that child is a {@link TextNode}, the portion indicated by
 * {@code firstIndexOfNonWhitespace} / {@code lastIndexOfNonWhitespace} is split off where needed
 * and re-inserted relative to the node returned by {@code computeBeforeTarget} or
 * {@code computeAfterTarget}. All parents involved are re-normalized with
 * {@code normalizeTextNodes} once the element has been handled.
 *
 * @param body the element whose elements are processed
 */
private void moveLeadingOrTrailingSpaceOutOfElements(Element body) {
    // collects the old and new parents of every text node moved for the current element
    Set<Node> affectedParents = new HashSet<Node>();
    for (Element element : body.getAllElements()) {
        if (!Html.isWhitespacePreserve(element)) {
            normalizeTextNodes(element);
            List<Node> children = element.childNodes();
            if (!children.isEmpty()) {
                // step 1: look at the first child for leading whitespace
                Node firstChild = children.get(0);
                if (firstChild instanceof TextNode) {
                    TextNode textNode = (TextNode) firstChild;
                    String text = textNode.getWholeText();
                    // NOTE(review): the branches below assume -1 signals text without any
                    // non-whitespace character - verify in firstIndexOfNonWhitespace
                    int nonWhitespaceIndex = firstIndexOfNonWhitespace(text);
                    if (nonWhitespaceIndex > 0) {
                        // parent is captured now because remove() below detaches the node
                        affectedParents.add(textNode.parent());
                        // split
                        // NOTE(review): presumably leaves the leading part in textNode and the
                        // rest in a following sibling - confirm against jsoup's splitText
                        textNode.splitText(nonWhitespaceIndex);
                        // move outside
                        textNode.remove();
                        computeBeforeTarget(element).before(textNode);
                        // the node now has a different parent; track that one too
                        affectedParents.add(textNode.parent());
                    } else if (nonWhitespaceIndex == -1) {
                        // no split needed: the entire node is placed after the computed target
                        // move outside
                        textNode.remove();
                        computeAfterTarget(element).after(textNode);
                        affectedParents.add(textNode.parent());
                    }
                }
                normalizeEmptySpaceBetweenNodes(element);
                // fetch the child list again since step 1 may have altered it
                children = element.childNodes();
                if (!children.isEmpty()) {
                    // step 2: look at the last child for trailing whitespace
                    Node lastChild = children.get(children.size() - 1);
                    if (lastChild instanceof TextNode) {
                        TextNode textNode = (TextNode) lastChild;
                        String text = textNode.getWholeText();
                        int lastNonWhitespaceIndex = lastIndexOfNonWhitespace(text);
                        if (lastNonWhitespaceIndex < 0) {
                            // move outside
                            textNode.remove();
                            computeAfterTarget(element).after(textNode);
                            affectedParents.add(textNode.parent());
                        } else if (lastNonWhitespaceIndex < (text.length() - 1)) {
                            // characters remain after the last non-whitespace position
                            affectedParents.add(textNode.parent());
                            // split
                            textNode.splitText(lastNonWhitespaceIndex + 1);
                            // move outside
                            // switch to the sibling that follows the original node: that is the
                            // piece created by the split and the one that gets moved
                            textNode = (TextNode) textNode.nextSibling();
                            textNode.remove();
                            computeAfterTarget(element).after(textNode);
                            affectedParents.add(textNode.parent());
                        }
                    }
                }
            }
            if (!affectedParents.isEmpty()) {
                // only Element parents can be normalized; the set is reset per element
                for (Node parent : affectedParents) {
                    if (parent instanceof Element) {
                        normalizeTextNodes((Element) parent);
                    }
                }
                affectedParents.clear();
            }
        }
    }
}
Example 20
Project: SMSnatcher-master  File: ParseUtils.java View source code
// Lovingly borrowed from https://gist.github.com/491407
/**
 * Recursively removes every comment node below {@code node}.
 *
 * @param node the node whose subtree is stripped of comments
 */
public static void removeComments(Node node) {
    // Children are removed while walking the list, so an index is advanced by hand instead of
    // using a foreach, which would fail with a concurrent list modification error.
    for (int index = 0; index < node.childNodes().size(); ) {
        final Node current = node.childNode(index);
        if (current.nodeName().equals("#comment")) {
            // The next child slides into this index, so do not advance.
            current.remove();
            continue;
        }
        removeComments(current);
        index++;
    }
}
Example 21
Project: WebCollector-master  File: ContentExtractor.java View source code
/**
 * Computes text/tag statistics for {@code node} and, for elements, for its whole subtree.
 * <p>
 * For an element the counts of all children are summed, the element itself is counted as a tag
 * (and as a link tag or paragraph when its tag name is {@code a} or {@code p}), a density value is
 * derived, and the result is stored in {@code infoMap}. For a text node only the text length is
 * recorded. Any other node yields an untouched {@code CountInfo}.
 *
 * @param node the node to analyse
 * @return the statistics gathered for {@code node}
 */
protected CountInfo computeInfo(Node node) {
    final CountInfo info = new CountInfo();

    if (node instanceof Element) {
        final Element tag = (Element) node;

        // Aggregate the statistics of every child.
        for (Node child : tag.childNodes()) {
            final CountInfo sub = computeInfo(child);
            info.textCount += sub.textCount;
            info.linkTextCount += sub.linkTextCount;
            info.tagCount += sub.tagCount;
            info.linkTagCount += sub.linkTagCount;
            info.leafList.addAll(sub.leafList);
            info.densitySum += sub.density;
            info.pCount += sub.pCount;
        }

        // Account for the element itself.
        info.tagCount++;
        final String tagName = tag.tagName();
        if (tagName.equals("a")) {
            // all text inside a link counts as link text
            info.linkTextCount = info.textCount;
            info.linkTagCount++;
        } else if (tagName.equals("p")) {
            info.pCount++;
        }

        // Density: non-link text length per non-link tag, zero when either side is zero.
        final int pureLen = info.textCount - info.linkTextCount;
        final int len = info.tagCount - info.linkTagCount;
        info.density = (pureLen == 0 || len == 0) ? 0 : (pureLen + 0.0) / len;

        infoMap.put(tag, info);
    } else if (node instanceof TextNode) {
        final int length = ((TextNode) node).text().length();
        info.textCount = length;
        info.leafList.add(length);
    }

    return info;
}
Example 22
Project: XCoLab-master  File: EmailNotification.java View source code
/**
 * Resolves a placeholder tag of an e-mail template to the node that should replace it.
 * <p>
 * The superclass is asked first; only when it returns {@code null} are the placeholders known to
 * this class evaluated against the current contest, proposal and recipient.
 *
 * @param tag the placeholder element; its node name selects the replacement
 * @return the replacement node, or {@code null} when the placeholder is unknown or the data it
 *         needs (contest, proposal, contest type) is not available
 */
@Override
protected Node resolvePlaceholderTag(Element tag) {
    final Node inherited = super.resolvePlaceholderTag(tag);
    if (inherited != null) {
        return inherited;
    }

    final Contest contest = getContest();
    final Proposal proposal = getProposal();
    final boolean hasProposal = contest != null && proposal != null;
    final ContestType contestType = (contest == null)
            ? null
            : ContestClientUtil.getContestType(contest.getContestTypeId());
    final boolean hasContestType = contest != null && contestType != null;

    switch (tag.nodeName()) {
        // --- platform and recipient data ---
        case COLAB_NAME_PLACEHOLDER:
            return new TextNode(ConfigurationAttributeKey.COLAB_NAME.get(), "");
        case COLAB_URL_PLACEHOLDER:
            return new TextNode(ConfigurationAttributeKey.COLAB_URL.get(), "");
        case COLAB_ADMIN_EMAIL_PLACEHOLDER:
            return new TextNode(ConfigurationAttributeKey.ADMIN_EMAIL.get(), "");
        case FIRSTNAME_PLACEHOLDER:
            return new TextNode(getRecipient().getFirstName(), "");
        case FULL_NAME_PLACEHOLDER:
            return new TextNode(getRecipient().getFullName(), "");

        // --- links ---
        case CONTEST_LINK_PLACEHOLDER:
            if (contest == null) {
                break;
            }
            return parseXmlNode(getContestLink(contest));
        case PROPOSAL_LINK_PLACEHOLDER: {
            if (!hasProposal) {
                break;
            }
            final String tab = tag.hasAttr("tab") ? tag.attr("tab") : null;
            // Prefer the tag's own text as link text; fall back to the proposal name.
            String linkText = tag.ownText();
            if (!StringUtils.isNotBlank(linkText)) {
                linkText = getProposalAttributeHelper().getAttributeValueString(ProposalAttributeKeys.NAME, "");
            }
            return parseXmlNode(getProposalLinkWithLinkText(contest, proposal, linkText, tab));
        }

        // --- contest-type specific wording ---
        case PROPOSAL_STRING_PLACEHOLDER:
            if (!hasContestType) {
                break;
            }
            return new TextNode(contestType.getProposalName(), "");
        case PROPOSALS_STRING_PLACEHOLDER:
            if (!hasContestType) {
                break;
            }
            return new TextNode(contestType.getProposalNamePlural(), "");
        case CONTEST_STRING_PLACEHOLDER:
            if (!hasContestType) {
                break;
            }
            return new TextNode(contestType.getContestName(), "");
        case CONTESTS_STRING_PLACEHOLDER:
            if (!hasContestType) {
                break;
            }
            return new TextNode(contestType.getContestNamePlural(), "");

        // --- share links ---
        case TWITTER_PLACEHOLDER:
            if (!hasProposal) {
                break;
            }
            return parseXmlNode(getTwitterShareLink(getProposalLinkUrl(contest, proposal), tag.ownText()));
        case PINTEREST_PLACEHOLDER:
            if (!hasProposal) {
                break;
            }
            return parseXmlNode(getPinterestShareLink(getProposalLinkUrl(contest, proposal), tag.ownText()));
        case FACEBOOK_PLACEHOLDER:
            if (!hasProposal) {
                break;
            }
            return parseXmlNode(getFacebookShareLink(getProposalLinkUrl(contest, proposal)));
        case LINKEDIN_PLACEHOLDER:
            if (!hasProposal) {
                break;
            }
            return parseXmlNode(getLinkedInShareLink(getProposalLinkUrl(contest, proposal), tag.attr("title"), tag.ownText()));

        default:
            break;
    }
    // Unknown placeholder, or the required data was missing.
    return null;
}
Example 23
Project: iee-master  File: TextPadParser.java View source code
/**
 * Converts the jsoup node being entered into an {@code INode} and pushes it on the stack.
 * <p>
 * Text nodes become {@code Text}; every other node becomes a {@code Span}. For elements that
 * carry a {@code style} attribute, the inline CSS is parsed and applied to the span's style.
 *
 * @param node  the jsoup node being entered
 * @param depth depth of the node in the traversal (unused)
 */
@Override
public void head(org.jsoup.nodes.Node node, int depth) {
    final INode newNode;
    if (node instanceof org.jsoup.nodes.TextNode) {
        newNode = new Text().setText(((org.jsoup.nodes.TextNode) node).text());
    } else {
        // Elements and all remaining node types map to a span; only elements are styled.
        final Span span = new Span();
        if (node instanceof org.jsoup.nodes.Element && node.hasAttr("style")) {
            applyInlineStyle(span.getStyle(), node.attr("style"));
        }
        newNode = span;
    }
    stack.push(newNode);
}

/**
 * Parses an inline CSS declaration and copies the supported properties (font style, weight,
 * family and size, foreground and background colour) onto {@code style}.
 * An {@link IOException} raised while reading the declaration is reported and otherwise ignored,
 * leaving {@code style} with whatever was applied up to that point.
 */
private void applyInlineStyle(TextStyle style, String css) {
    try {
        final CSSStyleDeclaration styleDecl = parser.parseStyleDeclaration(new InputSource(new StringReader(css)));

        // Italic and bold are always set, so an absent property resets them to false.
        style.setItalic("italic".equals(styleDecl.getPropertyValue("font-style")));
        style.setBold("bold".equals(styleDecl.getPropertyValue("font-weight")));

        final String fontFamily = styleDecl.getPropertyValue("font-family");
        if (fontFamily != null) {
            style.setFont(fontFamily);
        }
        final String fontSize = styleDecl.getPropertyValue("font-size");
        if (!Strings.isNullOrEmpty(fontSize)) {
            // Integer.valueOf replaces the deprecated Integer(String) constructor; parsing rules
            // (and the NumberFormatException for non-numeric values) are unchanged.
            style.setFontSize(Integer.valueOf(fontSize));
        }
        if (!Strings.isNullOrEmpty(styleDecl.getPropertyValue("color"))) {
            style.setFgColor(toColor(styleDecl, "color"));
        }
        if (!Strings.isNullOrEmpty(styleDecl.getPropertyValue("background-color"))) {
            style.setBgColor(toColor(styleDecl, "background-color"));
        }
    } catch (IOException e) {
        // Best effort: a style that cannot be read must not abort the traversal.
        e.printStackTrace();
    }
}

/** Reads the named CSS property as an RGB colour and converts it to a {@code Color}. */
private Color toColor(CSSStyleDeclaration styleDecl, String property) {
    final RGBColor rgb = ((CSSPrimitiveValue) styleDecl.getPropertyCSSValue(property)).getRGBColorValue();
    final int red = (int) rgb.getRed().getFloatValue(CSSPrimitiveValue.CSS_NUMBER);
    final int green = (int) rgb.getGreen().getFloatValue(CSSPrimitiveValue.CSS_NUMBER);
    final int blue = (int) rgb.getBlue().getFloatValue(CSSPrimitiveValue.CSS_NUMBER);
    return new Color(red, green, blue);
}
Example 24
Project: blade.tools-master  File: MarkdownParser.java View source code
/**
 * Splits rendered HTML into sections keyed by link target.
 * <p>
 * For every {@code h3} that is a direct child of an {@code a[href]}, the markup of the anchor's
 * sibling nodes is concatenated, starting at the anchor's sibling index and stopping at the first
 * sibling whose markup starts with {@code <hr}.
 *
 * @param html the HTML to parse
 * @return a map from each anchor's {@code href} to the concatenated markup collected for it
 */
private static Map<String, String> parseHtml(String html) {
    final Map<String, String> sectionsByHref = new HashMap<>();

    for (Element heading : Jsoup.parse(html).select("a[href] > h3")) {
        final Element anchor = heading.parent();
        int position = anchor.siblingIndex();
        final List<Node> siblings = anchor.siblingNodes();

        // Append sibling markup until a horizontal rule ends the section.
        final StringBuilder section = new StringBuilder();
        while (position < siblings.size()) {
            final String markup = siblings.get(position).toString();
            if (markup.startsWith("<hr")) {
                break;
            }
            section.append(markup);
            position++;
        }

        sectionsByHref.put(anchor.attr("href"), section.toString());
    }
    return sectionsByHref;
}
Example 25
Project: java-autolinker-master  File: UrlAutoLinkerTest.java View source code
/**
 * Checks that {@code UrlAutoLinker.createLinks} splits a text node into the expected
 * sequence of plain text nodes and {@code <a>} elements for several inputs: no URL, a
 * bare host name in the middle and at the start, a mix of bare and {@code https} URLs,
 * a long URL whose link text is shortened, and input with surrounding whitespace.
 */
@Test
public void createLinksShouldWork() {
    final UrlAutoLinker autoLinker = new UrlAutoLinker(30);
    List<Node> result;

    // text without any URL stays a single text node
    result = autoLinker.createLinks(new TextNode("das ist ein test ohne urls", ""));
    Assert.assertEquals(1, result.size());
    assertTextNode(result.get(0), "das ist ein test ohne urls");

    // bare host name in the middle of the text
    result = autoLinker.createLinks(new TextNode("das ist eine url ohne twitter.com ohne protocoll", ""));
    Assert.assertEquals(3, result.size());
    assertTextNode(result.get(0), "das ist eine url ohne ");
    assertAnchor(result.get(1), "http://twitter.com", "twitter.com");
    assertTextNode(result.get(2), " ohne protocoll");

    // bare host name at the very start
    result = autoLinker.createLinks(new TextNode("twitter.com ohne protocoll am anfang", ""));
    Assert.assertEquals(2, result.size());
    assertAnchor(result.get(0), "http://twitter.com", "twitter.com");
    assertTextNode(result.get(1), " ohne protocoll am anfang");

    // bare host name at the start and a full https URL at the end
    result = autoLinker.createLinks(new TextNode("twitter.com ohne protocoll am anfang mit am ende https://dailyfratze.de?foo=bar", ""));
    Assert.assertEquals(3, result.size());
    assertAnchor(result.get(0), "http://twitter.com", "twitter.com");
    assertTextNode(result.get(1), " ohne protocoll am anfang mit am ende ");
    assertAnchor(result.get(2), "https://dailyfratze.de?foo=bar", "dailyfratze.de?foo=bar");

    // long URL: href/title keep the full URL, the visible text is shortened
    result = autoLinker.createLinks(new TextNode("das ist eine url ohne https://dailyfratze.de/app/tags/CoStarring/Anton#taggedPictures ohne protocoll", ""));
    assertAnchor(result.get(1), "https://dailyfratze.de/app/tags/CoStarring/Anton#taggedPictures", "dailyfratze.de/app/tags/CoSta…");

    // leading and trailing whitespace is kept as separate text nodes
    result = autoLinker.createLinks(new TextNode("  twitter.com ohne protocoll am anfang mit am ende https://dailyfratze.de?foo=bar  ", ""));
    Assert.assertEquals(5, result.size());
    assertTextNode(result.get(0), "  ");
    assertAnchor(result.get(1), "http://twitter.com", "twitter.com");
    assertTextNode(result.get(2), " ohne protocoll am anfang mit am ende ");
    assertAnchor(result.get(3), "https://dailyfratze.de?foo=bar", "dailyfratze.de?foo=bar");
    assertTextNode(result.get(4), "  ");
}

/** Asserts that {@code node} is a {@code TextNode} whose whole text equals {@code expectedText}. */
private static void assertTextNode(Node node, String expectedText) {
    Assert.assertTrue(node instanceof TextNode);
    Assert.assertEquals(expectedText, ((TextNode) node).getWholeText());
}

/**
 * Asserts that {@code node} is an {@code <a>} element whose {@code href} and {@code title}
 * both equal {@code expectedUrl} and whose first child is a text node reading
 * {@code expectedLabel}.
 */
private static void assertAnchor(Node node, String expectedUrl, String expectedLabel) {
    Assert.assertTrue(node instanceof Element);
    Element a = (Element) node;
    Assert.assertEquals("a", a.tagName());
    Assert.assertEquals(expectedUrl, a.attr("href"));
    Assert.assertEquals(expectedUrl, a.attr("title"));
    Assert.assertEquals(expectedLabel, ((TextNode) a.childNode(0)).getWholeText());
}
Example 26
Project: nate-master  File: JsoupBackedNateElement.java View source code
/**
 * Replaces this element's children with clones of the jsoup nodes held by the given source.
 *
 * <p>The source is validated before anything is removed, so a rejected argument leaves
 * the current children untouched.
 *
 * @param newChildrenSource the source of the new children; must be a
 *        {@code JsoupBackedNateDocumentFragment}
 * @throws IllegalStateException if {@code newChildrenSource} is not a
 *         {@code JsoupBackedNateDocumentFragment}
 */
@Override
public void replaceChildren(NateDocument newChildrenSource) {
    verifyState();
    // Check the argument first: failing after removeChildren() would leave the element emptied.
    if (!(newChildrenSource instanceof JsoupBackedNateDocumentFragment)) {
        throw new IllegalStateException("Internal Error.  Expected JsoupBackedNateDocumentFragment, but got: " + newChildrenSource);
    }
    removeChildren();
    Collection<Node> newChildren = ((JsoupBackedAbstractNode) newChildrenSource).getJsoupNodes();
    for (Node node : newChildren) {
        // append a clone so the nodes held by the source are not moved into this element
        this.element.appendChild(node.clone());
    }
}
Example 27
Project: sitebricks-master  File: HtmlTemplateCompiler.java View source code
/**
 * Recursively turns the child nodes of {@code node} into a chain of sitebricks widgets.
 *
 * <p>Elements are walked depth-first and widgetized together with the chain built from
 * their own children; text nodes become text widgets (wrapped in an annotation widget
 * when they carry {@code ANNOTATION_KEY}); comments and data nodes become raw text
 * widgets; XML declarations become XML directive widgets. Expression compile failures
 * are recorded in {@code pc.errors} instead of being thrown.
 *
 * @param pc   the compiling context holding the current form, lexical scopes and errors
 * @param node the node whose children are converted
 * @return the widget chain built from the children of {@code node}
 */
@NotNull
private <N extends Node> WidgetChain walk(PageCompilingContext pc, N node) {
    final WidgetChain chain = Chains.proceeding();
    for (Node current : node.childNodes()) {
        if (current instanceof Element) {
            final Element element = (Element) current;
            // remember the form being entered
            if (element.tagName().equals("form")) {
                pc.form = element;
            }
            // entering a repeat widget may open a lexical scope (decided from the previous node)
            final boolean popScope = lexicalClimb(pc, element);
            try {
                // post-order, depth-first: build the subtree chain before the widget itself
                WidgetChain descendants = walk(pc, element);
                chain.addWidget(widgetize(pc, element, descendants));
            } finally {
                lexicalDescend(pc, element, popScope);
            }
            continue;
        }

        if (current instanceof TextNode) {
            final TextNode text = (TextNode) current;
            // entering a repeat widget may open a lexical scope (decided from the previous node)
            final boolean popScope = lexicalClimb(pc, text);
            try {
                Renderable textWidget = registry.textWidget(cleanHtml(current), pc.lexicalScopes.peek());
                if (text.hasAttr(ANNOTATION_KEY)) {
                    // annotated text: the text widget becomes the sole child of the annotation widget
                    WidgetChain annotatedContent = Chains.proceeding().addWidget(textWidget);
                    String widgetName = text.attr(ANNOTATION_KEY).toLowerCase();
                    Renderable annotationWidget = registry.newWidget(widgetName, text.attr(ANNOTATION_CONTENT), annotatedContent, pc.lexicalScopes.peek());
                    chain.addWidget(annotationWidget);
                } else {
                    // plain text goes straight onto the chain
                    chain.addWidget(textWidget);
                }
            } catch (ExpressionCompileException e) {
                pc.errors.add(CompileError.in(node.outerHtml()).near(line(current)).causedBy(e));
            }
            if (popScope) {
                pc.lexicalScopes.pop();
            }
            continue;
        }

        if ((current instanceof Comment) || (current instanceof DataNode)) {
            // comments and data nodes are emitted as raw text widgets
            try {
                chain.addWidget(registry.textWidget(cleanHtml(current), pc.lexicalScopes.peek()));
            } catch (ExpressionCompileException e) {
                pc.errors.add(CompileError.in(node.outerHtml()).near(line(node)).causedBy(e));
            }
            continue;
        }

        if (current instanceof XmlDeclaration) {
            try {
                chain.addWidget(registry.xmlDirectiveWidget(((XmlDeclaration) current).getWholeDeclaration(), pc.lexicalScopes.peek()));
            } catch (ExpressionCompileException e) {
                pc.errors.add(CompileError.in(node.outerHtml()).near(line(node)).causedBy(e));
            }
        }
    }
    // the chain is returned even when nothing was added to it
    return chain;
}
Example 28
Project: Tanaguru-master  File: HTMLJsoupCleanerImpl.java View source code
/**
 * Removes every comment node found in the subtree below {@code node}.
 *
 * @param node the node whose descendants are scanned; children named {@code #comment}
 *             are removed, all other children are processed recursively
 */
private void removeComments(Node node) {
    // Children are removed during the scan, so a for-each over the child list would fail
    // with a concurrent modification error. Walk by index instead and advance only when
    // the child at the current position is kept.
    for (int position = 0; position < node.childNodes().size(); ) {
        Node candidate = node.childNode(position);
        if (candidate.nodeName().equals("#comment")) {
            candidate.remove();
            continue;
        }
        removeComments(candidate);
        position++;
    }
}
Example 29
Project: storm-crawler-master  File: JSoupDOMBuilder.java View source code
/**
 * Recursively copies a jsoup node, and everything below it, into a W3C DOM tree.
 *
 * <p>A jsoup document contributes only its children. An element is recreated through
 * {@code doc} and appended to {@code out} together with its attributes and children.
 * Text, comment and data nodes are appended only when {@code out} is not the W3C
 * {@link Document} itself; data nodes are copied as text nodes. Any other node type is
 * ignored.
 *
 * @param node the jsoup node to copy
 * @param out  the W3C {@link Node} that receives the copied content
 * @param doc  the W3C {@link Document} used to create the new elements, text nodes and
 *             comments
 * @param ns   namespace map: filled from {@code xmlns:}-prefixed attributes and consulted
 *             to decide whether a prefixed attribute name can be kept as is
 */
public static void createDOM(org.jsoup.nodes.Node node, Node out, Document doc, Map<String, String> ns) {
    // must be tested before Element, exactly as in the original branch order
    if (node instanceof org.jsoup.nodes.Document) {
        for (org.jsoup.nodes.Node child : ((org.jsoup.nodes.Document) node).childNodes()) {
            createDOM(child, out, doc, ns);
        }
        return;
    }

    if (node instanceof org.jsoup.nodes.Element) {
        org.jsoup.nodes.Element source = (org.jsoup.nodes.Element) node;
        org.w3c.dom.Element target = doc.createElement(source.tagName());
        out.appendChild(target);
        for (org.jsoup.nodes.Attribute attribute : source.attributes()) {
            String key = attribute.getKey();
            // omit xhtml namespace
            if (key.equals("xmlns")) {
                continue;
            }
            String domName = key;
            String prefix = getNSPrefix(key);
            if (prefix != null) {
                if (prefix.equals("xmlns")) {
                    // record the declaration; the attribute itself is still copied below
                    ns.put(getLocalName(key), attribute.getValue());
                } else if (!prefix.equals("xml") && ns.get(prefix) == null) {
                    // fix attribute names looking like qnames
                    domName = key.replace(':', '_');
                }
            }
            target.setAttribute(domName, attribute.getValue());
        }
        for (org.jsoup.nodes.Node child : source.childNodes()) {
            createDOM(child, target, doc, ns);
        }
        return;
    }

    // the remaining node kinds are never attached directly to the W3C document
    if (out instanceof Document) {
        return;
    }
    if (node instanceof org.jsoup.nodes.TextNode) {
        out.appendChild(doc.createTextNode(((org.jsoup.nodes.TextNode) node).text()));
    } else if (node instanceof org.jsoup.nodes.Comment) {
        out.appendChild(doc.createComment(((org.jsoup.nodes.Comment) node).getData()));
    } else if (node instanceof org.jsoup.nodes.DataNode) {
        String whole = ((org.jsoup.nodes.DataNode) node).getWholeData();
        out.appendChild(doc.createTextNode(whole));
    }
}
Example 30
Project: web-crawler-master  File: JSoupDOMBuilder.java View source code
/**
 * Mirrors the content of a jsoup node into a W3C DOM tree, descending recursively.
 *
 * <p>For a jsoup document only the children are copied. For an element, a matching W3C
 * element is created via {@code doc}, appended to {@code out}, given the element's
 * attributes, and used as the target for the element's children. Text, comment and data
 * nodes are appended unless {@code out} is the W3C {@link Document}; a data node is
 * written as a text node. Other node types are skipped.
 *
 * @param node the jsoup node whose content is copied
 * @param out  the W3C {@link Node} that receives the DOM content
 * @param doc  the W3C {@link Document} acting as factory for the created nodes
 * @param ns   map updated with the values of {@code xmlns:}-prefixed attributes and looked
 *             up for other prefixes; an attribute whose prefix is not in the map has its
 *             colons replaced by underscores
 */
public static void createDOM(org.jsoup.nodes.Node node, Node out, Document doc, Map<String, String> ns) {
    if (node instanceof org.jsoup.nodes.Document) {
        org.jsoup.nodes.Document jsoupDocument = (org.jsoup.nodes.Document) node;
        for (org.jsoup.nodes.Node topLevel : jsoupDocument.childNodes()) {
            createDOM(topLevel, out, doc, ns);
        }
    } else if (node instanceof org.jsoup.nodes.Element) {
        org.jsoup.nodes.Element jsoupElement = (org.jsoup.nodes.Element) node;
        org.w3c.dom.Element domElement = doc.createElement(jsoupElement.tagName());
        out.appendChild(domElement);
        for (org.jsoup.nodes.Attribute attr : jsoupElement.attributes()) {
            final String originalName = attr.getKey();
            // omit xhtml namespace
            if (originalName.equals("xmlns")) {
                continue;
            }
            final String prefix = getNSPrefix(originalName);
            boolean rename = false;
            if (prefix != null) {
                if (prefix.equals("xmlns")) {
                    // namespace declaration: remember it, then copy it like any other attribute
                    ns.put(getLocalName(originalName), attr.getValue());
                } else if (!prefix.equals("xml")) {
                    // fix attribute names looking like qnames
                    rename = ns.get(prefix) == null;
                }
            }
            String writtenName = rename ? originalName.replace(':', '_') : originalName;
            domElement.setAttribute(writtenName, attr.getValue());
        }
        for (org.jsoup.nodes.Node nested : jsoupElement.childNodes()) {
            createDOM(nested, domElement, doc, ns);
        }
    } else if (!(out instanceof Document)) {
        // text, comment and data nodes are only attached below an element, never to the document
        if (node instanceof org.jsoup.nodes.TextNode) {
            org.jsoup.nodes.TextNode text = (org.jsoup.nodes.TextNode) node;
            out.appendChild(doc.createTextNode(text.text()));
        } else if (node instanceof org.jsoup.nodes.Comment) {
            org.jsoup.nodes.Comment comment = (org.jsoup.nodes.Comment) node;
            out.appendChild(doc.createComment(comment.getData()));
        } else if (node instanceof org.jsoup.nodes.DataNode) {
            org.jsoup.nodes.DataNode data = (org.jsoup.nodes.DataNode) node;
            out.appendChild(doc.createTextNode(data.getWholeData()));
        }
    }
}
Example 31
Project: asta4d-master  File: Asta4DTagSupportHtmlTreeBuilderState.java View source code
boolean process(Token t, Asta4DTagSupportHtmlTreeBuilder tb) {
    switch(t.type) {
        case Character:
            {
                Token.Character c = t.asCharacter();
                if (c.getData().equals(nullString)) {
                    // todo confirm that check
                    tb.error(this);
                    return false;
                } else if (tb.framesetOk() && isWhitespace(c)) {
                    // don't check if whitespace if frames already closed
                    tb.reconstructFormattingElements();
                    tb.insert(c);
                } else {
                    tb.reconstructFormattingElements();
                    tb.insert(c);
                    tb.framesetOk(false);
                }
                break;
            }
        case Comment:
            {
                tb.insert(t.asComment());
                break;
            }
        case Doctype:
            {
                tb.error(this);
                return false;
            }
        case StartTag:
            Token.StartTag startTag = t.asStartTag();
            String name = startTag.name();
            if (name.equals("html")) {
                tb.error(this);
                // merge attributes onto real html
                Element html = tb.getStack().getFirst();
                for (Attribute attribute : startTag.getAttributes()) {
                    if (!html.hasAttr(attribute.getKey()))
                        html.attributes().put(attribute);
                }
            } else if (StringUtil.in(name, Constants.InBodyStartToHead)) {
                return tb.process(t, InHead);
            } else if (name.equals("body")) {
                tb.error(this);
                LinkedList<Element> stack = tb.getStack();
                if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).nodeName().equals("body"))) {
                    // ignore
                    return false;
                } else {
                    tb.framesetOk(false);
                    Element body = stack.get(1);
                    for (Attribute attribute : startTag.getAttributes()) {
                        if (!body.hasAttr(attribute.getKey()))
                            body.attributes().put(attribute);
                    }
                }
            } else if (name.equals("frameset")) {
                tb.error(this);
                LinkedList<Element> stack = tb.getStack();
                if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).nodeName().equals("body"))) {
                    // ignore
                    return false;
                } else if (!tb.framesetOk()) {
                    // ignore frameset
                    return false;
                } else {
                    Element second = stack.get(1);
                    if (second.parent() != null)
                        second.remove();
                    // pop up to html element
                    while (stack.size() > 1) stack.removeLast();
                    tb.insert(startTag);
                    tb.transition(InFrameset);
                }
            } else if (StringUtil.in(name, Constants.InBodyStartPClosers)) {
                if (tb.inButtonScope("p")) {
                    tb.process(new Token.EndTag("p"));
                }
                tb.insert(startTag);
            } else if (StringUtil.in(name, Constants.Headings)) {
                if (tb.inButtonScope("p")) {
                    tb.process(new Token.EndTag("p"));
                }
                if (StringUtil.in(tb.currentElement().nodeName(), Constants.Headings)) {
                    tb.error(this);
                    tb.pop();
                }
                tb.insert(startTag);
            } else if (StringUtil.in(name, Constants.InBodyStartPreListing)) {
                if (tb.inButtonScope("p")) {
                    tb.process(new Token.EndTag("p"));
                }
                tb.insert(startTag);
                // todo: ignore LF if next token
                tb.framesetOk(false);
            } else if (name.equals("form")) {
                if (tb.getFormElement() != null) {
                    tb.error(this);
                    return false;
                }
                if (tb.inButtonScope("p")) {
                    tb.process(new Token.EndTag("p"));
                }
                tb.insertForm(startTag, true);
            } else if (name.equals("li")) {
                tb.framesetOk(false);
                LinkedList<Element> stack = tb.getStack();
                for (int i = stack.size() - 1; i > 0; i--) {
                    Element el = stack.get(i);
                    if (el.nodeName().equals("li")) {
                        tb.process(new Token.EndTag("li"));
                        break;
                    }
                    if (tb.isSpecial(el) && !StringUtil.in(el.nodeName(), Constants.InBodyStartLiBreakers))
                        break;
                }
                if (tb.inButtonScope("p")) {
                    tb.process(new Token.EndTag("p"));
                }
                tb.insert(startTag);
            } else if (StringUtil.in(name, Constants.DdDt)) {
                tb.framesetOk(false);
                LinkedList<Element> stack = tb.getStack();
                for (int i = stack.size() - 1; i > 0; i--) {
                    Element el = stack.get(i);
                    if (StringUtil.in(el.nodeName(), Constants.DdDt)) {
                        tb.process(new Token.EndTag(el.nodeName()));
                        break;
                    }
                    if (tb.isSpecial(el) && !StringUtil.in(el.nodeName(), Constants.InBodyStartLiBreakers))
                        break;
                }
                if (tb.inButtonScope("p")) {
                    tb.process(new Token.EndTag("p"));
                }
                tb.insert(startTag);
            } else if (name.equals("plaintext")) {
                if (tb.inButtonScope("p")) {
                    tb.process(new Token.EndTag("p"));
                }
                tb.insert(startTag);
                // once in, never gets out
                tb.tokeniser.transition(TokeniserState.PLAINTEXT);
            } else if (name.equals("button")) {
                if (tb.inButtonScope("button")) {
                    // close and reprocess
                    tb.error(this);
                    tb.process(new Token.EndTag("button"));
                    tb.process(startTag);
                } else {
                    tb.reconstructFormattingElements();
                    tb.insert(startTag);
                    tb.framesetOk(false);
                }
            } else if (name.equals("a")) {
                if (tb.getActiveFormattingElement("a") != null) {
                    tb.error(this);
                    tb.process(new Token.EndTag("a"));
                    // still on stack?
                    Element remainingA = tb.getFromStack("a");
                    if (remainingA != null) {
                        tb.removeFromActiveFormattingElements(remainingA);
                        tb.removeFromStack(remainingA);
                    }
                }
                tb.reconstructFormattingElements();
                Element a = tb.insert(startTag);
                tb.pushActiveFormattingElements(a);
            } else if (StringUtil.in(name, Constants.Formatters)) {
                tb.reconstructFormattingElements();
                Element el = tb.insert(startTag);
                tb.pushActiveFormattingElements(el);
            } else if (name.equals("nobr")) {
                tb.reconstructFormattingElements();
                if (tb.inScope("nobr")) {
                    tb.error(this);
                    tb.process(new Token.EndTag("nobr"));
                    tb.reconstructFormattingElements();
                }
                Element el = tb.insert(startTag);
                tb.pushActiveFormattingElements(el);
            } else if (StringUtil.in(name, Constants.InBodyStartApplets)) {
                tb.reconstructFormattingElements();
                tb.insert(startTag);
                tb.insertMarkerToFormattingElements();
                tb.framesetOk(false);
            } else if (name.equals("table")) {
                if (tb.getDocument().quirksMode() != Document.QuirksMode.quirks && tb.inButtonScope("p")) {
                    tb.process(new Token.EndTag("p"));
                }
                tb.insert(startTag);
                tb.framesetOk(false);
                tb.transition(InTable);
            } else if (StringUtil.in(name, Constants.InBodyStartEmptyFormatters)) {
                tb.reconstructFormattingElements();
                tb.insertEmpty(startTag);
                tb.framesetOk(false);
            } else if (name.equals("input")) {
                tb.reconstructFormattingElements();
                Element el = tb.insertEmpty(startTag);
                if (!el.attr("type").equalsIgnoreCase("hidden"))
                    tb.framesetOk(false);
            } else if (StringUtil.in(name, Constants.InBodyStartMedia)) {
                tb.insertEmpty(startTag);
            } else if (name.equals("hr")) {
                if (tb.inButtonScope("p")) {
                    tb.process(new Token.EndTag("p"));
                }
                tb.insertEmpty(startTag);
                tb.framesetOk(false);
            } else if (name.equals("image")) {
                if (tb.getFromStack("svg") == null)
                    // change <image> to <img>, unless in svg
                    return tb.process(startTag.name("img"));
                else
                    tb.insert(startTag);
            } else if (name.equals("isindex")) {
                // how much do we care about the early 90s?
                tb.error(this);
                if (tb.getFormElement() != null)
                    return false;
                tb.tokeniser.acknowledgeSelfClosingFlag();
                tb.process(new Token.StartTag("form"));
                if (startTag.attributes.hasKey("action")) {
                    Element form = tb.getFormElement();
                    form.attr("action", startTag.attributes.get("action"));
                }
                tb.process(new Token.StartTag("hr"));
                tb.process(new Token.StartTag("label"));
                // hope you like english.
                String prompt = startTag.attributes.hasKey("prompt") ? startTag.attributes.get("prompt") : "This is a searchable index. Enter search keywords: ";
                tb.process(new Token.Character(prompt));
                // input
                Attributes inputAttribs = new Attributes();
                for (Attribute attr : startTag.attributes) {
                    if (!StringUtil.in(attr.getKey(), Constants.InBodyStartInputAttribs))
                        inputAttribs.put(attr);
                }
                inputAttribs.put("name", "isindex");
                tb.process(new Token.StartTag("input", inputAttribs));
                tb.process(new Token.EndTag("label"));
                tb.process(new Token.StartTag("hr"));
                tb.process(new Token.EndTag("form"));
            } else if (name.equals("textarea")) {
                tb.insert(startTag);
                // todo: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move on to the next
                // one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
                tb.tokeniser.transition(TokeniserState.Rcdata);
                tb.markInsertionMode();
                tb.framesetOk(false);
                tb.transition(Text);
            } else if (name.equals("xmp")) {
                if (tb.inButtonScope("p")) {
                    tb.process(new Token.EndTag("p"));
                }
                tb.reconstructFormattingElements();
                tb.framesetOk(false);
                handleRawtext(startTag, tb);
            } else if (name.equals("iframe")) {
                tb.framesetOk(false);
                handleRawtext(startTag, tb);
            } else if (name.equals("noembed")) {
                // also handle noscript if script enabled
                handleRawtext(startTag, tb);
            } else if (name.equals("select")) {
                tb.reconstructFormattingElements();
                tb.insert(startTag);
                tb.framesetOk(false);
                Asta4DTagSupportHtmlTreeBuilderState state = tb.state();
                if (state.equals(InTable) || state.equals(InCaption) || state.equals(InTableBody) || state.equals(InRow) || state.equals(InCell))
                    tb.transition(InSelectInTable);
                else
                    tb.transition(InSelect);
            } else if (StringUtil.in(name, Constants.InBodyStartOptions)) {
                if (tb.currentElement().nodeName().equals("option"))
                    tb.process(new Token.EndTag("option"));
                tb.reconstructFormattingElements();
                tb.insert(startTag);
            } else if (StringUtil.in(name, Constants.InBodyStartRuby)) {
                if (tb.inScope("ruby")) {
                    tb.generateImpliedEndTags();
                    if (!tb.currentElement().nodeName().equals("ruby")) {
                        tb.error(this);
                        // i.e. close up to but not include name
                        tb.popStackToBefore("ruby");
                    }
                    tb.insert(startTag);
                }
            } else if (name.equals("math")) {
                tb.reconstructFormattingElements();
                // todo: handle A start tag whose tag name is "math" (i.e. foreign, mathml)
                tb.insert(startTag);
                tb.tokeniser.acknowledgeSelfClosingFlag();
            } else if (name.equals("svg")) {
                tb.reconstructFormattingElements();
                // todo: handle A start tag whose tag name is "svg" (xlink, svg)
                tb.insert(startTag);
                tb.tokeniser.acknowledgeSelfClosingFlag();
            } else if (StringUtil.in(name, Constants.InBodyStartDrop)) {
                tb.error(this);
                return false;
            } else {
                tb.reconstructFormattingElements();
                tb.insert(startTag);
            }
            break;
        case EndTag:
            Token.EndTag endTag = t.asEndTag();
            name = endTag.name();
            if (name.equals("body")) {
                if (!tb.inScope("body")) {
                    tb.error(this);
                    return false;
                } else {
                    // todo: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead,
                    // tr, body, html
                    tb.transition(AfterBody);
                }
            } else if (name.equals("html")) {
                boolean notIgnored = tb.process(new Token.EndTag("body"));
                if (notIgnored)
                    return tb.process(endTag);
            } else if (StringUtil.in(name, Constants.InBodyEndClosers)) {
                if (!tb.inScope(name)) {
                    // nothing to close
                    tb.error(this);
                    return false;
                } else {
                    tb.generateImpliedEndTags();
                    if (!tb.currentElement().nodeName().equals(name))
                        tb.error(this);
                    tb.popStackToClose(name);
                }
            } else if (name.equals("form")) {
                Element currentForm = tb.getFormElement();
                tb.setFormElement(null);
                if (currentForm == null || !tb.inScope(name)) {
                    tb.error(this);
                    return false;
                } else {
                    tb.generateImpliedEndTags();
                    if (!tb.currentElement().nodeName().equals(name))
                        tb.error(this);
                    // remove currentForm from stack. will shift anything under up.
                    tb.removeFromStack(currentForm);
                }
            } else if (name.equals("p")) {
                if (!tb.inButtonScope(name)) {
                    tb.error(this);
                    // if no p to close, creates an empty <p></p>
                    tb.process(new Token.StartTag(name));
                    return tb.process(endTag);
                } else {
                    tb.generateImpliedEndTags(name);
                    if (!tb.currentElement().nodeName().equals(name))
                        tb.error(this);
                    tb.popStackToClose(name);
                }
            } else if (name.equals("li")) {
                if (!tb.inListItemScope(name)) {
                    tb.error(this);
                    return false;
                } else {
                    tb.generateImpliedEndTags(name);
                    if (!tb.currentElement().nodeName().equals(name))
                        tb.error(this);
                    tb.popStackToClose(name);
                }
            } else if (StringUtil.in(name, Constants.DdDt)) {
                if (!tb.inScope(name)) {
                    tb.error(this);
                    return false;
                } else {
                    tb.generateImpliedEndTags(name);
                    if (!tb.currentElement().nodeName().equals(name))
                        tb.error(this);
                    tb.popStackToClose(name);
                }
            } else if (StringUtil.in(name, Constants.Headings)) {
                if (!tb.inScope(Constants.Headings)) {
                    tb.error(this);
                    return false;
                } else {
                    tb.generateImpliedEndTags(name);
                    if (!tb.currentElement().nodeName().equals(name))
                        tb.error(this);
                    tb.popStackToClose(Constants.Headings);
                }
            } else if (name.equals("sarcasm")) {
                // *sigh*
                return anyOtherEndTag(t, tb);
            } else if (StringUtil.in(name, Constants.InBodyEndAdoptionFormatters)) {
                // Adoption Agency Algorithm.
                OUTER: for (int i = 0; i < 8; i++) {
                    Element formatEl = tb.getActiveFormattingElement(name);
                    if (formatEl == null)
                        return anyOtherEndTag(t, tb);
                    else if (!tb.onStack(formatEl)) {
                        tb.error(this);
                        tb.removeFromActiveFormattingElements(formatEl);
                        return true;
                    } else if (!tb.inScope(formatEl.nodeName())) {
                        tb.error(this);
                        return false;
                    } else if (tb.currentElement() != formatEl)
                        tb.error(this);
                    Element furthestBlock = null;
                    Element commonAncestor = null;
                    boolean seenFormattingElement = false;
                    LinkedList<Element> stack = tb.getStack();
                    // the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents
                    // run-aways
                    final int stackSize = stack.size();
                    for (int si = 0; si < stackSize && si < 64; si++) {
                        Element el = stack.get(si);
                        if (el == formatEl) {
                            commonAncestor = stack.get(si - 1);
                            seenFormattingElement = true;
                        } else if (seenFormattingElement && tb.isSpecial(el)) {
                            furthestBlock = el;
                            break;
                        }
                    }
                    if (furthestBlock == null) {
                        tb.popStackToClose(formatEl.nodeName());
                        tb.removeFromActiveFormattingElements(formatEl);
                        return true;
                    }
                    // todo: Let a bookmark note the position of the formatting element in the list of active formatting elements
                    // relative to the elements on either side of it in the list.
                    // does that mean: int pos of format el in list?
                    Element node = furthestBlock;
                    Element lastNode = furthestBlock;
                    INNER: for (int j = 0; j < 3; j++) {
                        if (tb.onStack(node))
                            node = tb.aboveOnStack(node);
                        if (!tb.isInActiveFormattingElements(node)) {
                            // note no bookmark check
                            tb.removeFromStack(node);
                            continue INNER;
                        } else if (node == formatEl)
                            break INNER;
                        Element replacement = new Element(Tag.valueOf(node.nodeName()), tb.getBaseUri());
                        tb.replaceActiveFormattingElement(node, replacement);
                        tb.replaceOnStack(node, replacement);
                        node = replacement;
                        if (lastNode == furthestBlock) {
                        // todo: move the aforementioned bookmark to be immediately after the new node in the list of active
                        // formatting elements.
                        // not getting how this bookmark both straddles the element above, but is inbetween here...
                        }
                        if (lastNode.parent() != null)
                            lastNode.remove();
                        node.appendChild(lastNode);
                        lastNode = node;
                    }
                    if (StringUtil.in(commonAncestor.nodeName(), Constants.InBodyEndTableFosters)) {
                        if (lastNode.parent() != null)
                            lastNode.remove();
                        tb.insertInFosterParent(lastNode);
                    } else {
                        if (lastNode.parent() != null)
                            lastNode.remove();
                        commonAncestor.appendChild(lastNode);
                    }
                    Element adopter = new Element(formatEl.tag(), tb.getBaseUri());
                    adopter.attributes().addAll(formatEl.attributes());
                    Node[] childNodes = furthestBlock.childNodes().toArray(new Node[furthestBlock.childNodeSize()]);
                    for (Node childNode : childNodes) {
                        // append will reparent. thus the clone to avoid concurrent mod.
                        adopter.appendChild(childNode);
                    }
                    furthestBlock.appendChild(adopter);
                    tb.removeFromActiveFormattingElements(formatEl);
                    // todo: insert the new element into the list of active formatting elements at the position of the aforementioned
                    // bookmark.
                    tb.removeFromStack(formatEl);
                    tb.insertOnStackAfter(furthestBlock, adopter);
                }
            } else if (StringUtil.in(name, Constants.InBodyStartApplets)) {
                if (!tb.inScope("name")) {
                    if (!tb.inScope(name)) {
                        tb.error(this);
                        return false;
                    }
                    tb.generateImpliedEndTags();
                    if (!tb.currentElement().nodeName().equals(name))
                        tb.error(this);
                    tb.popStackToClose(name);
                    tb.clearFormattingElementsToLastMarker();
                }
            } else if (name.equals("br")) {
                tb.error(this);
                tb.process(new Token.StartTag("br"));
                return false;
            } else {
                return anyOtherEndTag(t, tb);
            }
            break;
        case EOF:
            // stop parsing
            break;
    }
    return true;
}
Example 32
Project: baleen-master  File: NewLineToNewParagraph.java View source code
/**
 * Groups the children of an element into paragraph "runs", where a run is a
 * sequence of sibling nodes not interrupted by a BR element.
 *
 * <p>Works on a copy of the child nodes (via {@code childNodesCopy()}), and every
 * non-BR node is appended to a freshly created {@code p} element. BR elements
 * themselves are dropped and only act as separators.
 *
 * @param document the document used to create the new {@code p} elements
 * @param e the element whose children are grouped
 * @return the list of {@code p} elements, one per run, in document order
 */
private List<Element> collectRuns(Document document, Element e) {
    final List<Element> collected = new LinkedList<>();
    Element current = null;
    for (Node child : e.childNodesCopy()) {
        final boolean isLineBreak = child instanceof Element && "br".equalsIgnoreCase(((Element) child).tagName());
        if (!isLineBreak) {
            // Ordinary content: lazily open a run and attach the node to it
            if (current == null) {
                current = document.createElement("p");
            }
            current.appendChild(child);
            continue;
        }
        // A br closes the run in progress, if there is one
        if (current != null) {
            collected.add(current);
            current = null;
        }
    }
    // Flush the trailing run that was not terminated by a br
    if (current != null) {
        collected.add(current);
    }
    return collected;
}
Example 33
Project: facelets-lite-master  File: Test.java View source code
/**
 * Normalises the document, trims the text of every text node, and returns the
 * HTML of the document produced by {@code cleaner.clean}.
 *
 * @param doc the document to normalise; it is modified in place
 * @return the HTML of the cleaned document
 */
String toNormalHtml(Document doc) {
    doc.normalise();
    final NodeVisitor trimmingVisitor = new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {
            // nothing to do when entering a node
        }

        @Override
        public void tail(Node node, int depth) {
            if (!(node instanceof TextNode)) {
                return;
            }
            final TextNode text = (TextNode) node;
            final String trimmed = text.text().trim();
            text.text(trimmed);
        }
    };
    doc.traverse(trimmingVisitor);
    return cleaner.clean(doc).html();
}
Example 34
Project: framework-master  File: MenuBar.java View source code
/**
 * Builds a {@code MenuItem} (and, recursively, its sub-items) from a design element.
 *
 * <p>Child {@code menu} elements become sub-items; the string form of every other
 * child node is concatenated and trimmed to form the caption. The attributes
 * {@code icon}, {@code disabled}, {@code visible}, {@code separator},
 * {@code checkable}, {@code checked}, {@code description} and {@code style-name}
 * are applied when present.
 *
 * @param menuElement the element describing the menu item
 * @return the menu item read from the element
 */
protected MenuItem readMenuElement(Element menuElement) {
    // The icon is needed up front because it is a constructor argument
    final Resource icon = menuElement.hasAttr("icon")
            ? DesignAttributeHandler.getFormatter().parse(menuElement.attr("icon"), Resource.class)
            : null;

    // Split the children into nested menus and caption content
    final StringBuilder captionHtml = new StringBuilder();
    final List<Element> nestedMenus = new ArrayList<>();
    for (Node child : menuElement.childNodes()) {
        final boolean isNestedMenu = child instanceof Element && ((Element) child).tagName().equals("menu");
        if (isNestedMenu) {
            nestedMenus.add((Element) child);
        } else {
            captionHtml.append(child.toString());
        }
    }

    final MenuItem menu = new MenuItem(captionHtml.toString().trim(), icon, null);
    final Attributes attributes = menuElement.attributes();
    if (menuElement.hasAttr("icon")) {
        menu.setIcon(DesignAttributeHandler.readAttribute("icon", attributes, Resource.class));
    }
    if (menuElement.hasAttr("disabled")) {
        // "disabled" is the inverse of the item's enabled state
        menu.setEnabled(!DesignAttributeHandler.readAttribute("disabled", attributes, boolean.class));
    }
    if (menuElement.hasAttr("visible")) {
        menu.setVisible(DesignAttributeHandler.readAttribute("visible", attributes, boolean.class));
    }
    if (menuElement.hasAttr("separator")) {
        menu.setSeparator(DesignAttributeHandler.readAttribute("separator", attributes, boolean.class));
    }
    if (menuElement.hasAttr("checkable")) {
        menu.setCheckable(DesignAttributeHandler.readAttribute("checkable", attributes, boolean.class));
    }
    if (menuElement.hasAttr("checked")) {
        menu.setChecked(DesignAttributeHandler.readAttribute("checked", attributes, boolean.class));
    }
    if (menuElement.hasAttr("description")) {
        menu.setDescription(DesignAttributeHandler.readAttribute("description", attributes, String.class));
    }
    if (menuElement.hasAttr("style-name")) {
        menu.setStyleName(DesignAttributeHandler.readAttribute("style-name", attributes, String.class));
    }

    // The child list is only created when there is at least one nested menu
    if (!nestedMenus.isEmpty()) {
        menu.itsChildren = new ArrayList<>();
        for (Element nested : nestedMenus) {
            final MenuItem childItem = readMenuElement(nested);
            childItem.setParent(menu);
            menu.itsChildren.add(childItem);
        }
    }
    return menu;
}
Example 35
Project: jinjava-master  File: TruncateHtmlFilter.java View source code
/**
 * Truncates text nodes once the running text length reaches {@code maxTextLen}.
 *
 * <p>Text nodes seen after the limit has been reached are emptied. The node that
 * crosses the limit is cut and suffixed with {@code ending}; unless
 * {@code killwords} is set, the cut position is adjusted with
 * {@code Functions.movePointerToJustBeforeLastWord}. Non-text nodes are ignored.
 *
 * @param node the node being entered
 * @param depth the depth of the node (unused)
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    final TextNode textNode = (TextNode) node;
    final String content = textNode.text();

    // Limit already reached: drop this text entirely
    if (textLen >= maxTextLen) {
        textNode.text("");
        return;
    }

    // Whole node still fits: just account for its length
    if (!(textLen + content.length() > maxTextLen)) {
        textLen += content.length();
        return;
    }

    // This node crosses the limit: cut it and mark the limit as reached
    int cut = maxTextLen - textLen;
    if (!killwords) {
        cut = Functions.movePointerToJustBeforeLastWord(cut, content) - 1;
    }
    textNode.text(content.substring(0, cut) + ending);
    textLen = maxTextLen;
}
Example 36
Project: kune-master  File: ContentUnrenderer.java View source code
// private static final Logger LOG =
// Logger.getLogger(ContentUnrenderer.class.getName());
/**
 * Recursively walks the children of an HTML node and builds up the content of a
 * wave document.
 *
 * <p>Text nodes contribute their text to {@code output}. For each {@code p}
 * element a {@code Line} is registered in {@code elements} at the current output
 * length, and every element is then descended into.
 *
 * @param parent the node whose children are processed
 * @param output the accumulated text
 * @param elements the wave elements collected so far, keyed by position in {@code output}
 * @param annotations the annotations, passed through unchanged to recursive calls
 */
private static void unrender(final Node parent, final StringBuilder output, final Map<Integer, com.google.wave.api.Element> elements, final Annotations annotations) {
    for (final Node child : parent.childNodes()) {
        if (child instanceof TextNode) {
            output.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            // Neither text nor element: nothing to emit
            continue;
        }
        final Element htmlElement = (Element) child;
        // Position is captured before descending, so it marks the element's start
        final int offset = output.length();
        final String tagName = htmlElement.tag().getName();
        if ("p".equalsIgnoreCase(tagName)) {
            elements.put(offset, new Line());
        }
        // Further HTML tags could be handled here.
        unrender(htmlElement, output, elements, annotations);
    }
}
Example 37
Project: ScreenSlicer-master  File: Scrape.java View source code
/**
 * Loads a page in the given browser on a separate worker thread and returns its cleaned HTML.
 *
 * <p>Nothing is fetched, and {@code null} is returned, unless {@code url} is non-empty according to
 * {@code CommonUtil.isEmpty} or {@code urlNode} is non-null. The worker first attempts a live fetch
 * (unless {@code p_cached} is set) and switches to the URL built by {@code toCacheUrl} when the live
 * page source is empty. The calling thread waits up to {@code HANG_TIME} for the worker; if the
 * worker has not published anything by then, it is interrupted and {@code Browser.Retry} is thrown.
 *
 * @param browser the browser used for navigation and for reading the page source
 * @param throttle passed to {@code BrowserUtil.browserSleepLong} after the fetch
 * @param urlNode optional node passed to {@code BrowserUtil.get}; may be null
 * @param url the URL to fetch
 * @param p_cached when true, the live fetch is skipped and the cache URL is used directly
 * @param runGuid identifier checked with {@code ScreenSlicerBatch.isCancelled} before the cached fetch
 * @param toNewWindow passed through to {@code BrowserUtil.get}
 * @param init passed through to the {@code BrowserUtil} calls; also gates the new-window cleanup
 * @param postFetchClicks passed to {@code BrowserUtil.doClicks} after a live fetch
 * @return the cleaned outer HTML, or {@code null} when there is nothing to fetch or the fetch failed
 * @throws Browser.Retry when the worker has not produced a result after the join timeout
 */
private static String getHelper(final Browser browser, final boolean throttle, final Node urlNode, final String url, final boolean p_cached, final String runGuid, final boolean toNewWindow, final boolean init, final HtmlNode[] postFetchClicks) {
    if (!CommonUtil.isEmpty(url) || urlNode != null) {
        final Object resultLock = new Object();
        final String initVal;
        final String[] result;
        synchronized (resultLock) {
            // initVal is a sentinel: result[0] keeps this value until the worker publishes
            // content or reaches its finally block.
            initVal = Random.next();
            result = new String[] { initVal };
        }
        final AtomicBoolean started = new AtomicBoolean();
        Thread thread = new Thread(new Runnable() {

            @Override
            public void run() {
                // Set when a Retry/Fatal is seen; suppresses the post-fetch sleep and window cleanup.
                boolean terminate = false;
                started.set(true);
                boolean cached = p_cached;
                String newHandle = null;
                String origHandle = null;
                try {
                    origHandle = browser.getWindowHandle();
                    String content = null;
                    if (!cached) {
                        try {
                            BrowserUtil.get(browser, url, urlNode, false, toNewWindow, init);
                        } catch (Browser.Retry r) {
                            terminate = true;
                            throw r;
                        } catch (Browser.Fatal f) {
                            terminate = true;
                            throw f;
                        } catch (Throwable t) {
                            // Any other failure: fall back to a plain URL fetch, opening a new
                            // window first when a node was supplied.
                            if (urlNode != null) {
                                BrowserUtil.newWindow(browser, init);
                            }
                            BrowserUtil.get(browser, url, false, init);
                        }
                        if (urlNode != null) {
                            newHandle = browser.getWindowHandle();
                        }
                        BrowserUtil.doClicks(browser, postFetchClicks, null, null);
                        content = browser.getPageSource();
                        // Debug aid: dump a screenshot and the page source, named by timestamp,
                        // but only when no post-fetch clicks were requested.
                        if (WebApp.DEBUG && (postFetchClicks == null || postFetchClicks.length == 0)) {
                            try {
                                long filename = System.currentTimeMillis();
                                Files.copy(browser.getScreenshotAs(OutputType.FILE), new File("./" + filename + ".log.scrape.png"));
                                FileUtils.writeStringToFile(new File("./" + filename + ".log.scrape.htm"), content, "utf-8");
                            } catch (IOException e) {
                                // NOTE(review): failures writing the debug dump are silently ignored.
                            }
                        }
                        // An empty live page triggers the cached fetch below.
                        if (CommonUtil.isEmpty(content)) {
                            cached = true;
                        }
                    }
                    if (cached) {
                        if (ScreenSlicerBatch.isCancelled(runGuid)) {
                            return;
                        }
                        try {
                            BrowserUtil.get(browser, toCacheUrl(url, false), false, init);
                        } catch (Browser.Retry r) {
                            terminate = true;
                            throw r;
                        } catch (Browser.Fatal f) {
                            terminate = true;
                            throw f;
                        } catch (Throwable t) {
                            // Second attempt with the alternate cache URL variant.
                            BrowserUtil.get(browser, toCacheUrl(url, true), false, init);
                        }
                        content = browser.getPageSource();
                    }
                    content = NodeUtil.clean(content, browser.getCurrentUrl()).outerHtml();
                    //            }
                    synchronized (resultLock) {
                        result[0] = content;
                    }
                } catch (Browser.Retry r) {
                    // NOTE(review): rethrowing here ends the worker thread; the exception is not
                    // delivered to the thread that called getHelper.
                    terminate = true;
                    throw r;
                } catch (Browser.Fatal f) {
                    terminate = true;
                    throw f;
                } catch (Throwable t) {
                    Log.exception(t);
                } finally {
                    synchronized (resultLock) {
                        // No content was published: replace the sentinel with null so the caller
                        // can tell "finished without a result" apart from "still running".
                        if (initVal.equals(result[0])) {
                            result[0] = null;
                        }
                    }
                    if (!terminate) {
                        BrowserUtil.browserSleepLong(throttle);
                        if (init && newHandle != null && origHandle != null) {
                            try {
                                BrowserUtil.handleNewWindows(browser, origHandle, true);
                            } catch (Browser.Retry r) {
                                throw r;
                            } catch (Browser.Fatal f) {
                                throw f;
                            } catch (Throwable t) {
                                Log.exception(t);
                            }
                        }
                    }
                }
            }
        });
        thread.start();
        try {
            // Poll until the worker has actually begun running before starting the join timeout.
            while (!started.get()) {
                try {
                    Thread.sleep(WAIT);
                } catch (Throwable t) {
                    // NOTE(review): interruptions of this sleep are swallowed.
                }
            }
            thread.join(HANG_TIME);
            synchronized (resultLock) {
                // Sentinel still in place after the join: the worker has neither published
                // content nor reached its finally block, so treat the browser as hung.
                if (initVal.equals(result[0])) {
                    Log.exception(new Exception("Browser is hanging"));
                    try {
                        thread.interrupt();
                    } catch (Throwable t) {
                        Log.exception(t);
                    }
                    throw new Browser.Retry();
                }
                return result[0];
            }
        } catch (Browser.Retry r) {
            throw r;
        } catch (Browser.Fatal f) {
            throw f;
        } catch (Throwable t) {
            // Anything else is logged and the method falls through to return null.
            Log.exception(t);
        }
    }
    return null;
}
Example 38
Project: slicer-master  File: Scrape.java View source code
/**
 * Loads a page in the given browser on a separate worker thread and returns its cleaned HTML.
 *
 * <p>Nothing is fetched, and {@code null} is returned, unless {@code url} is non-empty according to
 * {@code CommonUtil.isEmpty} or {@code urlNode} is non-null. The worker first attempts a live fetch
 * (unless {@code p_cached} is set) and switches to the URL built by {@code toCacheUrl} when the live
 * page source is empty. The calling thread waits up to {@code HANG_TIME} for the worker; if the
 * worker has not published anything by then, it is interrupted and {@code Browser.Retry} is thrown.
 *
 * @param browser the browser used for navigation and for reading the page source
 * @param throttle passed to {@code BrowserUtil.browserSleepLong} after the fetch
 * @param urlNode optional node passed to {@code BrowserUtil.get}; may be null
 * @param url the URL to fetch
 * @param p_cached when true, the live fetch is skipped and the cache URL is used directly
 * @param runGuid identifier checked with {@code ScreenSlicerBatch.isCancelled} before the cached fetch
 * @param toNewWindow passed through to {@code BrowserUtil.get}
 * @param init passed through to the {@code BrowserUtil} calls; also gates the new-window cleanup
 * @param postFetchClicks passed to {@code BrowserUtil.doClicks} after a live fetch
 * @return the cleaned outer HTML, or {@code null} when there is nothing to fetch or the fetch failed
 * @throws Browser.Retry when the worker has not produced a result after the join timeout
 */
private static String getHelper(final Browser browser, final boolean throttle, final Node urlNode, final String url, final boolean p_cached, final String runGuid, final boolean toNewWindow, final boolean init, final HtmlNode[] postFetchClicks) {
    if (!CommonUtil.isEmpty(url) || urlNode != null) {
        final Object resultLock = new Object();
        final String initVal;
        final String[] result;
        synchronized (resultLock) {
            // initVal is a sentinel: result[0] keeps this value until the worker publishes
            // content or reaches its finally block.
            initVal = Random.next();
            result = new String[] { initVal };
        }
        final AtomicBoolean started = new AtomicBoolean();
        Thread thread = new Thread(new Runnable() {

            @Override
            public void run() {
                // Set when a Retry/Fatal is seen; suppresses the post-fetch sleep and window cleanup.
                boolean terminate = false;
                started.set(true);
                boolean cached = p_cached;
                String newHandle = null;
                String origHandle = null;
                try {
                    origHandle = browser.getWindowHandle();
                    String content = null;
                    if (!cached) {
                        try {
                            BrowserUtil.get(browser, url, urlNode, false, toNewWindow, init);
                        } catch (Browser.Retry r) {
                            terminate = true;
                            throw r;
                        } catch (Browser.Fatal f) {
                            terminate = true;
                            throw f;
                        } catch (Throwable t) {
                            // Any other failure: fall back to a plain URL fetch, opening a new
                            // window first when a node was supplied.
                            if (urlNode != null) {
                                BrowserUtil.newWindow(browser, init);
                            }
                            BrowserUtil.get(browser, url, false, init);
                        }
                        if (urlNode != null) {
                            newHandle = browser.getWindowHandle();
                        }
                        BrowserUtil.doClicks(browser, postFetchClicks, null, null);
                        content = browser.getPageSource();
                        // Debug aid: dump a screenshot and the page source, named by timestamp,
                        // but only when no post-fetch clicks were requested.
                        if (WebApp.DEBUG && (postFetchClicks == null || postFetchClicks.length == 0)) {
                            try {
                                long filename = System.currentTimeMillis();
                                Files.copy(browser.getScreenshotAs(OutputType.FILE), new File("./" + filename + ".log.scrape.png"));
                                FileUtils.writeStringToFile(new File("./" + filename + ".log.scrape.htm"), content, "utf-8");
                            } catch (IOException e) {
                                // NOTE(review): failures writing the debug dump are silently ignored.
                            }
                        }
                        // An empty live page triggers the cached fetch below.
                        if (CommonUtil.isEmpty(content)) {
                            cached = true;
                        }
                    }
                    if (cached) {
                        if (ScreenSlicerBatch.isCancelled(runGuid)) {
                            return;
                        }
                        try {
                            BrowserUtil.get(browser, toCacheUrl(url, false), false, init);
                        } catch (Browser.Retry r) {
                            terminate = true;
                            throw r;
                        } catch (Browser.Fatal f) {
                            terminate = true;
                            throw f;
                        } catch (Throwable t) {
                            // Second attempt with the alternate cache URL variant.
                            BrowserUtil.get(browser, toCacheUrl(url, true), false, init);
                        }
                        content = browser.getPageSource();
                    }
                    content = NodeUtil.clean(content, browser.getCurrentUrl()).outerHtml();
                    //            }
                    synchronized (resultLock) {
                        result[0] = content;
                    }
                } catch (Browser.Retry r) {
                    // NOTE(review): rethrowing here ends the worker thread; the exception is not
                    // delivered to the thread that called getHelper.
                    terminate = true;
                    throw r;
                } catch (Browser.Fatal f) {
                    terminate = true;
                    throw f;
                } catch (Throwable t) {
                    Log.exception(t);
                } finally {
                    synchronized (resultLock) {
                        // No content was published: replace the sentinel with null so the caller
                        // can tell "finished without a result" apart from "still running".
                        if (initVal.equals(result[0])) {
                            result[0] = null;
                        }
                    }
                    if (!terminate) {
                        BrowserUtil.browserSleepLong(throttle);
                        if (init && newHandle != null && origHandle != null) {
                            try {
                                BrowserUtil.handleNewWindows(browser, origHandle, true);
                            } catch (Browser.Retry r) {
                                throw r;
                            } catch (Browser.Fatal f) {
                                throw f;
                            } catch (Throwable t) {
                                Log.exception(t);
                            }
                        }
                    }
                }
            }
        });
        thread.start();
        try {
            // Poll until the worker has actually begun running before starting the join timeout.
            while (!started.get()) {
                try {
                    Thread.sleep(WAIT);
                } catch (Throwable t) {
                    // NOTE(review): interruptions of this sleep are swallowed.
                }
            }
            thread.join(HANG_TIME);
            synchronized (resultLock) {
                // Sentinel still in place after the join: the worker has neither published
                // content nor reached its finally block, so treat the browser as hung.
                if (initVal.equals(result[0])) {
                    Log.exception(new Exception("Browser is hanging"));
                    try {
                        thread.interrupt();
                    } catch (Throwable t) {
                        Log.exception(t);
                    }
                    throw new Browser.Retry();
                }
                return result[0];
            }
        } catch (Browser.Retry r) {
            throw r;
        } catch (Browser.Fatal f) {
            throw f;
        } catch (Throwable t) {
            // Anything else is logged and the method falls through to return null.
            Log.exception(t);
        }
    }
    return null;
}
Example 39
Project: TuCanMobile-master  File: EventsScraper.java View source code
/**
 * Returns the individual events found in the course status table as a ListAdapter.
 *
 * <p>Only table rows with exactly four cells are considered. Rows whose first cell
 * has class {@code tbsubhead} are recorded with the module flag set; rows whose
 * first cell has class {@code tbdata} are recorded as events.
 *
 * @param content
 *            the content div element
 * @return the adapter, or {@code null} when there is no {@code table.tbcoursestatus}
 *         or it contains no rows
 * @author Daniel Thiem
 */
private ListAdapter getApplicationSingleItems(Element content) {
    final Element statusTable = content.select("table.tbcoursestatus").first();
    if (statusTable == null) {
        return null;
    }
    final Elements rows = statusTable.select("tr");
    if (!(rows.size() > 0)) {
        return null;
    }
    // Individual events are offered
    ArrayList<String> names = new ArrayList<String>();
    ArrayList<String> instructors = new ArrayList<String>();
    ArrayList<String> dates = new ArrayList<String>();
    ArrayList<Boolean> moduleFlags = new ArrayList<Boolean>();
    for (Element row : rows) {
        final Elements cells = row.select("td");
        final Element leadCell = cells.first();
        if (leadCell == null || cells.size() != 4) {
            continue;
        }
        final Element infoCell = cells.get(1);
        final List<Node> infoNodes = infoCell.childNodes();
        if (leadCell.hasClass("tbsubhead")) {
            // Row recorded with the module flag set; needs four child nodes with trailing text
            if (infoNodes.size() != 4) {
                continue;
            }
            final Node instructorNode = infoNodes.get(3);
            if (!(instructorNode instanceof TextNode)) {
                continue;
            }
            final String moduleInstructor = ((TextNode) instructorNode).text();
            final String moduleName = infoCell.select("span.eventTitle").text();
            final String moduleDeadline = cells.get(2).text();
            names.add(moduleName);
            instructors.add(moduleInstructor);
            dates.add(moduleDeadline);
            moduleFlags.add(true);
        } else if (leadCell.hasClass("tbdata")) {
            // This row is an event; fields stay null when the layout is not recognised
            String eventName = null;
            String eventInstructor = null;
            String eventDates = null;
            switch (infoNodes.size()) {
                case 1: {
                    // Event with a name only
                    eventName = TucanMobile.getEventNameByString(infoCell.html());
                    eventInstructor = "";
                    eventDates = "";
                    break;
                }
                case 7: {
                    // Event with full information
                    final Node instructorNode = infoNodes.get(4);
                    final Node dateNode = infoNodes.get(6);
                    if (instructorNode instanceof TextNode && dateNode instanceof TextNode) {
                        eventName = infoCell.select("span.eventTitle").text();
                        eventInstructor = ((TextNode) instructorNode).text().trim();
                        eventDates = ((TextNode) dateNode).text().trim();
                    }
                    break;
                }
                case 5: {
                    // Event without a date
                    final Node instructorNode = infoNodes.get(4);
                    if (instructorNode instanceof TextNode) {
                        eventName = infoCell.select("span.eventTitle").text();
                        eventInstructor = ((TextNode) instructorNode).text().trim();
                        eventDates = "";
                    }
                    break;
                }
                default:
                    break;
            }
            names.add(eventName);
            instructors.add(eventInstructor);
            dates.add(eventDates);
            moduleFlags.add(false);
        }
    }
    // Build the adapter to return
    return new HighlightedThreeLinesAdapter(context, names, instructors, dates, moduleFlags);
}
Example 40
Project: vaadin-master  File: MenuBar.java View source code
/**
 * Builds a {@code MenuItem} (and, recursively, its sub-items) from a design element.
 *
 * <p>Child {@code menu} elements become sub-items; the string form of every other
 * child node is concatenated and trimmed to form the caption. The attributes
 * {@code icon}, {@code disabled}, {@code visible}, {@code separator},
 * {@code checkable}, {@code checked}, {@code description} and {@code style-name}
 * are applied when present.
 *
 * @param menuElement the element describing the menu item
 * @return the menu item read from the element
 */
protected MenuItem readMenuElement(Element menuElement) {
    // The icon is needed up front because it is a constructor argument
    final Resource icon = menuElement.hasAttr("icon")
            ? DesignAttributeHandler.getFormatter().parse(menuElement.attr("icon"), Resource.class)
            : null;

    // Split the children into nested menus and caption content
    final StringBuilder captionHtml = new StringBuilder();
    final List<Element> nestedMenus = new ArrayList<>();
    for (Node child : menuElement.childNodes()) {
        final boolean isNestedMenu = child instanceof Element && ((Element) child).tagName().equals("menu");
        if (isNestedMenu) {
            nestedMenus.add((Element) child);
        } else {
            captionHtml.append(child.toString());
        }
    }

    final MenuItem menu = new MenuItem(captionHtml.toString().trim(), icon, null);
    final Attributes attributes = menuElement.attributes();
    if (menuElement.hasAttr("icon")) {
        menu.setIcon(DesignAttributeHandler.readAttribute("icon", attributes, Resource.class));
    }
    if (menuElement.hasAttr("disabled")) {
        // "disabled" is the inverse of the item's enabled state
        menu.setEnabled(!DesignAttributeHandler.readAttribute("disabled", attributes, boolean.class));
    }
    if (menuElement.hasAttr("visible")) {
        menu.setVisible(DesignAttributeHandler.readAttribute("visible", attributes, boolean.class));
    }
    if (menuElement.hasAttr("separator")) {
        menu.setSeparator(DesignAttributeHandler.readAttribute("separator", attributes, boolean.class));
    }
    if (menuElement.hasAttr("checkable")) {
        menu.setCheckable(DesignAttributeHandler.readAttribute("checkable", attributes, boolean.class));
    }
    if (menuElement.hasAttr("checked")) {
        menu.setChecked(DesignAttributeHandler.readAttribute("checked", attributes, boolean.class));
    }
    if (menuElement.hasAttr("description")) {
        menu.setDescription(DesignAttributeHandler.readAttribute("description", attributes, String.class));
    }
    if (menuElement.hasAttr("style-name")) {
        menu.setStyleName(DesignAttributeHandler.readAttribute("style-name", attributes, String.class));
    }

    // The child list is only created when there is at least one nested menu
    if (!nestedMenus.isEmpty()) {
        menu.itsChildren = new ArrayList<>();
        for (Element nested : nestedMenus) {
            final MenuItem childItem = readMenuElement(nested);
            childItem.setParent(menu);
            menu.itsChildren.add(childItem);
        }
    }
    return menu;
}
Example 41
Project: web-entity-extractor-ACL2014-master  File: KnowledgeTreeBuilder.java View source code
/**
 * Converts a jsoup Element (an HTML tag and its content) into a knowledge tree.
 *
 * <p>A TAG node named after the element is created under {@code parent}. Child
 * elements are converted recursively; non-empty text nodes become TAG children
 * named {@code "text"} unless {@code opts.ignoreTextNodes} is set. The full text
 * of a node is stored as {@code null} when it is longer than
 * {@code opts.maxFullTextLength}. The element's attributes are copied last.
 *
 * @param elt       The jsoup Element corresponding to the root of the tree
 * @param parent    The parent of the created tree's root node.
 */
public void convertElementToKTree(Element elt, KNode parent) {
    final String normalizedEltText = LingUtils.normalize(elt.text(), opts.earlyNormalizeEntities);
    final String storedEltText = normalizedEltText.length() > opts.maxFullTextLength ? null : normalizedEltText;
    final KNode treeNode = parent.createChild(KNode.Type.TAG, elt.tagName(), storedEltText);

    // Children first: nested elements recurse, text becomes "text" nodes
    for (Node childNode : elt.childNodes()) {
        if (childNode instanceof Element) {
            convertElementToKTree((Element) childNode, treeNode);
            continue;
        }
        if (!(childNode instanceof TextNode) || opts.ignoreTextNodes) {
            continue;
        }
        final String normalizedText = LingUtils.normalize(((TextNode) childNode).text(), opts.earlyNormalizeEntities);
        if (normalizedText.isEmpty()) {
            continue;
        }
        final String storedText = normalizedText.length() > opts.maxFullTextLength ? null : normalizedText;
        treeNode.createChild(KNode.Type.TAG, "text", storedText);
    }

    // Then the attributes of the element itself
    for (Attribute attribute : elt.attributes()) {
        treeNode.createAttribute(attribute.getKey(), attribute.getValue());
    }
}
Example 42
Project: zongtui-webcrawler-master  File: ElementOperator.java View source code
/**
 * Extracts text from the direct text-node children of {@code element}.
 *
 * When {@code group} is 0 the text of all direct text nodes is concatenated;
 * otherwise the text of the {@code group}-th text node (1-based) is returned.
 * If that text node does not exist, the (empty) accumulator is returned.
 */
@Override
public String operate(Element element) {
    final StringBuilder allText = new StringBuilder();
    int textNodesSeen = 0;
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        final TextNode textChild = (TextNode) child;
        if (group == 0) {
            allText.append(textChild.text());
            continue;
        }
        textNodesSeen++;
        if (textNodesSeen == group) {
            return textChild.text();
        }
    }
    return allText.toString();
}
Example 43
Project: android-essentials-toolbox-master  File: GenerateUndocumentedPermissions.java View source code
/**
 * Searches for the preceding sibling-level comment before the given xml permission element.
 *
 * Walks backwards over the previous siblings. Text nodes (and any other
 * non-element, non-comment nodes) are skipped; the search stops at the first
 * comment or element encountered.
 *
 * @param permissionElement the element whose preceding comment is wanted
 * @return the closest preceding comment, or {@code null} if an element is
 *         reached first or there are no more preceding siblings
 */
private static org.jsoup.nodes.Comment getPreceedingComment(org.jsoup.nodes.Element permissionElement) {
    org.jsoup.nodes.Node node = permissionElement.previousSibling();
    // Stop when the siblings are exhausted instead of dereferencing null.
    while (node != null) {
        if (node instanceof org.jsoup.nodes.Comment) {
            return (org.jsoup.nodes.Comment) node;
        }
        if (node instanceof org.jsoup.nodes.Element) {
            return null;
        }
        // important, there is a trailing whitespace character after the comment that is considered as a node
        node = node.previousSibling();
    }
    return null;
}
Example 44
Project: bavrd-core-master  File: Face.java View source code
/**
 * Cleans {@code htmlBody} against {@code FORMATTED_TEXT_WHITELIST}, parses the
 * result as a body fragment and concatenates the sanitized form of every
 * top-level node of the body.
 */
public String formatText(String htmlBody) {
    final Document fragment = Jsoup.parseBodyFragment(Jsoup.clean(htmlBody, FORMATTED_TEXT_WHITELIST));
    final StringBuilder formatted = new StringBuilder();
    for (Node topLevelNode : fragment.body().childNodes()) {
        formatted.append(sanitize(topLevelNode));
    }
    return formatted.toString();
}
Example 45
Project: jmeter-master  File: JsoupBasedHtmlParser.java View source code
/**
 * Called for each node on the way down the tree; for elements, extracts the
 * attribute that references an external resource for that tag, then scans the
 * element's style attribute for URLs.
 *
 * A base tag with a non-empty href updates {@code baseUrl.url} instead of
 * extracting a resource.
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof Element)) {
        return;
    }
    final Element element = (Element) node;
    final String name = element.tagName().toLowerCase();
    if (name.equals(TAG_BODY)) {
        extractAttribute(element, ATT_BACKGROUND);
    } else if (name.equals(TAG_SCRIPT)) {
        extractAttribute(element, ATT_SRC);
    } else if (name.equals(TAG_BASE)) {
        final String baseRef = element.attr(ATT_HREF);
        // Bugzilla 30713
        if (!StringUtils.isEmpty(baseRef)) {
            try {
                baseUrl.url = ConversionUtils.makeRelativeURL(baseUrl.url, baseRef);
            } catch (MalformedURLException e1) {
                throw new RuntimeException(e1);
            }
        }
    } else if (name.equals(TAG_IMAGE)) {
        extractAttribute(element, ATT_SRC);
    } else if (name.equals(TAG_APPLET)) {
        extractAttribute(element, ATT_CODE);
    } else if (name.equals(TAG_OBJECT)) {
        extractAttribute(element, ATT_CODEBASE);
        extractAttribute(element, ATT_DATA);
    } else if (name.equals(TAG_INPUT)) {
        // Only inputs of type image reference a binary that must be downloaded.
        if (ATT_IS_IMAGE.equalsIgnoreCase(element.attr(ATT_TYPE))) {
            extractAttribute(element, ATT_SRC);
        }
    } else if (name.equals(TAG_FRAME) || name.equals(TAG_IFRAME) // Bug 51750
            || name.equals(TAG_EMBED) || name.equals(TAG_BGSOUND)) {
        extractAttribute(element, ATT_SRC);
    } else if (name.equals(TAG_LINK)) {
        // Constant first, so the comparison is safe whatever attr() returns.
        if (STYLESHEET.equalsIgnoreCase(element.attr(ATT_REL))) {
            extractAttribute(element, ATT_HREF);
        }
    } else {
        extractAttribute(element, ATT_BACKGROUND);
    }
    // Finally look for URLs in the STYLE attribute.
    final String inlineStyle = element.attr(ATT_STYLE);
    if (inlineStyle != null) {
        HtmlParsingUtils.extractStyleURLs(baseUrl.url, urls, inlineStyle);
    }
}
Example 46
Project: link-bubble-master  File: OutputFormatter.java View source code
/**
 * Tells whether a node looks like an unlikely content candidate: either its
 * class attribute contains "caption" (case-insensitively) or its style or
 * class attribute matches {@code unlikelyPattern}.
 */
boolean unlikely(Node e) {
    final String classAttr = e.attr("class");
    if (classAttr != null && classAttr.toLowerCase().contains("caption")) {
        return true;
    }
    final String styleAttr = e.attr("style");
    return unlikelyPattern.matcher(styleAttr).find() || unlikelyPattern.matcher(classAttr).find();
}
Example 47
Project: open-data-service-master  File: PegelPortalMvSourceAdapter.java View source code
/**
 * Collects, in document order, the string form of every text node found
 * anywhere below {@code element}. Nodes that are neither text nodes nor
 * elements contribute nothing.
 */
private String extractText(Element element) {
    final StringBuilder collected = new StringBuilder();
    for (Node child : element.childNodes()) {
        if (child instanceof Element) {
            // Descend into nested elements.
            collected.append(extractText((Element) child));
            continue;
        }
        if (child instanceof TextNode) {
            collected.append(child.toString());
        }
    }
    return collected.toString();
}
Example 48
Project: opensearchserver-master  File: JSoupUtils.java View source code
/**
 * Collects every node reachable from {@code parent} by following the element
 * names in {@code path}, starting at index {@code pos}.
 *
 * @param parent the node whose children are matched against {@code path[pos]}
 * @param pos    index of the path segment to match at this level
 * @param path   element names, one per tree level
 * @param nodes  output list receiving the nodes found at the end of the path
 */
private static final void getNodes(Node parent, int pos, String[] path, List<Node> nodes) {
    if (pos == path.length) {
        // Whole path consumed: this node is a match.
        nodes.add(parent);
        return;
    }
    final String wantedName = path[pos];
    for (Node child : parent.childNodes()) {
        if (child instanceof Element && child.nodeName().equals(wantedName)) {
            getNodes(child, pos + 1, path, nodes);
        }
    }
}
Example 49
Project: owsi-core-parent-master  File: AbstractNotificationContentDescriptorFactory.java View source code
/**
 * Inlines the style registered for a node into its style attribute (appended
 * after any existing value, separated by {@code STYLE_ATTRIBUTE_SEPARATOR})
 * and sets the target attribute on link nodes.
 */
@Override
public void head(Node node, int depth) {
    if (cssRegistry != null) {
        final String registeredStyle = cleanAttribute(cssRegistry.getStyle(node));
        if (StringUtils.hasText(registeredStyle)) {
            final String currentStyle = cleanAttribute(node.attr(STYLE_ATTRIBUTE));
            final StringBuilder merged = new StringBuilder();
            if (StringUtils.hasText(currentStyle)) {
                // Keep what the node already declares, then add the registry style.
                merged.append(currentStyle).append(STYLE_ATTRIBUTE_SEPARATOR);
            }
            merged.append(registeredStyle);
            node.attr(STYLE_ATTRIBUTE, merged.toString());
        }
    }
    if (!LINK_TAG.equals(node.nodeName())) {
        return;
    }
    node.attr(LINK_TARGET_ATTRIBUTE, LINK_TARGET_ATTRIBUTE_BLANK_VALUE);
}
Example 50
Project: structr-master  File: Importer.java View source code
/**
 * Parses the code previously read by {@link Importer#readPage()} into
 * {@code parsedDocument}, optionally treating it as a page fragment. When no
 * code is present, the content is fetched from {@code address} first.
 *
 * @param fragment whether the code should be parsed as a body fragment
 * @return always {@code true}
 * @throws FrameworkException
 */
public boolean parse(final boolean fragment) throws FrameworkException {
    init();
    if (!StringUtils.isNotBlank(code)) {
        // Nothing was read locally: download the page and parse it as HTML.
        if (!isDeployment) {
            logger.info("##### Start fetching {} for page {} #####", new Object[] { address, name });
        }
        code = HttpHelper.get(address);
        parsedDocument = Jsoup.parse(code);
        return true;
    }
    if (isDeployment) {
        // add a trailing slash to all void/self-closing tags so the XML parser can parse it correctly
        code = code.replaceAll("<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)([^>]*)>", "<$1$2/>");
    } else {
        logger.info("##### Start parsing code for page {} #####", new Object[] { name });
    }
    if (!fragment) {
        if (isDeployment) {
            parsedDocument = Jsoup.parse(code, "", Parser.xmlParser());
        } else {
            parsedDocument = Jsoup.parse(code);
        }
        return true;
    }
    if (!isDeployment) {
        parsedDocument = Jsoup.parseBodyFragment(code);
        return true;
    }
    // Deployment fragment: parse as XML and move the nodes into a fresh shell body.
    final List<Node> parsedNodes = Parser.parseXmlFragment(code, "");
    parsedDocument = Document.createShell("");
    final Element shellBody = parsedDocument.body();
    final Node[] detached = parsedNodes.toArray(new Node[parsedNodes.size()]);
    // NOTE(review): the lower bound leaves index 0 attached to its original
    // parent before it is appended below; confirm this is intended.
    for (int i = detached.length - 1; i > 0; i--) {
        detached[i].remove();
    }
    for (Node current : detached) {
        shellBody.appendChild(current);
    }
    return true;
}
Example 51
Project: structured-content-tools-master  File: StripHtmlPreprocessor.java View source code
/**
 * Appends the trimmed text of every non-empty text node to {@code buffer},
 * followed by a single separating space.
 *
 * Non-breaking spaces are converted to ordinary spaces before trimming, so
 * text consisting only of them is treated as empty.
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    TextNode textNode = (TextNode) node;
    // Written as an escape so the non-breaking space survives re-encoding and
    // is visible to readers.
    String text = textNode.text().replace('\u00A0', ' ').trim();
    if (!text.isEmpty()) {
        buffer.append(text);
        // After trim() the text never ends with a space, so the separator is
        // always needed; the last text gets the extra space too but we remove
        // it later.
        buffer.append(" ");
    }
}
Example 52
Project: Vaadin-SignatureField-master  File: DeclarativeTestBaseBase.java View source code
/**
 * Produce predictable html (attributes in alphabetical order), always
 * include close tags.
 *
 * The markup for {@code producedElem} and its descendants is appended to
 * {@code sb}; text nodes are trimmed and other node types are omitted.
 *
 * @return the full content of {@code sb} after appending
 */
private String elementToHtml(Element producedElem, StringBuilder sb) {
    final List<String> sortedKeys = new ArrayList<String>();
    for (Attribute attribute : producedElem.attributes().asList()) {
        sortedKeys.add(attribute.getKey());
    }
    Collections.sort(sortedKeys);
    // Opening tag with single-quoted attribute values.
    sb.append("<").append(producedElem.tagName());
    for (String key : sortedKeys) {
        sb.append(" ").append(key).append("='").append(producedElem.attr(key)).append("'");
    }
    sb.append(">");
    for (Node child : producedElem.childNodes()) {
        if (child instanceof TextNode) {
            sb.append(((TextNode) child).text().trim());
        } else if (child instanceof Element) {
            elementToHtml((Element) child, sb);
        }
    }
    // The closing tag is always written, even for void elements.
    sb.append("</").append(producedElem.tagName()).append(">");
    return sb.toString();
}
Example 53
Project: symphony-master  File: Markdowns.java View source code
/**
 * Processes text nodes whose parent element is not a code tag: "@user "
 * mentions become member links, and text without mentions is passed through
 * {@code Pangu.spacingText}.
 *
 * When links were produced, the text is re-parsed as a fragment, inserted
 * next to the original text node, and that node is queued in {@code toRemove}.
 */
@Override
public void head(final org.jsoup.nodes.Node node, int depth) {
    if (!(node instanceof org.jsoup.nodes.TextNode)) {
        return;
    }
    final org.jsoup.nodes.TextNode textNode = (org.jsoup.nodes.TextNode) node;
    final org.jsoup.nodes.Node parent = textNode.parent();
    if (!(parent instanceof org.jsoup.nodes.Element)) {
        return;
    }
    final Element parentElem = (Element) parent;
    if (parentElem.tagName().equals("code")) {
        return;
    }
    String content = textNode.getWholeText();
    if (null != userQueryService) {
        try {
            for (final String mentioned : userQueryService.getUserNames(content)) {
                content = content.replace('@' + mentioned + " ", "@<a href='" + Latkes.getServePath() + "/member/" + mentioned + "'>" + mentioned + "</a> ");
            }
            content = content.replace("@participants ", "@<a href='https://hacpai.com/article/1458053458339' class='ft-red'>participants</a> ");
        } finally {
            JdbcRepository.dispose();
        }
    }
    if (!content.contains("@<a href=")) {
        textNode.text(Pangu.spacingText(content));
        return;
    }
    // Mentions were linked: replace the plain text node with parsed markup.
    final List<org.jsoup.nodes.Node> parsed = Parser.parseFragment(content, parentElem, "");
    parentElem.insertChildren(textNode.siblingIndex(), parsed);
    toRemove.add(node);
}
Example 54
Project: aMatch-master  File: QuestionSearch.java View source code
/**
 * Downloads the page produced by {@code urlParser} and builds one Question per
 * entry link, then fills in id, company, company image, date, location and
 * tags by position from further CSS selections on the same document.
 *
 * Each later selection is matched to questions purely by index, so every
 * selection is expected to yield no more elements than there are questions.
 *
 * @return the questions found on the page, in document order
 * @throws IOException if the page cannot be fetched
 */
public Question[] loadRecentQuestions() throws IOException {
    final List<Question> questions = new ArrayList<Question>();
    final String url = urlParser.ParseUrl();
    // fetch the specified URL and parse to a HTML DOM
    final Document doc = Jsoup.connect(url).userAgent(userAgent).timeout(timeout).get();

    // Question text: everything before the last newline of the link's plain text.
    for (Element entryLink : doc.select("span[class=entry] > a")) {
        final String plainText = getPlainText(entryLink);
        final int lastNewline = plainText.lastIndexOf('\n');
        final String questionText = lastNewline == -1 ? "" : plainText.substring(0, lastNewline);
        final Question question = new Question(questionText, urlParser.getParsedPageNumber());
        question.questionTextLineCount = questionText.split("\n").length + 1;
        questions.add(question);
    }

    // Id: the href of the question link.
    int position = 0;
    for (Element idLink : doc.select("span[class=entry] a[href~=/question\\?id]")) {
        questions.get(position).id = idLink.attr("href");
        position++;
    }

    // Company name and image URL both come from the same img element.
    position = 0;
    for (Element companyImage : doc.select("span[class=company] img")) {
        final Question question = questions.get(position);
        question.company = companyImage.attr("title");
        question.companyImgURL = companyImage.attr("src");
        position++;
    }

    // Date text, with the location taken from the node following the abbr.
    position = 0;
    for (Element timeAgo : doc.select("abbr[class=timeago]")) {
        final Question question = questions.get(position);
        question.dateText = timeAgo.text();
        question.location = timeAgo.nextSibling().toString();
        position++;
    }

    // Tags: the text between the first '>' and the last '<' of each child.
    position = 0;
    for (Element tagContainer : doc.select("span[class=tags]")) {
        final List<String> tagNames = new ArrayList<String>();
        for (Node child : tagContainer.childNodes()) {
            final String rawMarkup = child.toString();
            final int contentStart = rawMarkup.indexOf(">");
            final int contentEnd = rawMarkup.lastIndexOf("<");
            if (contentEnd != -1) {
                tagNames.add(rawMarkup.substring(contentStart + 1, contentEnd));
            }
        }
        questions.get(position).tags = tagNames.toArray(new String[tagNames.size()]);
        position++;
    }
    return questions.toArray(new Question[questions.size()]);
}
Example 55
Project: Android_RssReader-master  File: Readability.java View source code
//    private static String GetArticleTitle(Element htmlNode)
//    {        
//        if (htmlNode.getElementsByTag("title") == null) 
//        	return null;
//        
//        Element titleNode = htmlNode.getElementsByTag("title").get(0);
//
//        String currTitle, origTitle;
//        currTitle = origTitle = GetInnerText(titleNode);
//
//        if (Regex.IsMatch(currTitle, @" [\|\-] "))
//        {
//            currTitle = Regex.Replace(origTitle,  @"(.*)[\|\-] .*", "$1");
//
//            if (currTitle.Split(' ').Length < 3)
//            {
//                currTitle = origTitle.Replace(@"[^\|\-]*[\|\-](.*)", "$1");
//            }
//        }
//        else if (currTitle.IndexOf(": ") != -1)
//        {
//            currTitle = Regex.Replace(origTitle, @".*:(.*)", "$1");
//
//            if(currTitle.Split(' ').Length < 3)
//            {
//                currTitle = Regex.Replace(origTitle, @"[^:]*[:](.*)", "$1");
//            }
//        }
//        else if (currTitle.Length > 150 || currTitle.Length < 15)
//        {
//            var hOnes = htmlNode.GetElementsByTagName("h1");
//            if (hOnes.Count == 1)
//            {
//                currTitle = GetInnerText(hOnes[0]);
//            }
//        }
//
//        if (currTitle.Split(' ').Length <= 4)
//        {
//            currTitle = origTitle;
//        }
//    
//        return currTitle.Trim();
//    }
/**
 * Picks the element most likely to hold the article body and returns its
 * inner HTML.
 *
 * Pass 1 walks all elements under the body: elements whose class/id look like
 * unlikely candidates are removed, p/td/pre elements are queued for scoring,
 * and div elements are either converted to p or have their text children
 * wrapped in p. Pass 2 scores the parent and grandparent of each queued
 * element. Pass 3 scales scores by link density and keeps the best candidate.
 *
 * @param doc the parsed document; it is modified in place
 * @return the inner HTML of the top candidate, or {@code null} when no
 *         candidate was found
 */
private static String GetArticleContent(Document doc) {
    Element body = doc.body();
    List<Element> allElements = body.getAllElements();
    List<Element> nodesToScore = new ArrayList<Element>();
    for (int nodeIndex = 0, len = allElements.size(); nodeIndex < len; nodeIndex++) {
        Element node = allElements.get(nodeIndex);
        // Match against class AND id. The previous ternary bound as
        // hasAttr ? class : ("" + id), so the id was ignored whenever a class
        // was present. attr() yields an empty string for a missing attribute.
        String unlikelyMatchString = node.attr("class") + node.attr("id");
        if (s_unlikelyCandidates.matcher(unlikelyMatchString).find() && !s_okMaybeItsACandidate.matcher(unlikelyMatchString).find() && !node.nodeName().equals("body") && !node.nodeName().equals("html") && !node.nodeName().equals("head")) {
            node.remove();
            continue;
        }
        if (node.nodeName().equals("p") || node.nodeName().equals("td") || node.nodeName().equals("pre")) {
            nodesToScore.add(node);
        }
        if (node.nodeName().equals("div")) {
            if (!s_divToPElements.matcher(node.html()).find()) {
                // A div without block-level content is treated as a paragraph.
                if (node.ownerDocument() != null) {
                    Element newNode = node.ownerDocument().createElement("p");
                    newNode.html(node.html());
                    node.replaceWith(newNode);
                    nodesToScore.add(newNode);
                }
            } else {
                // Otherwise wrap loose text children in their own paragraphs.
                for (Node childNode : node.childNodes()) {
                    if (childNode instanceof TextNode) {
                        if (node.ownerDocument() != null) {
                            Element p = node.ownerDocument().createElement("p");
                            p.html(((TextNode) childNode).text());
                            childNode.replaceWith(p);
                        }
                    }
                }
            }
        }
    }
    Map<Element, Integer> scores = new HashMap<Element, Integer>();
    List<Element> candidates = new ArrayList<Element>();
    for (int pt = 0, len = nodesToScore.size(); pt < len; pt++) {
        Element parentNode = nodesToScore.get(pt).parent();
        Element grandParentNode = parentNode != null ? parentNode.parent() : null;
        String innerText = GetInnerText(nodesToScore.get(pt));
        if (parentNode == null)
            continue;
        if (parentNode.nodeName().equals("body"))
            continue;
        if (parentNode.nodeName().equals("html"))
            continue;
        if (parentNode.nodeName().equals("footer"))
            continue;
        // parentNode is known to be non-null here.
        if (parentNode.hasAttr("class") && parentNode.attr("class").equals("copyright"))
            continue;
        if (innerText.length() < 25)
            continue;
        if (!scores.containsKey(parentNode)) {
            scores.put(parentNode, CalculateNodeScore(parentNode));
            candidates.add(parentNode);
        }
        if (grandParentNode != null && !scores.containsKey(grandParentNode)) {
            scores.put(grandParentNode, CalculateNodeScore(grandParentNode));
            candidates.add(grandParentNode);
        }
        int contentScore = 0;
        contentScore++;
        //for embed flash case
        if (innerText.contains("embed") && (innerText.contains("youku") || innerText.contains("tudou") || innerText.contains("ku6") || innerText.contains("sohu") || innerText.contains("weiphone") || innerText.contains("56") || innerText.contains("youtube") || innerText.contains("qq")))
            contentScore += 50;
        // One point per comma-separated segment, plus up to 3 for length.
        contentScore += innerText.split("[,]|[,]").length;
        contentScore += Math.min(innerText.length() / 100, 3);
        int v = scores.get(parentNode);
        v += contentScore;
        scores.put(parentNode, v);
        if (grandParentNode != null) {
            // The grandparent receives half of the paragraph's score.
            v = scores.get(grandParentNode);
            v += contentScore / 2;
            scores.put(grandParentNode, v);
        }
    }
    Element topCandidate = null;
    for (Element cand : candidates) {
        int v = scores.get(cand);
        v = (int) (v * (1 - GetLinkDensity(cand)));
        scores.put(cand, v);
        if (topCandidate == null || scores.get(cand) > scores.get(topCandidate)) {
            topCandidate = cand;
        }
        // NOTE(review): this fallback runs inside the loop, so it never fires
        // when there are no candidates at all; confirm whether it was meant to
        // follow the loop.
        if (topCandidate == null || topCandidate.nodeName().equals("body")) {
            topCandidate = doc.createElement("div");
            topCandidate.html(body.html());
            body.html("");
            body.appendChild(topCandidate);
            scores.put(topCandidate, CalculateNodeScore(topCandidate));
        }
    }
    return topCandidate == null ? null : topCandidate.html();
}
Example 56
Project: brightspot-cms-master  File: RichTextViewBuilder.java View source code
// Walks the given siblings and, recursively, their descendants, turning them
// into view nodes. Rich text elements become element view nodes when an
// element converter is configured; all other markup becomes string view
// nodes, and runs of adjacent string view nodes are joined into a single
// HTML string.
private List<RichTextViewNode<V>> toViewNodes(List<Node> siblings) {
    List<RichTextViewNode<V>> flat = new ArrayList<>();
    for (Node sibling : siblings) {
        if (sibling instanceof TextNode) {
            flat.add(new StringRichTextViewNode<>(((TextNode) sibling).text(), htmlToView));
            continue;
        }
        if (sibling instanceof DataNode) {
            flat.add(new StringRichTextViewNode<>(((DataNode) sibling).getWholeData(), htmlToView));
            continue;
        }
        if (!(sibling instanceof Element)) {
            continue;
        }
        Element element = (Element) sibling;
        RichTextElement rte = RichTextElement.fromElement(element);
        ObjectType tagType = rte != null ? rte.getState().getType() : null;
        if (rte != null && elementToView != null) {
            flat.add(new ElementRichTextViewNode<>(rte, elementToView));
            continue;
        }
        if (tagType != null && !keepUnboundElements) {
            continue;
        }
        List<RichTextViewNode<V>> descendants = toViewNodes(element.childNodes());
        String outer = element.outerHtml();
        if (element.tag().isSelfClosing()) {
            flat.add(new StringRichTextViewNode<>(outer, htmlToView));
            continue;
        }
        // The indexes are deliberately not validated: a non-self-closing
        // element is expected to always contain both characters.
        int openTagEnd = outer.indexOf('>');
        int closeTagStart = outer.lastIndexOf('<');
        flat.add(new StringRichTextViewNode<>(outer.substring(0, openTagEnd + 1), htmlToView));
        flat.addAll(descendants);
        flat.add(new StringRichTextViewNode<>(outer.substring(closeTagStart), htmlToView));
    }
    // Collapse the nodes as much as possible.
    List<RichTextViewNode<V>> merged = new ArrayList<>();
    List<StringRichTextViewNode<V>> pending = new ArrayList<>();
    for (RichTextViewNode<V> viewNode : flat) {
        if (viewNode instanceof StringRichTextViewNode) {
            pending.add((StringRichTextViewNode<V>) viewNode);
            continue;
        }
        // Flush the pending strings (possibly none) before the element node.
        String joined = pending.stream().map(StringRichTextViewNode::getHtml).collect(Collectors.joining());
        merged.add(new StringRichTextViewNode<>(joined, htmlToView));
        pending.clear();
        merged.add(viewNode);
    }
    if (!pending.isEmpty()) {
        String joined = pending.stream().map(StringRichTextViewNode::getHtml).collect(Collectors.joining());
        merged.add(new StringRichTextViewNode<>(joined, htmlToView));
    }
    return merged;
}
Example 57
Project: dogeared-extruder-master  File: Readability.java View source code
/**
 * Replaces {@code e} in the document with a new element named {@code newTag}
 * that takes over all of {@code e}'s child nodes.
 *
 * @return the newly created replacement element
 */
private Element changeElementTag(Element e, String newTag) {
    final Element replacement = document.createElement(newTag);
    // Iterate over a snapshot: removing a child changes e's own child list.
    final List<Node> snapshot = new ArrayList<Node>(e.childNodes());
    for (Node child : snapshot) {
        child.remove();
        replacement.appendChild(child);
    }
    e.replaceWith(replacement);
    return replacement;
}
Example 58
Project: ez-vcard-master  File: HCardElementTest.java View source code
/**
 * Checks that appending text containing \r, \n and \r\n produces text nodes
 * separated by one br element per line break.
 */
@Test
public void append_with_newlines() {
    final HCardElement hcard = build("<div />");
    hcard.append("Append\rthis\n\ntext\r\nplease.");

    final Iterator<Node> children = hcard.getElement().childNodes().iterator();
    // "\r" -> one break
    assertTextNodeValue(children.next(), "Append");
    assertTagName(children.next(), "br");
    // "\n\n" -> two breaks
    assertTextNodeValue(children.next(), "this");
    assertTagName(children.next(), "br");
    assertTagName(children.next(), "br");
    // "\r\n" -> a single break
    assertTextNodeValue(children.next(), "text");
    assertTagName(children.next(), "br");
    assertTextNodeValue(children.next(), "please.");
    assertFalse(children.hasNext());
}
Example 59
Project: Java-readability-master  File: Readability.java View source code
/**
 * Swaps the tag of an element: creates a {@code newTag} element, moves every
 * child node of {@code e} into it, and puts it in {@code e}'s place.
 *
 * @return the element that replaced {@code e}
 */
private Element changeElementTag(Element e, String newTag) {
    final Element renamed = document.createElement(newTag);
    // childNodes() is backed by the live list, so work on a private copy.
    final Node[] movedChildren = e.childNodes().toArray(new Node[0]);
    for (int i = 0; i < movedChildren.length; i++) {
        final Node moved = movedChildren[i];
        moved.remove();
        renamed.appendChild(moved);
    }
    e.replaceWith(renamed);
    return renamed;
}
Example 60
Project: jooby-master  File: Doc.java View source code
/**
 * Emits the opening Markdown for a node into {@code builder} while the
 * traversal is outside the table of contents.
 *
 * Text nodes are appended with non-breaking spaces turned into ordinary
 * spaces. Elements are mapped by tag name to their Markdown prefix; tags
 * without a mapping are logged at debug level.
 */
@Override
public void head(final Node node, final int depth) {
    if (isInToc) {
        return;
    }
    if (node instanceof TextNode) {
        TextNode textNode = (TextNode) node;
        // Non-breaking space written as an escape so it is visible and
        // survives re-encoding of the source file.
        String txt = textNode.text().replace('\u00A0', ' ');
        builder.append(txt);
    } else if (node instanceof Element) {
        Element element = (Element) node;
        switch(element.tagName()) {
            case "span":
            case "blockquote":
                // ignored
                break;
            case "ol":
            case "ul":
                listDepth += 1;
                // fall through: a list also starts on a new line
            case "br":
            case "p":
                builder.append("\n");
                break;
            case "div":
                builder.append("\n");
                break;
            case "h1":
                builder.append("\n# ");
                break;
            case "h2":
                builder.append("\n## ");
                break;
            case "h3":
                builder.append("\n### ");
                break;
            case "h4":
                builder.append("\n#### ");
                // Without this break the heading fell through to the bold
                // case and picked up a stray "**".
                break;
            case "b":
            case "strong":
                builder.append("**");
                break;
            case "cite":
            case "i":
            case "u":
                builder.append("*");
                break;
            case "a":
                builder.append('[');
                break;
            case "li":
                // One space of indentation per nesting level below the first.
                for (int i = 0; i < listDepth - 1; i++) {
                    builder.append(" ");
                }
                builder.append(element.parent().tagName().equals("ol") ? "1. " : "* ");
                break;
            case "code":
                builder.append("`");
                break;
            case "strike":
                // No Markdown equivalent: keep the HTML tag.
                builder.append("<").append(element.tagName()).append(">");
                break;
            case "img":
                String src = element.attr("src");
                String alt = element.attr("alt");
                alt = alt == null ? "" : alt;
                if (src != null) {
                    builder.append("![").append(alt).append("](").append(src).append(")\n");
                }
                break;
            case "pre":
                builder.append("```\n");
                break;
            case "hr":
                builder.append("\n***\n");
                break;
            case "font":
                // Monospace fonts are rendered as inline code.
                String face = element.attr("face");
                if (face != null && face.contains("monospace")) {
                    builder.append("`");
                }
                break;
            default:
                log.debug("Unhandled element {}", element.tagName());
        }
    }
}
Example 61
Project: LastCalc-master  File: MainPageServlet.java View source code
/**
 * Serves worksheet pages.
 *
 * <ul>
 *   <li>Requests whose User-Agent contains "MSIE" are redirected to {@code /noie.html} unless the
 *       {@code skipuacheck} parameter is present.</li>
 *   <li>{@code /favicon.ico} answers 404.</li>
 *   <li>{@code /} creates and saves a new worksheet, then redirects to it.</li>
 *   <li>An 8-character path segment is looked up as a read-only id; the matching worksheet's
 *       question/answer pairs are copied into a new worksheet and the client is redirected to it.</li>
 *   <li>Any other path segment is loaded as a worksheet id and rendered as HTML.</li>
 * </ul>
 *
 * @param req  the incoming request
 * @param resp the response to write the redirect, error or rendered page to
 */
@Override
protected void doGet(final javax.servlet.http.HttpServletRequest req, final javax.servlet.http.HttpServletResponse resp) throws javax.servlet.ServletException, java.io.IOException {
    final boolean skipUACheck = req.getParameterMap().containsKey("skipuacheck");
    // The User-Agent header is optional; getHeader returns null when it is absent,
    // so guard against that instead of failing the request with an NPE.
    final String userAgent = req.getHeader("User-Agent");
    if (!skipUACheck && userAgent != null && userAgent.contains("MSIE")) {
        resp.sendRedirect("/noie.html");
        return;
    }
    final URL requestURL = new URL(req.getRequestURL().toString());
    final String path = requestURL.getPath();
    final Objectify obj = DAO.begin();
    if (path.equals("/favicon.ico")) {
        resp.sendError(404);
        return;
    }
    if (path.equals("/")) {
        // Create a new worksheet and redirect to it
        final Worksheet worksheet = new Worksheet();
        obj.save().entity(worksheet).now();
        resp.sendRedirect("/" + worksheet.id + (skipUACheck ? "?skipuacheck=1" : ""));
    } else {
        final String worksheetId = path.substring(1);
        if (worksheetId.length() == 8) {
            // This is readonly, duplicate it and redirect to
            // a new id
            final Worksheet worksheet = new Worksheet();
            final Worksheet template = obj.load().type(Worksheet.class).filter("readOnlyId", worksheetId).first().get();
            if (template == null) {
                resp.sendError(404);
                return;
            }
            // NOTE(review): parentId is set to the new worksheet's own id; this looks like it
            // was meant to reference the template - confirm before changing.
            worksheet.parentId = worksheet.id;
            worksheet.qaPairs = template.qaPairs;
            obj.save().entity(worksheet);
            resp.sendRedirect("/" + worksheet.id);
        } else {
            final Worksheet worksheet;
            try {
                worksheet = obj.load().type(Worksheet.class).id(worksheetId).get();
            } catch (final NotFoundException e) {
                resp.sendError(404, "Worksheet not found");
                return;
            }
            final Document doc = createDocument(requestURL, worksheet);
            // The help document is embedded as a hidden div (cloned node by node).
            final Element helpDiv = doc.body().appendElement("div").attr("id", "helpframe").attr("style", "display: none;");
            for (final Node n : Help.getHelpDoc().body().childNodes()) {
                helpDiv.appendChild(n.clone());
            }
            int lineNo = 1;
            final SequentialParser sp = SequentialParser.create();
            Element worksheetElement = doc.body().select("#worksheet").first();
            // Render one "line" div per stored question/answer pair.
            for (final Line qa : worksheet.qaPairs) {
                sp.processNextAnswer(qa.answer);
                final Element lineEl = worksheetElement.appendElement("div").addClass("line").attr("id", "line" + lineNo);
                if (lineNo == 1) {
                    lineEl.addClass("firstLine");
                }
                final Element lineNumber = lineEl.appendElement("div").attr("class", "lineNumberMarker");
                lineNumber.text(lineNo + ".");
                final Element question = lineEl.appendElement("div").attr("class", "question").attr("contentEditable", "true");
                question.text(qa.question);
                final TokenList strippedAnswer = sp.stripUDF(qa.answer);
                final AnswerType aType = WorksheetServlet.getAnswerType(strippedAnswer);
                if (aType.equals(AnswerType.NORMAL)) {
                    lineEl.appendElement("div").attr("class", "equals").text("=");
                    lineEl.appendElement("div").attr("class", "answer").html(Renderers.toHtml("/", strippedAnswer).toString());
                } else {
                    lineEl.appendElement("div").attr("class", "equals").html("<span style=\"font-size:10pt;\">✓</span>");
                    lineEl.appendElement("div").attr("class", "answer");
                }
                lineNo++;
            }
            doc.body().attr("data-variables", Misc.gson.toJson(sp.getUserDefinedKeywordMap()));
            // Trailing empty line with a hidden "equals" and "answer" cell.
            final Element lineEl = worksheetElement.appendElement("div").addClass("line").attr("id", "line" + lineNo);
            if (lineNo == 1) {
                lineEl.addClass("firstLine");
            }
            final Element lineNumber = lineEl.appendElement("div").attr("class", "lineNumberMarker");
            lineNumber.text(lineNo + ".");
            lineEl.appendElement("div").attr("class", "question").attr("contentEditable", "true");
            lineEl.appendElement("div").attr("class", "equals").text("=").attr("style", "display:none;");
            lineEl.appendElement("div").attr("class", "answer").attr("style", "display:none;");
            resp.setContentType("text/html; charset=UTF-8");
            resp.getWriter().append(doc.toString());
        }
    }
}
Example 62
Project: Lightning-Browser-master  File: OutputFormatter.java View source code
/**
 * Recursively appends the text found below {@code e} to {@code accum}.
 * Children for which {@code unlikely(child)} is true are skipped together with their subtree.
 *
 * @param e      element whose child nodes are walked
 * @param accum  buffer receiving the text
 * @param indent recursion depth; only passed along (incremented) to nested calls
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        // A single space separates block elements from preceding text and stands in for <br>.
        boolean blockNeedsSeparator = accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum);
        if (blockNeedsSeparator || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
Example 63
Project: NiceText-master  File: NTHelper.java View source code
@Override
public void head(Node node, int depth) {
    if (node instanceof Element) {
        Element innerElement = (Element) node;
        Element parentElement = innerElement.parent();
        if (parentElement != null) {
            parentTextSize = parentElement.ownText().length();
        }
        //if ((innerElement.isBlock() || POSSIBLE_TEXT_NODES.matcher(innerElement.tagName()).matches())&& innerElement.text().length()>50) {
        if (innerElement.ownText().length() >= WORDS_T && parentTextSize == 0) {
            flatDOM.add(innerElement);
        }
    }
}
Example 64
Project: OpenLegislation-master  File: BillTextTest.java View source code
/**
 * Walks {@code ele} depth-first and writes its text to {@code stringBuilder}, one line per piece.
 * Text inside {@code <u>} elements is written upper-cased; other elements are descended into.
 *
 * @param ele           element whose children are processed
 * @param stringBuilder buffer receiving the extracted lines
 */
void processNode(Element ele, StringBuilder stringBuilder) {
    for (Node child : ele.childNodes()) {
        if (child instanceof TextNode) {
            stringBuilder.append(((TextNode) child).text()).append("\n");
        } else if (child instanceof Element) {
            Element nested = (Element) child;
            boolean underlined = nested.tag().getName().equals("u");
            if (!underlined) {
                processNode(nested, stringBuilder);
                continue;
            }
            stringBuilder.append(nested.text().toUpperCase()).append("\n");
        }
    }
}
Example 65
Project: shopb2b-master  File: Article.java View source code
/**
 * Splits the article content into pages.
 * <p>
 * Empty content yields a single empty page. Content containing {@code contentBreake} is split
 * on it. Otherwise the content is parsed as HTML and the body's child nodes are accumulated into
 * a page until the running text length reaches {@code MAX_PAGE_CONTENT_COUNT}; text nodes are
 * first broken up with {@code pattern}, re-attaching each matched delimiter to its piece.
 *
 * @return the page contents, in order
 */
@Transient
public String[] getPageContents() {
    if (StringUtils.isEmpty(this.content)) {
        return new String[] { "" };
    }
    if (this.content.contains(contentBreake)) {
        return this.content.split(contentBreake);
    }
    List<String> pages = new ArrayList<String>();
    List<Node> bodyNodes = Jsoup.parse(this.content).body().childNodes();
    if (bodyNodes != null) {
        int charCount = 0;
        StringBuffer page = new StringBuffer();
        for (Node node : bodyNodes) {
            if (node instanceof org.jsoup.nodes.Element) {
                org.jsoup.nodes.Element element = (org.jsoup.nodes.Element) node;
                page.append(element.outerHtml());
                charCount += element.text().length();
                if (charCount >= MAX_PAGE_CONTENT_COUNT) {
                    // Page is full: emit it and start a new one.
                    pages.add(page.toString());
                    charCount = 0;
                    page.setLength(0);
                }
            } else if (node instanceof TextNode) {
                String text = ((TextNode) node).text();
                Matcher delimiters = pattern.matcher(text);
                for (String fragment : pattern.split(text)) {
                    // split() drops the delimiters; put the next one back onto its fragment.
                    String piece = delimiters.find() ? fragment + delimiters.group() : fragment;
                    page.append(piece);
                    charCount += piece.length();
                    if (charCount >= MAX_PAGE_CONTENT_COUNT) {
                        pages.add(page.toString());
                        charCount = 0;
                        page.setLength(0);
                    }
                }
            }
        }
        String remainder = page.toString();
        if (StringUtils.isNotEmpty(remainder)) {
            pages.add(remainder);
        }
    }
    return pages.toArray(new String[pages.size()]);
}
Example 66
Project: act-master  File: PatentDocument.java View source code
/**
 * Visitor callback run when leaving a node.
 * Anchors get their absolute href appended in angle brackets; leaving a segmenting node flushes
 * the accumulated text into {@code textSegments} (unless it matches {@code SPACE_PATTERN}) and
 * resets the builder.
 *
 * @param node the node being left
 * @param i    depth of the node (unused here)
 */
@Override
public void tail(org.jsoup.nodes.Node node, int i) {
    String nodeName = node.nodeName();
    if (nodeName.equals("a")) {
        // Same as Jsoup's HtmlToPlainText.
        String href = node.absUrl("href");
        segmentBuilder.append(String.format(" <%s>", href));
        return;
    }
    if (!SEGMENTING_NODES.contains(nodeName) || segmentBuilder.length() == 0) {
        return;
    }
    String segmentText = segmentBuilder.toString();
    // Blank segments are dropped because every line is tagged separately.
    boolean blank = SPACE_PATTERN.matcher(segmentText).matches();
    if (!blank) {
        this.textSegments.add(segmentText);
    }
    // TODO: is it better to drop the old one than clear the existing?
    segmentBuilder.setLength(0);
}
Example 67
Project: awesome-blogs-android-master  File: DocumentConverter.java View source code
/**
 * Walks the DOM below {@code el} and estimates the buffer size needed to hold the result:
 * the text length of every text node plus {@code 4 * depth} for every element encountered.
 *
 * @param el    element whose subtree is measured
 * @param depth depth used for the per-element allowance; incremented for nested elements
 * @return the estimated length
 */
private static int calculateLength(Element el, int depth) {
    int total = 0;
    for (final Node child : el.childNodes()) {
        if (child instanceof TextNode) {
            total += ((TextNode) child).text().length();
        } else if (child instanceof Element) {
            total += 4 * depth;
            total += calculateLength((Element) child, depth + 1);
        }
    }
    return total;
}
Example 68
Project: elasticsearch-river-remote-master  File: GetSitemapHtmlClient.java View source code
/**
 * Visitor callback: appends the trimmed text of every non-empty text node to {@code buffer},
 * followed by a single space.
 *
 * @param node  the node being entered
 * @param depth depth of the node (unused here)
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    // Normalize non-breaking spaces to plain spaces. The character is written as a unicode
    // escape so it cannot be silently turned into an ordinary space by editors or copy/paste.
    String text = ((TextNode) node).text().replace('\u00A0', ' ').trim();
    if (!text.isEmpty()) {
        buffer.append(text);
        // trim() guarantees the text does not end with a space, so the separator is always needed.
        buffer.append(" ");
    }
}
Example 69
Project: JAVMovieScraper-master  File: AvEntertainmentParsingProfile.java View source code
/**
 * Scrapes the runtime from the title box list.
 * <p>
 * Looks for a list item with exactly three child nodes whose second child starts with the label
 * "Playing time" (or its Japanese equivalent) and whose third child is a text node; the first run
 * of digits in that text node is the runtime.
 *
 * @return the runtime, wrapping an empty string when none is found
 */
@Override
public Runtime scrapeRuntime() {
    String runtime = "";
    // Compiled once instead of once per list item.
    Pattern digits = Pattern.compile("\\d+");
    Elements elements = document.select("div[id=titlebox] ul li");
    for (Element element : elements) {
        if (element.childNodeSize() != 3) {
            continue;
        }
        Node labelNode = element.childNode(1);
        Node valueNode = element.childNode(2);
        // An empty label node would make childNode(0) throw; treat it as "not the runtime row".
        if (!(valueNode instanceof TextNode) || labelNode.childNodeSize() == 0) {
            continue;
        }
        String label = labelNode.childNode(0).toString();
        // "\u53CE\u9332\u6642\u9593" is the Japanese label for the recording time; it is spelled
        // with unicode escapes so the literal survives encoding mishaps.
        if (!label.startsWith("Playing time") && !label.startsWith("\u53CE\u9332\u6642\u9593")) {
            continue;
        }
        Matcher matcher = digits.matcher(valueNode.toString());
        if (matcher.find()) {
            runtime = matcher.group();
            break;
        }
    }
    return new Runtime(runtime);
}
Example 70
Project: jHTML2Md-master  File: HTML2Md.java View source code
/**
 * Converts the children of {@code element} into text lines and joins them.
 * <p>
 * Text nodes are appended to the last line with {@code #} and {@code *} prefixed by {@code /};
 * a blank text node is not allowed to start an otherwise empty line. Child elements are handed
 * to {@code processElement}. When joining, each line is trimmed and runs of blank lines are
 * collapsed so that at most one blank line is kept.
 *
 * @param element the element to convert
 * @return the joined text
 */
private static String getTextContent(Element element) {
    ArrayList<MDLine> lines = new ArrayList<MDLine>();
    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            TextNode textNode = (TextNode) child;
            MDLine line = getLastLine(lines);
            boolean lineHasContent = !line.getContent().equals("");
            if (lineHasContent || !textNode.isBlank()) {
                line.append(textNode.text().replaceAll("#", "/#").replaceAll("\\*", "/\\*"));
            }
        } else if (child instanceof Element) {
            processElement((Element) child, lines);
        } else {
            System.out.println();
        }
    }
    StringBuilder result = new StringBuilder();
    final int lastIndex = lines.size() - 1;
    int consecutiveBlank = 0;
    for (int i = 0; i <= lastIndex; i++) {
        String text = lines.get(i).toString().trim();
        consecutiveBlank = text.equals("") ? consecutiveBlank + 1 : 0;
        if (consecutiveBlank >= 2) {
            // Second (or later) blank line in a row: drop it.
            continue;
        }
        result.append(text);
        if (i < lastIndex) {
            result.append("\n");
        }
    }
    return result.toString();
}
Example 71
Project: jodtemplate-master  File: HtmlStylizer.java View source code
/**
 * Recursively converts an HTML element into output elements.
 * <p>
 * A {@code BR_TAG} element becomes a single break element. Otherwise child elements are processed
 * recursively and text nodes are turned into text elements; the collected elements are then
 * wrapped as list elements for {@code LI_TAG}, as one paragraph for {@code P_TAG}, or returned
 * as they are.
 *
 * @param element the HTML element to convert
 * @param arPr    passed through to {@code createTextElement}
 * @param apPr    passed through to {@code createListElements} / {@code createParagraphElement}
 * @param slide   passed through to {@code createTextElement}
 * @return the resulting elements
 * @throws IOException propagated from the helper methods
 */
private List<Element> process(final org.jsoup.nodes.Element element, final Element arPr, final Element apPr, final Slide slide) throws IOException {
    if (BR_TAG.equals(element.tagName())) {
        final Element lineBreak = new Element(PPTXDocument.BR_ELEMENT, getDrawingmlNamespace());
        return Arrays.asList(lineBreak);
    }
    final List<org.jsoup.nodes.Element> tags = getAllTags(element);
    final List<Element> converted = new ArrayList<>();
    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            converted.add(createTextElement(tags, arPr, (TextNode) child, slide));
        } else if (child instanceof org.jsoup.nodes.Element) {
            converted.addAll(process((org.jsoup.nodes.Element) child, arPr, apPr, slide));
        }
    }
    if (LI_TAG.equals(element.tagName())) {
        return createListElements(tags, converted, apPr, element);
    }
    if (P_TAG.equals(element.tagName())) {
        return Arrays.asList(createParagraphElement(converted, apPr));
    }
    return converted;
}
Example 72
Project: Prophet-master  File: QTreeHTMLHandler.java View source code
/**
 * Builds a 'table' <code>Element</code> from the given data.
 * <p>
 * <code>null</code> entries in <code>header</code>, in <code>rows</code> and inside each row are
 * skipped. A cell given as a <code>Node</code> is appended as a child node; any other value is set
 * as the cell's text via {@link Object#toString()}.
 *
 * @param header the optional header for the table
 * @param rows the rows for the table
 * @return the 'table' <code>Element</code>
 */
public static Element table(Object[] header, Object[]... rows) {
    Element table = new Element(Tag.valueOf("table"), "");
    if (header != null) {
        appendTableRow(table, "th", header);
    }
    if (rows != null) {
        for (Object[] row : rows) {
            if (row != null) {
                appendTableRow(table, "td", row);
            }
        }
    }
    return table;
}

/**
 * Appends one 'tr' to <code>table</code> with a <code>cellTag</code> cell per non-null entry of <code>cells</code>.
 */
private static void appendTableRow(Element table, String cellTag, Object[] cells) {
    Element rowEl = table.appendElement("tr");
    for (Object cell : cells) {
        if (cell == null) {
            continue;
        }
        Element cellEl = rowEl.appendElement(cellTag);
        if (cell instanceof Node) {
            cellEl.appendChild((Node) cell);
        } else {
            cellEl.text(cell.toString());
        }
    }
}
Example 73
Project: scheduler-legacy-master  File: CourseParser.java View source code
/**
 * Parse the Section Detail information page to retrieve the seating availability, registration restrictions,
 * and prerequisites information.
 * <p>
 * Seating values are stored under keys of the form {@code "seating." + header}. Restrictions and
 * prerequisites are currently only logged.
 *
 * @param document the Section Detail page HTML document
 * @param values the retrieved course data set, including the newly added Section Detail values
 */
private void parseCourseDetail(Document document, Map<String, String> values) {
    Elements availabilityHeaders = document.select("caption:containsOwn(Registration Availability) + tbody th.ddheader span");
    Elements availabilityValues = document.select("caption:containsOwn(Registration Availability) + tbody td.dddefault");
    // Pair headers with values only as far as both lists go, so a malformed table cannot
    // raise an IndexOutOfBoundsException.
    int pairCount = Math.min(availabilityHeaders.size(), availabilityValues.size());
    if (availabilityHeaders.size() != availabilityValues.size()) {
        logger.debug("Availability table mismatch: {} headers, {} values", availabilityHeaders.size(), availabilityValues.size());
    }
    for (int pos = 0; pos < pairCount; pos++) {
        String header = availabilityHeaders.get(pos).text();
        String value = availabilityValues.get(pos).text();
        values.put("seating." + header, value);
    }
    Element restrictionElement = document.select("span:containsOwn(Restriction)").first();
    if (restrictionElement == null) {
        logger.debug("No restriction found");
    } else {
        Tag spanTag = Tag.valueOf("span");
        // Walk the siblings up to the next span; also stop at the end of the sibling list
        // instead of running into a NullPointerException.
        for (Node node = restrictionElement.nextSibling(); node != null && !(node instanceof Element && ((Element) node).tag().equals(spanTag)); node = node.nextSibling()) {
            logger.debug("Restriction: {}", node);
            //TODO handle the restrictions list - grouping of restrictions (or restriction list elements) indicated by indentation
        }
    }
    Element prerequisiteElement = document.select("span:containsOwn(Prerequisite)").first();
    if (prerequisiteElement == null) {
        logger.debug("No prequisite found");
    } else {
        for (Node node = prerequisiteElement.nextSibling(); node != null; node = node.nextSibling()) {
            logger.debug("Prereq: {}", node);
            //TODO handle the prerequisite list - can be AND-OR or OR-AND formatted (keywords 'and' 'or' present to indicate w/ parentheses for grouping
        }
    }
}
Example 74
Project: WiFiAfterConnect-master  File: HtmlPage.java View source code
/**
 * Parses the page HTML and fills in the page's metas, title, onLoad handler, forms,
 * head/body scripts, inputs and WISPr parameters.
 *
 * @param html the HTML source of the page
 * @return {@code false} when the superclass parse fails or no document is produced,
 *         {@code true} otherwise
 */
@Override
public boolean parse(String html) {
    Log.d(Constants.TAG, "Page " + this);
    if (!super.parse(html))
        return false;
    Document doc = Jsoup.parse(html);
    if (doc == null) {
        Log.d(Constants.TAG, "Parsing html: doc == null");
        return false;
    }
    Log.d(Constants.TAG, "Parsing html: doc html == {" + doc.html() + "}");
    // some portals sneak form to outside of <div id="content"> - the bastards!
    // so the whole document is searched rather than just the content div.
    Element content = doc;
    // Metas with non-empty content, keyed by lower-cased http-equiv or name.
    for (Element meta : content.getElementsByTag("meta")) {
        String c = meta.attr("content");
        if (!c.isEmpty()) {
            if (meta.hasAttr("http-equiv"))
                httpEquivMetas.put(meta.attr("http-equiv").toLowerCase(Locale.ENGLISH), c);
            else if (meta.hasAttr("name"))
                namedMetas.put(meta.attr("name").toLowerCase(Locale.ENGLISH), c);
        }
    }
    // First non-empty title wins.
    for (Element te : content.getElementsByTag("title")) {
        title = te.data();
        if (!title.isEmpty())
            break;
    }
    // First body carrying an onLoad attribute wins.
    for (Element body : content.getElementsByTag("body")) {
        Log.d(Constants.TAG, "Parsing html: body found.");
        if (body.hasAttr("onLoad")) {
            onLoad = body.attr("onLoad");
            break;
        }
    }
    for (Element fe : content.getElementsByTag("form")) {
        HtmlForm f = new HtmlForm(fe);
        forms.add(f);
        Log.d(Constants.TAG, "Parsing html: form added. Forms == " + forms.toString());
        String fid = f.getId();
        if (!fid.isEmpty())
            namedForms.put(fid, f);
    }
    for (Element head : content.getElementsByTag("head")) {
        for (Element jse : head.getElementsByTag("script")) {
            if (isJavaScript(jse)) {
                JavaScript j = new JavaScript(jse);
                headJavaScripts.add(j);
                Log.d(Constants.TAG, "Parsing html: HEAD JS added. javaScripts = " + headJavaScripts.toString());
            }
        }
        if (!headJavaScripts.isEmpty())
            checkJavaScriptForMetaRefresh();
    }
    for (Element body : content.getElementsByTag("body")) {
        for (Element jse : body.getElementsByTag("script")) {
            if (isJavaScript(jse)) {
                JavaScript j = new JavaScript(jse);
                bodyJavaScripts.add(j);
                // Previously logged as "HEAD JS", which made head and body scripts
                // indistinguishable in the debug output.
                Log.d(Constants.TAG, "Parsing html: BODY JS added. javaScripts = " + bodyJavaScripts.toString());
            }
        }
    }
    // Inputs that name a form id are attached to that form when it is known.
    for (Element ie : content.getElementsByTag("input")) {
        HtmlInput i = new HtmlInput(ie, false);
        String fid = i.getFormId();
        if (!fid.isEmpty()) {
            HtmlForm f = namedForms.get(fid);
            if (f != null)
                f.addInput(i);
        }
    }
    // XML embedded in an HTML comment is handed to the WISPr parser; the last successful parse is kept.
    for (Element e : doc.getAllElements()) {
        for (Node n : e.childNodes()) {
            if (n instanceof Comment) {
                String commentData = ((Comment) n).getData();
                if (commentData.startsWith("<?xml")) {
                    WISPAccessGatewayParam wp = WISPAccessGatewayParam.parse(commentData);
                    if (wp != null)
                        wISPr = wp;
                }
            }
        }
    }
    return true;
}
Example 75
Project: Ouroboros-master  File: CommentParser.java View source code
/**
 * Converts the child nodes of one body line into a single styled character sequence.
 * Text nodes and unrecognised elements are formatted as normal text; {@code span}, {@code em},
 * {@code strong}, {@code u}, {@code s} and {@code a} elements go through their dedicated parsers.
 * Nodes that are neither text nor elements contribute nothing.
 *
 * @param bodyLine        the element holding the line's content
 * @param currentBoard    passed through to the anchor parser
 * @param resto           passed through to the anchor parser
 * @param fragmentManager passed through to the anchor parser
 * @param infiniteDbHelper passed through to the anchor parser
 * @return the concatenated, formatted text
 */
private CharSequence parseFormatting(Element bodyLine, String currentBoard, String resto, FragmentManager fragmentManager, InfiniteDbHelper infiniteDbHelper) {
    CharSequence parsedText = "";
    for (Node childNode : bodyLine.childNodes()) {
        final CharSequence piece;
        if (childNode instanceof TextNode) {
            piece = parseNormalText(new SpannableString(((TextNode) childNode).text()));
        } else if (childNode instanceof Element) {
            Element childElement = (Element) childNode;
            switch (childElement.tagName()) {
                case "span":
                    piece = parseSpanText(childElement);
                    break;
                case "em":
                    piece = parseItalicText(new SpannableString(childElement.text()));
                    break;
                case "strong":
                    piece = parseBoldText(new SpannableString(childElement.text()));
                    break;
                case "u":
                    piece = parseUnderlineText(new SpannableString(childElement.text()));
                    break;
                case "s":
                    piece = parseStrikethroughText(new SpannableString(childElement.text()));
                    break;
                case "a":
                    piece = parseAnchorText(childElement, currentBoard, resto, fragmentManager, infiniteDbHelper);
                    break;
                default:
                    piece = parseNormalText(new SpannableString(childElement.text()));
                    break;
            }
        } else {
            continue;
        }
        parsedText = TextUtils.concat(parsedText, piece);
    }
    return parsedText;
}
Example 76
Project: Skype4J-master  File: RichText.java View source code
/**
 * Recursively converts {@code node} into rich text attached to {@code root}.
 * <p>
 * Non-element nodes leave {@code root} untouched. For an element, its tag is applied to
 * {@code root}; without child elements its inner HTML is appended as text. With child elements
 * the inner HTML is cut into the text pieces lying between the children, and pieces and children
 * are then appended alternately.
 *
 * @param root the rich text to extend
 * @param node the node to convert
 * @return the rich text segment that was written to last
 */
private static RichText parse(RichText root, Node node) {
    if (!(node instanceof Element)) {
        return root;
    }
    Element elem = (Element) node;
    applyTag(root, elem);
    String inner = elem.html();
    Elements children = elem.children();
    if (children.isEmpty()) {
        root.appendText(inner);
        return root;
    }

    // Slice the inner HTML into the text before, between and after the child elements.
    final int childCount = children.size();
    String[] segments = new String[childCount + 1];
    int cursor = 0;
    for (int k = 0; k < childCount; k++) {
        Element child = children.get(k);
        int childStart = inner.indexOf("<" + child.tag().toString(), cursor);
        int childEnd = childStart + child.outerHtml().length();
        segments[k] = inner.substring(cursor, childStart);
        cursor = childEnd;
    }
    segments[childCount] = inner.substring(cursor);

    RichText current = root;
    Element previous = elem;
    for (int j = 0; j < segments.length; j++) {
        if (hasTag(root, previous)) {
            current.appendText(segments[j]);
        } else {
            current = current.append(segments[j], true);
            current.copyFormat(root);
        }
        if (j >= childCount) {
            // Trailing segment: no child follows it.
            continue;
        }
        Element child = children.get(j);
        if (!hasTag(current, child)) {
            current = current.append("", true);
            current.copyFormat(root);
        }
        current = parse(current, child);
        previous = child;
    }
    return current;
}
Example 77
Project: thredds-master  File: NcepHtmlScraper.java View source code
//////////////////////////////////////////////////////////////////
// http://www.nco.ncep.noaa.gov/pmb/docs/grib2/grib2_doc.shtml
/**
 * Fetches the GRIB2 top-level document and dispatches every "Table 4..." link:
 * "Table 4.2" goes to {@code parseTable42}, any other link starting with "Table 4" goes to
 * {@code parseCodeTable}. The title passed along is the link's next sibling rendered as a
 * string with "-" removed and trimmed, or {@code null} when there is no sibling.
 *
 * @throws IOException if the document cannot be fetched
 */
void parseTopDoc() throws IOException {
    final String source = "http://www.nco.ncep.noaa.gov/pmb/docs/grib2/grib2_doc.shtml";
    // 5 sec timeout
    final int timeoutMillis = 5 * 1000;
    Document doc = Jsoup.parse(new URL(source), timeoutMillis);
    for (Element link : doc.select("a[href]")) {
        Node sib = link.nextSibling();
        String title = (sib == null) ? null : StringUtil2.remove(sib.toString(), "-").trim();
        String linkText = link.text();
        if (linkText.equals("Table 4.2")) {
            parseTable42(link.attr("abs:href"), linkText, title);
        } else if (linkText.startsWith("Table 4")) {
            parseCodeTable(link.attr("abs:href"), linkText, title);
        }
    }
}
Example 78
Project: GPXConverter-master  File: GarminForm.java View source code
/**
 * Searches the subtree below {@code node}, depth-first, for the first comment node and extracts
 * the flow execution key from it.
 * <p>
 * A comment of the form {@code <!-- flowExecutionKey: [e1s1] -->} yields {@code e1s1}; any other
 * comment is returned as its trimmed string form, as before.
 *
 * @param node the node whose descendants are searched
 * @return the extracted key, or {@code null} when the subtree contains no comment
 */
private static String findFlowKey(Node node) {
    final String flowKeyPattern = "\\<\\!-- flowExecutionKey\\: \\[(e1s1)\\] --\\>";
    for (Node child : node.childNodes()) {
        if (child.nodeName().equals("#comment")) {
            return child.toString().replaceAll(flowKeyPattern, "$1").trim();
        }
        // The result of the recursive search used to be discarded, so a key nested below
        // the direct children was never returned.
        String nested = findFlowKey(child);
        if (nested != null) {
            return nested;
        }
    }
    return null;
}
Example 79
Project: opacclient-master  File: SISIS.java View source code
public SearchRequestResult parse_search(String html, int page) throws OpacErrorException, SingleResultFound {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/searchfoo");
    if (doc.select(".error").size() > 0) {
        throw new OpacErrorException(doc.select(".error").text().trim());
    } else if (doc.select(".nohits").size() > 0) {
        throw new OpacErrorException(doc.select(".nohits").text().trim());
    } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }
    int results_total = -1;
    String resultnumstr = doc.select(".box-header h2").first().text();
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        throw new SingleResultFound();
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }
    Elements table = doc.select("table.data tbody tr");
    identifier = null;
    Elements links = doc.select("table.data a");
    boolean haslink = false;
    for (int i = 0; i < links.size(); i++) {
        Element node = links.get(i);
        if (node.hasAttr("href") & node.attr("href").contains("singleHit.do") && !haslink) {
            haslink = true;
            try {
                List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(node.attr("href").replace(" ", "%20").replace("&", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();
        if (tr.select("td img[title]").size() > 0) {
            String title = tr.select("td img").get(0).attr("title");
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "").replace(".png", ""));
            MediaType default_by_title = defaulttypes.get(title);
            MediaType default_name = default_by_title != null ? default_by_title : default_by_fname;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONExceptionIllegalArgumentException |  e) {
                    sr.setType(default_name);
                }
            } else {
                sr.setType(default_name);
            }
        }
        String alltext = tr.text();
        if (alltext.contains("eAudio") || alltext.contains("eMusic")) {
            sr.setType(MediaType.MP3);
        } else if (alltext.contains("eVideo")) {
            sr.setType(MediaType.EVIDEO);
        } else if (alltext.contains("eBook")) {
            sr.setType(MediaType.EBOOK);
        } else if (alltext.contains("Munzinger")) {
            sr.setType(MediaType.EDOC);
        }
        if (tr.children().size() > 3 && tr.child(3).select("img[title*=cover]").size() == 1) {
            sr.setCover(tr.child(3).select("img[title*=cover]").attr("abs:src"));
            if (sr.getCover().contains("showCover.do")) {
                downloadCover(sr);
            }
        }
        Element middlething;
        if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) {
            middlething = tr.child(2);
        } else {
            middlething = tr.child(1);
        }
        List<Node> children = middlething.childNodes();
        if (middlething.select("div").not("#hlrightblock,.bestellfunktionen").size() == 1) {
            Element indiv = middlething.select("div").not("#hlrightblock,.bestellfunktionen").first();
            if (indiv.children().size() > 1) {
                children = indiv.childNodes();
            }
        } else if (middlething.select("span.titleData").size() == 1) {
            children = middlething.select("span.titleData").first().childNodes();
        }
        int childrennum = children.size();
        List<String[]> strings = new ArrayList<>();
        for (int ch = 0; ch < childrennum; ch++) {
            Node node = children.get(ch);
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                List<Node> subchildren = node.childNodes();
                for (int j = 0; j < subchildren.size(); j++) {
                    Node subnode = subchildren.get(j);
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { ((Element) node).tag().getName(), "text", text, ((Element) node).className(), node.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        String text = ((Element) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { ((Element) node).tag().getName(), ((Element) subnode).tag().getName(), text, ((Element) node).className(), node.attr("style") });
                        }
                    }
                }
            }
        }
        StringBuilder description = null;
        if (tr.select("span.Z3988").size() == 1) {
            // Sometimes there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            List<NameValuePair> z3988data;
            boolean hastitle = false;
            try {
                description = new StringBuilder();
                z3988data = URLEncodedUtils.parse(new URI("http://dummy/?" + tr.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    if (nv.getValue() != null) {
                        if (!nv.getValue().trim().equals("")) {
                            if (nv.getName().equals("rft.btitle") && !hastitle) {
                                description.append("<b>").append(nv.getValue()).append("</b>");
                                hastitle = true;
                            } else if (nv.getName().equals("rft.atitle") && !hastitle) {
                                description.append("<b>").append(nv.getValue()).append("</b>");
                                hastitle = true;
                            } else if (nv.getName().equals("rft.au")) {
                                description.append("<br />").append(nv.getValue());
                            } else if (nv.getName().equals("rft.date")) {
                                description.append("<br />").append(nv.getValue());
                            }
                        }
                    }
                }
            } catch (URISyntaxException e) {
                description = null;
            }
        }
        boolean described = false;
        if (description != null && description.length() > 0) {
            sr.setInnerhtml(description.toString());
            described = true;
        } else {
            description = new StringBuilder();
        }
        int k = 0;
        boolean yearfound = false;
        boolean titlefound = false;
        boolean sigfound = false;
        for (String[] part : strings) {
            if (!described) {
                if (part[0].equals("a") && (k == 0 || !titlefound)) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append("<b>").append(part[2]).append("</b>");
                    titlefound = true;
                } else if (part[2].matches("\\D*[0-9]{4}\\D*") && part[2].length() <= 10) {
                    yearfound = true;
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound && part[2].matches("^\\s*\\([0-9]{4}\\)$")) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound && part[2].matches("^\\s*\\([0-9]{4}\\)$")) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound) {
                    description.append("<br />");
                    description.append(part[2]);
                } else if (k > 1 && k < 4 && !sigfound && part[0].equals("text") && part[2].matches("^[A-Za-z0-9,\\- ]+$")) {
                    description.append("<br />");
                    description.append(part[2]);
                }
            }
            if (part.length == 4) {
                if (part[0].equals("span") && part[3].equals("textgruen")) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (part[0].equals("span") && part[3].equals("textrot")) {
                    sr.setStatus(SearchResult.Status.RED);
                }
            } else if (part.length == 5) {
                if (part[4].contains("purple")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                }
            }
            if (sr.getStatus() == null) {
                if ((part[2].contains("entliehen") && part[2].startsWith("Vormerkung ist leider nicht möglich")) || part[2].contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (part[2].startsWith("entliehen") || part[2].contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((part[2].startsWith("bestellbar") && !part[2].contains("nicht bestellbar")) || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar")) || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar")) || (part[2].startsWith("vormerkbar") && !part[2].contains("nicht vormerkbar")) || (part[2].contains("heute zurückgebucht")) || (part[2].contains("ausleihbar") && !part[2].contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO) || sr.getType().equals(MediaType.MP3)) // Especially Onleihe.de ebooks are often marked
                    // green though they are not available.
                    {
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
            k++;
        }
        if (!described) {
            sr.setInnerhtml(description.toString());
        }
        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}
Example 80
Project: MyTv-master  File: TvMaoCrawler.java View source code
/**
 * Parses the TV program table out of a station's program page.
 *
 * <p>The air date is built from the current calendar year plus the first
 * whitespace-separated token of the page's date heading; the second token
 * is converted to a week value via {@code weekStringToInt}. Every
 * {@code ul#pgrow li} entry with at least two child nodes yields one
 * {@link ProgramTable}, which is also announced to all registered listeners.
 *
 * @param html HTML of the program page
 * @return the parsed entries; if an entry has no SPAN child (no air time),
 *         parsing stops and the entries collected so far are returned
 */
private List<ProgramTable> parseProgramTable(String html) {
    Document doc = Jsoup.parse(html);
    Elements dateElements = doc.select("div.pgmain div[class=\"mt10 clear\"] b:first-child");
    String dateAndWeek = dateElements.get(0).text().trim();
    String[] dateAndWeekArray = dateAndWeek.split("\\s+");
    String date = Calendar.getInstance().get(Calendar.YEAR) + "-" + dateAndWeekArray[0];
    String weekString = dateAndWeekArray[1];
    int week = weekStringToInt(weekString);
    Elements stationElements = doc.select("aside[class=\"related-aside rt\"] section[class=\"aside-section clear\"] div.bar");
    String stationName = stationElements.get(0).text().trim();
    Elements programElements = doc.select("ul#pgrow li");
    List<ProgramTable> resultList = new ArrayList<ProgramTable>();
    for (Element element : programElements) {
        List<Node> children = element.childNodes();
        int size = children.size();
        if (size < 2) {
            continue;
        }
        int i = 0;
        // Find the program's air time: the first SPAN child.
        boolean foundAirTime = false;
        for (; i < size; i++) {
            Node child = children.get(i);
            if (child instanceof Element && "SPAN".equalsIgnoreCase(((Element) child).tagName())) {
                foundAirTime = true;
                break;
            }
        }
        if (!foundAirTime) {
            logger.info("the program table of " + stationName + " at " + date + " does not exists.");
            return resultList;
        }
        String airTime = ((Element) children.get(i++)).text().trim();
        // Local, single-threaded accumulation: StringBuilder avoids StringBuffer's locking.
        StringBuilder program = new StringBuilder();
        // Find the program name: text nodes up to and including the first A element.
        for (; i < size; i++) {
            Node child = children.get(i);
            if (child instanceof TextNode) {
                program.append(((TextNode) child).text().trim());
            } else if (child instanceof Element && "A".equalsIgnoreCase(((Element) child).tagName())) {
                program.append(((Element) child).text().trim());
                i++;
                break;
            }
        }
        // NOTE(review): this bound skips the node at index i when it is the last child;
        // "i < size" may have been intended - confirm against real pages.
        if (i < size - 1) {
            // There is still a text node following the link.
            Node child = children.get(i);
            if (child instanceof TextNode) {
                program.append(((TextNode) child).text().trim());
            }
        }
        ProgramTable pt = new ProgramTable();
        pt.setAirDate(date);
        pt.setAirTime(date + " " + airTime);
        pt.setProgram(program.toString().trim());
        pt.setStationName(stationName);
        pt.setWeek(week);
        for (CrawlEventListener listener : listeners) {
            listener.itemFound(new ProgramTableFoundEvent(this, pt));
        }
        resultList.add(pt);
    }
    return resultList;
}
Example 81
Project: Diary.Ru-Client-master  File: NetworkService.java View source code
/**
 * Applies modifications to every loaded diary page.
 * Page tweaks requested by users are made here.
 *
 * <p>Steps, in order: inject the theme stylesheet, repoint repost links,
 * optionally replace post-action images with their title text, strip most
 * {@code onclick} handlers, optionally replace external images with
 * load-on-click buttons, and finally append the app's JavaScript and the
 * user's signature.
 *
 * @param resultPage the page to modify (changed in place)
 */
private void mutateContent(Document resultPage) {
    // the page gets our own style
    String theme = mPreferences.getString("app.theme", "red");
    // The title was stored as mis-decoded UTF-8; it is restored to the intended Cyrillic text.
    resultPage.head().append("<link rel=\"stylesheet\" href=\"file:///android_asset/css/" + theme + ".css\" type=\"text/css\" media=\"all\" title=\"Стандарт\"/>");
    // the repost button points to the proper link
    Elements shareLinks = resultPage.select(".postLinks li[class^=quote]");
    for (Element shareLi : shareLinks) {
        // child(0) addresses child *elements*, so the guard must look at elements too;
        // counting all child nodes would let a text-only <li> through and child(0) would throw.
        if (shareLi.children().isEmpty()) {
            continue;
        }
        Element repostLink = shareLi.child(0);
        Element diaryRepost = shareLi.select("div a[href*=newpost]").first();
        if (diaryRepost != null) {
            repostLink.attr("href", diaryRepost.attr("href"));
        }
    }
    // text instead of the edit buttons
    if (mUseTextInsteadOfImages) {
        Elements postActionImages = resultPage.select("ul.postActionLinks img");
        for (Element img : postActionImages) {
            // convert to text
            if (img.hasAttr("title")) {
                Node text = new TextNode(img.attr("title"), resultPage.baseUri());
                img.replaceWith(text);
            }
        }
    }
    // JS fix-up
    Elements jsElems = resultPage.getElementsByAttribute("onclick");
    for (Element js : jsElems) {
        String link = js.attr("href");
        if (!link.contains("#more") && !link.contains("subscribe") && !link.contains("showresult") && !link.contains("up&signature=") && !link.contains("down&signature=") && !link.contains("tag_showedit")) {
            // Strip all JavaScript except MORE, moving posts up/down, voting results and subscription
            js.removeAttr("onclick");
        }
    }
    // swap out images when auto-loading is switched off
    if (!mLoadImages) {
        Elements images = resultPage.select("img[src^=http], a:has(img)");
        for (Element current : images) {
            if (current.tagName().equals("img")) {
                String src = current.attr("src");
                if (!src.contains("diary.ru") && !current.parent().className().equals("avatar") && !src.startsWith("/")) {
                    // every image on the page that fails the criteria is replaced by a button that loads it on click
                    String jsButton = "<input type='image' src='file:///android_asset/images/load_image.png' onclick='return handleIMGDown(this, \"" + src + "\")' />";
                    current.after(jsButton);
                    current.remove();
                }
            }
            if (current.tagName().equals("a")) {
                String src = current.getElementsByTag("img").attr("src");
                if (!src.contains("diary.ru") && !current.parent().className().equals("avatar") && !src.startsWith("/")) {
                    // every linked image on the page that fails the criteria is replaced by a button that loads it on click
                    String jsButton = "<input type='image' src='file:///android_asset/images/load_image.png' onclick='return handleADown(this, \"" + current.attr("href") + "\", \"" + src + "\")' />";
                    current.after(jsButton);
                    current.remove();
                }
            }
        }
    }
    // include the JavaScript
    resultPage.body().append(Utils.javascriptContent);
    // the signature must be visible to the JS methods
    resultPage.body().append("<script>var signature = '" + UserData.getInstance().getSignature() + "';</script>");
}
Example 82
Project: hn-android-master  File: BaseHTMLParser.java View source code
/**
 * Returns the text of the first direct child of {@code element} that is a
 * {@code TextNode}.
 *
 * @param element element whose direct children are inspected; may be null
 * @return that child's text, or "" when {@code element} is null or has no text-node child
 */
public static String getFirstTextValueInElementChildren(Element element) {
    if (element != null) {
        for (org.jsoup.nodes.Node candidate : element.childNodes()) {
            if (!(candidate instanceof TextNode)) {
                continue;
            }
            TextNode textChild = (TextNode) candidate;
            return textChild.text();
        }
    }
    return "";
}
Example 83
Project: JibbrJabbr-master  File: HtmlResource.java View source code
@Override
public void head(Node node, int depth) {
    // No-op: this implementation ignores both the node and its depth.
}
Example 84
Project: webmagic-master  File: CssSelector.java View source code
/**
 * Concatenates the text of every direct {@code TextNode} child of
 * {@code element}, in child order. Text inside nested elements is not
 * included.
 *
 * @param element element whose direct text is collected
 * @return the concatenated text, or "" when there is no text-node child
 */
protected String getText(Element element) {
    StringBuilder directText = new StringBuilder();
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        directText.append(((TextNode) child).text());
    }
    return directText.toString();
}
Example 85
Project: karma-exchange-master  File: HtmlUtil.java View source code
// Called when a node is first encountered: text nodes contribute their text,
// "li" nodes contribute a bullet prefix, everything else contributes nothing.
public void head(Node node, int depth) {
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    if (node.nodeName().equals("li")) {
        append("\n * ");
    }
}
Example 86
Project: StartupNews-master  File: BaseHTMLParser.java View source code
/**
 * Looks through the direct children of {@code element} and returns the text
 * of the first one that is a {@code TextNode}.
 *
 * @param element element to inspect; may be null
 * @return the first text-node child's text, or "" if {@code element} is null
 *         or none of its direct children is a text node
 */
public static String getFirstTextValueInElementChildren(Element element) {
    String firstText = "";
    if (element != null) {
        for (org.jsoup.nodes.Node child : element.childNodes()) {
            if (child instanceof TextNode) {
                firstText = ((TextNode) child).text();
                break;
            }
        }
    }
    return firstText;
}
Example 87
Project: jenkinsmobi-api-master  File: GoogleSsoHandler.java View source code
/**
 * Returns the trimmed whole text of the first direct {@code TextNode} child
 * of {@code errorDiv}.
 *
 * @param errorDiv element whose direct children are inspected
 * @return the trimmed text, or "" when there is no text-node child
 */
private String getDivText(final Element errorDiv) {
    String divText = "";
    for (final Node candidate : errorDiv.childNodes()) {
        if (!(candidate instanceof TextNode)) {
            continue;
        }
        divText = ((TextNode) candidate).getWholeText().trim();
        break;
    }
    return divText;
}
Example 88
Project: PrepayCredit-master  File: HtmlUtilities.java View source code
/**
 * Recursively removes every comment node from the subtree below {@code node}.
 *
 * @param node root of the subtree to clean (modified in place)
 */
private static void removeComments(Node node) {
    int position = 0;
    while (position < node.childNodes().size()) {
        Node current = node.childNode(position);
        if (!current.nodeName().equals("#comment")) {
            removeComments(current);
            position++;
            continue;
        }
        // Removing shifts the following siblings down, so the position stays put.
        current.remove();
    }
}
Example 89
Project: tika-wrapper-master  File: HtmlToPlaintTextSimple.java View source code
// Called on first sight of a node; emits the node's text for text nodes and
// a bullet prefix for "li" nodes.
public void head(Node node, int depth) {
    String tagName = node.nodeName();
    if (!(node instanceof TextNode)) {
        if (tagName.equals("li")) {
            append("\n * ");
        }
        return;
    }
    // TextNodes carry all user-readable text in the DOM.
    append(((TextNode) node).text());
}
Example 90
Project: apiman-master  File: TemplateScanner.java View source code
/**
 * Tells whether an element is free of child elements. Other kinds of child
 * nodes (for example text) do not count.
 *
 * @param element the element to inspect
 * @return true if none of the element's direct child nodes is an Element
 */
private static boolean hasNoChildren(Element element) {
    boolean foundElementChild = false;
    for (Node child : element.childNodes()) {
        if (child instanceof Element) {
            foundElementChild = true;
            break;
        }
    }
    return !foundElementChild;
}
Example 91
Project: step-master  File: RipHomePage.java View source code
/**
 * Strips all comment nodes from the subtree below {@code node}, descending
 * into every non-comment child.
 *
 * @param node root of the subtree to clean (modified in place)
 */
private void removeComments(Node node) {
    for (int index = 0; index < node.childNodes().size(); ) {
        Node candidate = node.childNode(index);
        boolean isComment = candidate.nodeName().equals("#comment");
        if (isComment) {
            // The next sibling slides into this index, so do not advance.
            candidate.remove();
            continue;
        }
        removeComments(candidate);
        index++;
    }
}