Java Examples for org.jsoup.safety.Whitelist

The following Java examples will help you understand the usage of org.jsoup.safety.Whitelist. These source code samples are taken from different open source projects.

Example 1
Project: android-opensource-library-56-master  File: SanitizeActivity.java View source code
/**
 * Builds the sanitize demo screen: the input field is pre-filled with markup
 * that carries an inline {@code onclick} handler, and a click on the sanitize
 * button writes the jsoup-cleaned version of the input into the output field.
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_sanitize);

    final EditText source = (EditText) findViewById(R.id.input_text);
    source.setText("<p><a href='http://example.com/' onclick='doAttack()'>Link</a></p>");
    final EditText target = (EditText) findViewById(R.id.sanitized_text);

    final OnClickListener sanitizeOnClick = new OnClickListener() {

        @Override
        public void onClick(View v) {
            // Clean whatever is currently typed, using the basic whitelist.
            final String raw = source.getText().toString();
            target.setText(Jsoup.clean(raw, Whitelist.basic()));
        }
    };
    findViewById(R.id.sanitize_button).setOnClickListener(sanitizeOnClick);
}
Example 2
Project: dataverse-master  File: MarkupChecker.java View source code
/**
 * Cleans untrusted HTML with jsoup, starting from the "basic with images"
 * whitelist and extending it with headings, a few extra inline tags and
 * image maps.
 * See http://jsoup.org/cookbook/cleaning-html/whitelist-sanitizer
 *
 * @param unsafe the markup to clean; may be {@code null}
 * @return the cleaned markup, or {@code null} when {@code unsafe} is {@code null}
 */
public static String sanitizeBasicHTML(String unsafe) {
    if (unsafe != null) {
        // Start from the stock policy and widen it step by step.
        Whitelist policy = Whitelist.basicWithImages();
        policy.addTags("h1", "h2", "h3", "kbd", "hr", "s", "del", "map", "area");
        // Image-map support: <img usemap>, <map name>, <area ...>.
        policy.addAttributes("img", "usemap");
        policy.addAttributes("map", "name");
        policy.addAttributes("area", "shape", "coords", "href", "title", "alt");
        // Every link in the output is given target="_blank".
        policy.addEnforcedAttribute("a", "target", "_blank");
        return Jsoup.clean(unsafe, policy);
    }
    return null;
}
Example 3
Project: voj-master  File: HtmlTextFilter.java View source code
/**
 * Filters a string that contains HTML: every tag is removed, after
 * {@code <br>} and {@code <p>} positions have been turned into newline
 * characters.
 *
 * @param text the string to filter
 * @return the filtered string, or {@code null} when {@code text} is {@code null}
 */
public static String filter(String text) {
    if (text == null) {
        return null;
    }
    final Document parsed = Jsoup.parse(text);
    parsed.outputSettings(new Document.OutputSettings().prettyPrint(false));
    // Plant a literal backslash-n marker where a line break is wanted ...
    parsed.select("br").append("\\n");
    parsed.select("p").prepend("\\n\\n");
    // ... and swap each marker for a real newline in the serialized markup.
    final String withBreaks = parsed.html().replaceAll("\\\\n", "\n");
    final Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
    return Jsoup.clean(withBreaks, "", Whitelist.none(), rawOutput);
}
Example 4
Project: quadriga-master  File: HTMLContentValidator.java View source code
/**
 * Validates the title and description of an about text or a project blog
 * entry. Both fields are required; in addition the description must only
 * contain markup allowed by the extended whitelist and the title only markup
 * allowed by jsoup's simple-text whitelist. Problems are recorded on
 * {@code err}; nothing is thrown for invalid input.
 *
 * @param obj the object to validate; {@code IAboutText} and
 *            {@code ProjectBlogEntry} are the types this method reads from
 * @param err collector for the validation errors
 */
@Override
public void validate(Object obj, Errors err) {
    String description = null;
    String title = null;
    if (obj instanceof IAboutText) {
        IAboutText abtText = (IAboutText) obj;
        description = abtText.getDescription();
        title = abtText.getTitle();
    } else if (obj instanceof ProjectBlogEntry) {
        ProjectBlogEntry projectBlog = (ProjectBlogEntry) obj;
        description = projectBlog.getDescription();
        title = projectBlog.getTitle();
    }
    Whitelist whitelist = ExtendedWhitelist.extendedWhiteListWithBase64();
    Whitelist titleWhitelist = Whitelist.simpleText();
    // validate all the input parameters
    ValidationUtils.rejectIfEmptyOrWhitespace(err, "title", "about_title.required");
    ValidationUtils.rejectIfEmptyOrWhitespace(err, "description", "about_description.required");
    // A missing value has already been reported as "required" above; handing
    // null to Jsoup.isValid would fail inside jsoup instead of producing a
    // validation error, so the markup check only runs on present values.
    if (description != null && !Jsoup.isValid(description, whitelist)) {
        err.rejectValue("description", "about_description.proper");
    }
    if (title != null && !Jsoup.isValid(title, titleWhitelist)) {
        err.rejectValue("title", "about_title.proper");
    }
}
Example 5
Project: symphony-master  File: ArticleQueryService.java View source code
/**
 * Gets preview content of the article specified with the given article id.
 *
 * <p>The preview is the article content run through emotion and Markdown
 * conversion, stripped of all HTML, and cut to 150 characters followed by
 * {@code " ...."} when it is at least that long.
 *
 * @param articleId the given article id
 * @param request   the specified request
 * @return preview content; {@code null} if the article does not exist or is a
 *         "thought" article; the "articleContentBlockLabel" text if the author
 *         is missing or invalid or the article is invalid; the
 *         "articleDiscussionLabel" text for a discussion the current user is
 *         neither author of nor mentioned in
 * @throws ServiceException service exception
 */
public String getArticlePreviewContent(final String articleId, final HttpServletRequest request) throws ServiceException {
    final JSONObject article = getArticle(articleId);
    if (null == article) {
        return null;
    }
    final int articleType = article.optInt(Article.ARTICLE_TYPE);
    if (Article.ARTICLE_TYPE_C_THOUGHT == articleType) {
        return null;
    }
    Stopwatchs.start("Get preview content");
    try {
        final int length = 150;
        String ret = article.optString(Article.ARTICLE_CONTENT);
        final String authorId = article.optString(Article.ARTICLE_AUTHOR_ID);
        final JSONObject author = userQueryService.getUser(authorId);
        // The former condition "null != author && A || B" let a null author
        // through whenever the article itself was valid, and the author was
        // then dereferenced further down. A missing author is now handled
        // like an invalid one.
        if (null == author
                || UserExt.USER_STATUS_C_INVALID == author.optInt(UserExt.USER_STATUS)
                || Article.ARTICLE_STATUS_C_INVALID == article.optInt(Article.ARTICLE_STATUS)) {
            return langPropsService.get("articleContentBlockLabel");
        }
        final Set<String> userNames = userQueryService.getUserNames(ret);
        final JSONObject currentUser = userQueryService.getCurrentUser(request);
        final String currentUserName = null == currentUser ? "" : currentUser.optString(User.USER_NAME);
        final String authorName = author.optString(User.USER_NAME);
        // Discussions are only previewed for the author and for users whose
        // names were extracted from the content.
        if (Article.ARTICLE_TYPE_C_DISCUSSION == articleType && !authorName.equals(currentUserName)) {
            final boolean invited = userNames.contains(currentUserName);
            if (!invited) {
                String blockContent = langPropsService.get("articleDiscussionLabel");
                blockContent = blockContent.replace("{user}", "<a href='" + Latkes.getServePath() + "/member/" + authorName + "'>" + authorName + "</a>");
                return blockContent;
            }
        }
        ret = Emotions.convert(ret);
        ret = Markdowns.toHTML(ret);
        ret = Jsoup.clean(ret, Whitelist.none());
        if (ret.length() >= length) {
            ret = StringUtils.substring(ret, 0, length) + " ....";
        }
        return ret;
    } finally {
        Stopwatchs.end();
    }
}
Example 6
Project: viritin-master  File: RichTextExample.java View source code
/**
 * Assembles the demo layout: three {@code RichText} variants (default
 * whitelist, relaxed whitelist that also allows {@code style} on
 * {@code h1}, and Markdown input) plus a {@code LabelField} rendered through
 * a {@code RichText} with the same relaxed whitelist.
 */
@Override
public Component getTestComponent() {
    // Default settings; per the original author the style is stripped here.
    RichText withDefaults = new RichText("<h1 style='color:red' class='foobar'>Jou!</h1>");

    // Relaxed whitelist that additionally keeps the style attribute on h1.
    RichText withRelaxedPolicy = new RichText("<h1 style='color:red'>Jou!</h1>") {

        @Override
        public Whitelist getWhitelist() {
            return Whitelist.relaxed().addAttributes("h1", "style");
        }
    };

    // Content supplied as Markdown instead of HTML.
    RichText fromMarkdown = new RichText().withMarkDown("This is **Markdown** formatted *text*");

    // RichText can also act as the label of a LabelField.
    LabelField<Integer> labelField = new LabelField<>();
    RichText labelRenderer = new RichText() {

        @Override
        public Whitelist getWhitelist() {
            return Whitelist.relaxed().addAttributes("h1", "style");
        }
    };
    labelField.setLabel(labelRenderer);
    labelField.setCaptionGenerator(i -> "<h1 style='color:blue'>" + i + "</h1>");
    labelField.setValue(69);

    return new MVerticalLayout(withDefaults, withRelaxedPolicy, fromMarkdown, labelField);
}
Example 7
Project: calendula-master  File: LeafletHtmlPostProcessor.java View source code
/**
 * Post-processes leaflet HTML: drops {@code nav} and {@code div#pdfurl},
 * cleans the rest against a relaxed whitelist and removes elements from a
 * fixed tag list that have no children and no visible text.
 *
 * @param html the markup to process
 * @return the cleaned document serialized as HTML
 */
@Override
public String process(String html) {
    Document parsed = Jsoup.parseBodyFragment(html);
    parsed.select("nav").remove();
    parsed.select("div#pdfurl").remove();

    // Whitelist used for the cleaning pass.
    Whitelist policy = Whitelist.relaxed();
    policy.addTags("div", "span", "p", "h1", "h2", "h3", "ul", "ol", "li", "a", "img");
    policy.preserveRelativeLinks(true);
    policy.addAttributes("img", "src");
    policy.addAttributes("a", "href");

    Document cleaned = new Cleaner(policy).clean(parsed);
    cleaned.outputSettings().escapeMode(Entities.EscapeMode.xhtml);

    // Tags that may be dropped when they turn out to be empty.
    Set<String> removable = new HashSet<>(Arrays.asList("div", "span", "strong", "p", "h1", "h2", "h3", "ul", "ol", "li", "a"));
    cleaned.select("p:matchesOwn((?is) )").remove();

    for (Element candidate : cleaned.getAllElements()) {
        if (!candidate.children().isEmpty()) {
            continue;
        }
        if (candidate.hasText() && !candidate.text().replaceAll(" ", "").trim().equals("")) {
            continue;
        }
        // Childless and without text: remove it if its tag is on the list.
        if (removable.contains(candidate.tagName())) {
            candidate.remove();
        }
    }
    return cleaned.html();
}
Example 8
Project: chukwa-master  File: XssFilter.java View source code
/**
 * Removes potential XSS content from a value: the input is canonicalized
 * with ESAPI, NUL characters are dropped, and all HTML is stripped with jsoup.
 *
 * @param value the string to filter; may be {@code null}
 * @return the filtered string, or {@code null} for {@code null} input
 */
public String filter(String value) {
    if (value != null) {
        // Canonicalize first so encoded payloads are seen in plain form.
        final String canonical = ESAPI.encoder().canonicalize(value);
        final String withoutNul = canonical.replaceAll("\0", "");
        // An empty whitelist keeps text only.
        return Jsoup.clean(withoutNul, Whitelist.none());
    }
    return null;
}
Example 9
Project: hibernate-validator-master  File: SafeHtmlValidator.java View source code
/**
 * Configures this validator from the annotation: stores the base URI, picks
 * the jsoup whitelist matching the annotation's whitelist type, and extends
 * it with the additionally declared tags and tag attributes.
 *
 * @param safeHtmlAnnotation the constraint annotation to read from
 */
@Override
public void initialize(SafeHtml safeHtmlAnnotation) {
    baseURI = safeHtmlAnnotation.baseURI();

    switch (safeHtmlAnnotation.whitelistType()) {
        case BASIC:
            whitelist = Whitelist.basic();
            break;
        case BASIC_WITH_IMAGES:
            whitelist = Whitelist.basicWithImages();
            break;
        case NONE:
            whitelist = Whitelist.none();
            break;
        case RELAXED:
            whitelist = Whitelist.relaxed();
            break;
        case SIMPLE_TEXT:
            whitelist = Whitelist.simpleText();
            break;
    }

    // Extras declared on the annotation widen the chosen base whitelist.
    whitelist.addTags(safeHtmlAnnotation.additionalTags());
    for (SafeHtml.Tag extra : safeHtmlAnnotation.additionalTagsWithAttributes()) {
        whitelist.addAttributes(extra.name(), extra.attributes());
    }
}
Example 10
Project: MusicDNA-master  File: LyricWiki.java View source code
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }
    String text;
    String originalArtist = artist;
    String originalTitle = song;
    try {
        //url = URLDecoder.decode(url, "utf-8");
        Document lyricsPage = Jsoup.connect(url).get();
        Element lyricbox = lyricsPage.select("div.lyricBox").get(0);
        lyricbox.getElementsByClass("references").remove();
        String lyricsHtml = lyricbox.html();
        final Document.OutputSettings outputSettings = new Document.OutputSettings().prettyPrint(false);
        text = Jsoup.clean(lyricsHtml, "", new Whitelist().addTags("br"), outputSettings);
        if (text.contains("&#"))
            text = Parser.unescapeEntities(text, true);
        text = text.replaceAll("\\[\\d\\]", "").trim();
        String title = lyricsPage.getElementsByTag("title").get(0).text();
        int colon = title.indexOf(':');
        if (artist == null)
            artist = title.substring(0, colon).trim();
        if (song == null) {
            int end = title.lastIndexOf("Lyrics");
            song = title.substring(colon + 1, end).trim();
        }
    } catch (IndexOutOfBoundsExceptionIOException |  e) {
        return new Lyrics(ERROR);
    }
    try {
        artist = URLDecoder.decode(artist, "UTF-8");
        song = URLDecoder.decode(song, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }
    if (text.contains("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") || text.equals("Instrumental <br />")) {
        Lyrics result = new Lyrics(NEGATIVE_RESULT);
        result.setArtist(artist);
        result.setTitle(song);
        return result;
    } else if (text.equals("") || text.length() < 3)
        return new Lyrics(NO_RESULT);
    else {
        Lyrics lyrics = new Lyrics(POSITIVE_RESULT);
        lyrics.setArtist(artist);
        lyrics.setTitle(song);
        lyrics.setOriginalArtist(originalArtist);
        lyrics.setOriginalTitle(originalTitle);
        lyrics.setText(text);
        lyrics.setSource("LyricsWiki");
        lyrics.setURL(url);
        return lyrics;
    }
}
Example 11
Project: skalli-master  File: TimelineResource.java View source code
/**
 * Builds the timeline feed of a project from the given entries.
 *
 * @param project the project whose name and id go into the feed header
 * @param host    prefix for the feed link
 * @param entries the timeline entries to convert, one feed entry each
 * @return the populated feed
 */
private SyndFeed getFeed(Project project, String host, List<Entry> entries) {
    // The project name is stripped of all markup before it is used in texts.
    String plainName = HtmlUtils.clean(project.getName(), Whitelist.none());

    SyndFeed result = new SyndFeedImpl();
    result.setTitle(MessageFormat.format("{0} | Timeline", plainName));
    result.setDescription(MessageFormat.format("Latest changes to project ''{0}''.", plainName));
    //$NON-NLS-1$
    result.setLink(host + RestUtils.URL_PROJECTS + project.getProjectId() + "/timeline");

    List<SyndEntry> items = new ArrayList<SyndEntry>();
    for (Entry source : entries) {
        SyndEntry item = new SyndEntryImpl();
        item.setTitle(MessageFormat.format("{0} | {1}", source.getTitle(), source.getSource()));
        item.setLink(source.getLink().getHref());
        item.setPublishedDate(source.getPublished());

        // Content type and value are copied over unchanged.
        SyndContent body = new SyndContentImpl();
        body.setType(source.getContent().getType());
        body.setValue(source.getContent().getValue());
        item.setDescription(body);

        items.add(item);
    }
    result.setEntries(items);
    return result;
}
Example 12
Project: appverse-web-master  File: ESAPIHelper.java View source code
/**
 * Strips potential XSS content from a value by canonicalizing it with ESAPI,
 * dropping NUL characters and removing every HTML tag with jsoup.
 *
 * @param value the value to clean; may be {@code null}
 * @return the cleaned value, or {@code null} for {@code null} input
 */
public static String stripXSS(String value) {
    if (value == null) {
        return value;
    }
    // Canonicalize first so encoded attacks are visible to the later steps.
    String cleaned = ESAPI.encoder().canonicalize(value);
    cleaned = cleaned.replaceAll("\0", "");
    // Tags are removed rather than escaped: a <script> tag simply disappears.
    return Jsoup.clean(cleaned, Whitelist.none());
}
Example 13
Project: coprhd-controller-master  File: SecurityUtils.java View source code
/**
 * Removes any potential XSS threats from the value.
 * Depends on the OWASP ESAPI (owasp.org) and jsoup (jsoup.org) libraries.
 *
 * @param value data to be cleaned; may be {@code null}
 * @return cleaned data, or {@code null} for {@code null} input
 */
public static String stripXSS(String value) {
    if (value != null) {
        // ESAPI canonicalizes the input first (both boolean flags false) ...
        final String canonical = ESAPI.encoder().canonicalize(value, false, false);
        final String withoutNul = canonical.replaceAll("\0", "");
        // ... then jsoup removes all HTML tags, <script> included, without
        // pretty-printing the result.
        final Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
        return Jsoup.clean(withoutNul, "", Whitelist.none(), rawOutput);
    }
    return null;
}
Example 14
Project: gitblit-master  File: JSoupXssFilter.java View source code
/**
 * Builds and returns a loose HTML whitelist similar to the one GitHub uses.
 *
 * https://github.com/github/markup/tree/master#html-sanitization
 *
 * @return a loose HTML whitelist
 */
protected Whitelist getRelaxedWhiteList() {
    final Whitelist relaxed = new Whitelist();

    // Allowed elements.
    relaxed.addTags("a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup", "dd",
            "del", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins",
            "kbd", "li", "ol", "p", "pre", "q", "samp", "small", "strike", "strong", "sub", "sup", "table",
            "tbody", "td", "tfoot", "th", "thead", "tr", "tt", "u", "ul", "var");

    // Allowed attributes, per element.
    relaxed.addAttributes("a", "class", "href", "style", "target", "title");
    relaxed.addAttributes("blockquote", "cite");
    relaxed.addAttributes("col", "span", "width");
    relaxed.addAttributes("colgroup", "span", "width");
    relaxed.addAttributes("div", "class", "style");
    relaxed.addAttributes("img", "align", "alt", "height", "src", "title", "width");
    relaxed.addAttributes("ol", "start", "type");
    relaxed.addAttributes("q", "cite");
    relaxed.addAttributes("span", "class", "style");
    relaxed.addAttributes("table", "class", "style", "summary", "width");
    relaxed.addAttributes("td", "abbr", "axis", "class", "colspan", "rowspan", "style", "width");
    relaxed.addAttributes("th", "abbr", "axis", "class", "colspan", "rowspan", "scope", "style", "width");
    relaxed.addAttributes("ul", "type");

    // Every link is forced to carry rel="nofollow".
    relaxed.addEnforcedAttribute("a", "rel", "nofollow");
    return relaxed;
}
Example 15
Project: HERD-master  File: HerdStringUtils.java View source code
/**
 * Strips HTML tags from a given input String, allows some tags to be retained via a whitelist
 *
 * @param fragment the specified String
 * @param whitelistTags the tags to keep, given either as bare names or in
 *        tag form such as {@code "<b>"}; only the alphabetic characters of
 *        each entry are used as the tag name
 *
 * @return cleaned String with allowed tags
 */
public static String stripHtml(String fragment, String... whitelistTags) {
    // Parse out html tags except those from a given list of whitelist tags
    Document dirty = Jsoup.parseBodyFragment(fragment);
    Whitelist whitelist = new Whitelist();
    for (String whitelistTag : whitelistTags) {
        // Reduce the entry to its alphabetic characters to obtain the tag name.
        // The pattern must be "\p{IsAlphabetic}"; without the "p" it is a
        // negated class of the literal letters of "IsAlphabetic" and mangles
        // names such as "em". Digits are dropped too, so names like "h1" are
        // not supported; complex tags are not handled either.
        String tagName = StringUtils.removePattern(whitelistTag, "[^\\p{IsAlphabetic}]");
        // Allow the tag together with its class attribute
        whitelist.addTags(tagName).addAttributes(tagName, "class");
    }
    Cleaner cleaner = new Cleaner(whitelist);
    Document clean = cleaner.clean(dirty);
    clean.outputSettings()
        .escapeMode(Entities.EscapeMode.base)
        // Set character encoding to UTF-8
        .charset(CharEncoding.UTF_8)
        // Make sure no line-breaks are added
        .prettyPrint(false);
    // return 'cleaned' html body
    return clean.body().html();
}
Example 16
Project: kune-master  File: ContentUnrenderer.java View source code
/**
 * Turns an HTML document back into text, elements and annotations.
 * The content is cleaned with jsoup's basic whitelist before it is parsed.
 *
 * @param content the content
 * @return the unrendered blip
 */
public static UnrenderedBlip unrender(final String content) {
    // Sanitize first, then walk the body of the sanitized document.
    final Document sanitizedDoc = Jsoup.parse(Jsoup.clean(content, Whitelist.basic()));

    final StringBuilder text = new StringBuilder();
    final Map<Integer, com.google.wave.api.Element> elementsByPosition = Maps.newHashMap();
    final Annotations collected = new Annotations();
    unrender(sanitizedDoc.body(), text, elementsByPosition, collected);

    return new UnrenderedBlip(text.toString(), elementsByPosition, collected);
}
Example 17
Project: lyrics-master  File: LyricWiki.java View source code
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }
    String text;
    String originalArtist = artist;
    String originalTitle = song;
    try {
        //url = URLDecoder.decode(url, "utf-8");
        Document lyricsPage = Jsoup.connect(url).get();
        Element lyricbox = lyricsPage.select("div.lyricBox").get(0);
        lyricbox.getElementsByClass("references").remove();
        String lyricsHtml = lyricbox.html();
        final Document.OutputSettings outputSettings = new Document.OutputSettings().prettyPrint(false);
        text = Jsoup.clean(lyricsHtml, "", new Whitelist().addTags("br"), outputSettings);
        if (text.contains("&#"))
            text = Parser.unescapeEntities(text, true);
        text = text.replaceAll("\\[\\d\\]", "").trim();
        String title = lyricsPage.getElementsByTag("title").get(0).text();
        int colon = title.indexOf(':');
        if (artist == null)
            artist = title.substring(0, colon).trim();
        if (song == null) {
            int end = title.lastIndexOf("Lyrics");
            song = title.substring(colon + 1, end).trim();
        }
    } catch (IndexOutOfBoundsExceptionIOException |  e) {
        return new Lyrics(ERROR);
    }
    try {
        artist = URLDecoder.decode(artist, "UTF-8");
        song = URLDecoder.decode(song, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }
    if (text.contains("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") || text.equals("Instrumental <br />")) {
        Lyrics result = new Lyrics(NEGATIVE_RESULT);
        result.setArtist(artist);
        result.setTitle(song);
        return result;
    } else if (text.equals("") || text.length() < 3)
        return new Lyrics(NO_RESULT);
    else {
        Lyrics lyrics = new Lyrics(POSITIVE_RESULT);
        lyrics.setArtist(artist);
        lyrics.setTitle(song);
        lyrics.setOriginalArtist(originalArtist);
        lyrics.setOriginalTitle(originalTitle);
        lyrics.setText(text);
        lyrics.setSource("LyricsWiki");
        lyrics.setURL(url);
        return lyrics;
    }
}
Example 18
Project: patientview-master  File: XssUtils.java View source code
/**
 * Cleans every String property of the given object in place: for each
 * String setter that has a matching String getter, the current value is
 * read, stripped of all HTML with jsoup, and written back.
 * Failures on individual properties are logged and do not stop the run.
 *
 * @param object the object whose String properties are cleaned
 * @param <T>    type of the object
 */
public <T> void cleanObjectForXss(T object) {
    Method[] methods = object.getClass().getMethods();
    // Index the no-argument methods by name. Getters are invoked without
    // arguments below, so overloads taking parameters must not shadow them.
    Map<String, Method> noArgMethodsByName = new HashMap<String, Method>();
    for (Method method : methods) {
        if (method.getParameterTypes().length == 0) {
            noArgMethodsByName.put(method.getName(), method);
        }
    }
    // find the string setters
    for (Method setter : methods) {
        if (!isStringSetter(setter)) {
            continue;
        }
        String setterName = setter.getName();
        if (!setterName.startsWith("set")) {
            continue;
        }
        // Swap only the leading "set" for "get"; a plain replace("set", "get")
        // also rewrote later occurrences (setAsset -> getAsget).
        String getterName = "get" + setterName.substring(3);
        Method getter = noArgMethodsByName.get(getterName);
        if (getter == null || !isStringGetter(getter)) {
            continue;
        }
        try {
            // get the data, clean it and invoke the setter with the new data
            String dirtyString = (String) getter.invoke(object);
            if (dirtyString != null) {
                String cleanString = Jsoup.clean(dirtyString, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
                setter.invoke(object, cleanString);
            }
        } catch (Exception e) {
            LOGGER.error(e.getMessage());
            LOGGER.debug(e.getMessage(), e);
        }
    }
}
Example 19
Project: QuickLyric-master  File: LyricWiki.java View source code
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }
    String text;
    String originalArtist = artist;
    String originalTitle = song;
    try {
        //url = URLDecoder.decode(url, "utf-8");
        Document lyricsPage = Jsoup.connect(url).get();
        Element lyricbox = lyricsPage.select("div.lyricBox").get(0);
        lyricbox.getElementsByClass("references").remove();
        String lyricsHtml = lyricbox.html();
        final Document.OutputSettings outputSettings = new Document.OutputSettings().prettyPrint(false);
        text = Jsoup.clean(lyricsHtml, "", new Whitelist().addTags("br"), outputSettings);
        if (text.contains("&#"))
            text = Parser.unescapeEntities(text, true);
        text = text.replaceAll("\\[\\d\\]", "").trim();
        String title = lyricsPage.getElementsByTag("title").get(0).text();
        int colon = title.indexOf(':');
        if (artist == null)
            artist = title.substring(0, colon).trim();
        if (song == null) {
            int end = title.lastIndexOf("Lyrics");
            song = title.substring(colon + 1, end).trim();
        }
    } catch (IndexOutOfBoundsExceptionIOException |  e) {
        return new Lyrics(ERROR);
    }
    try {
        artist = URLDecoder.decode(artist, "UTF-8");
        song = URLDecoder.decode(song, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }
    if (text.contains("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") || text.equals("Instrumental <br />")) {
        Lyrics result = new Lyrics(NEGATIVE_RESULT);
        result.setArtist(artist);
        result.setTitle(song);
        return result;
    } else if (text.equals("") || text.length() < 3)
        return new Lyrics(NO_RESULT);
    else {
        Lyrics lyrics = new Lyrics(POSITIVE_RESULT);
        lyrics.setArtist(artist);
        lyrics.setTitle(song);
        lyrics.setOriginalArtist(originalArtist);
        lyrics.setOriginalTitle(originalTitle);
        lyrics.setText(text);
        lyrics.setSource("LyricsWiki");
        lyrics.setURL(url);
        return lyrics;
    }
}
Example 20
Project: tinymce-renderer-plugin-master  File: TinyMCERendererReversePlugin.java View source code
/**
 * Converts rendered HTML back into an editable form: selected closing tags
 * and line breaks are swapped for placeholder tokens, the markup is cleaned
 * with jsoup, and the placeholders are finally turned into newlines/spaces.
 *
 * @param obj the value to transform; cast to {@code String} unless
 *            {@code null} or {@code stripNoTags()} is true
 * @return {@code obj} unchanged when it is {@code null} or no tags are to be
 *         stripped, otherwise the transformed String
 */
public Object transformForEdit(Object obj) {
    if (obj == null) {
        return obj;
    }
    if (stripNoTags()) {
        return obj;
    }
    String s = (String) obj;
    // Placeholders are used so the markers survive the jsoup cleaning pass.
    // NOTE(review): as written, this first replacement consumes every " ", so
    // the later patterns "<p> </p>" and "<br />" (which contain that same
    // character) can no longer match. Possibly the literals were originally
    // "&nbsp;" or a non-breaking space - confirm against the upstream file.
    s = s.replaceAll(" ", "tinymce-renderer-reverse_sp");
    if (stripAllTags()) {
        // Table rows only become line breaks when all tags are stripped.
        s = s.replaceAll("</tr>", "tinymce-renderer-reverse_br");
    }
    s = s.replaceAll("<p> </p>", "tinymce-renderer-reverse_br");
    s = s.replaceAll("</li>", "tinymce-renderer-reverse_br");
    s = s.replaceAll("<br />", "tinymce-renderer-reverse_br");
    s = s.replaceAll("</p>", "tinymce-renderer-reverse_br");
    s = s.replaceAll("</ul>", "tinymce-renderer-reverse_br");
    s = s.replaceAll("</ol>", "tinymce-renderer-reverse_br");
    s = s.replaceAll("</blockquote>", "tinymce-renderer-reverse_br");
    // Empty whitelist when everything is stripped, else the configured one.
    Whitelist wl = stripAllTags() ? Whitelist.none() : this.whitelist;
    String safe = Jsoup.clean(s, wl);
    // Turn the placeholders back into real newlines and spaces.
    safe = safe.replaceAll("tinymce-renderer-reverse_br", "\n");
    safe = safe.replaceAll("tinymce-renderer-reverse_sp", " ");
    return safe;
}
Example 21
Project: androidStuff-master  File: ExperimentalTests.java View source code
/**
 * Scratch test that exercises a few jsoup calls on a small fragment: parse,
 * insert a paragraph after the last one, select, remove by id, and build a
 * custom whitelist. It makes no assertions and prints nothing.
 */
@SuppressWarnings("unused")
public void testJsoup() throws Exception {
    org.jsoup.nodes.Document doc;

    // The second paragraph is left unclosed in the input.
    String fragment = "<div id='div1'>"
            + "<p id='para1'>This is the first paragraph</p>"
            + "<p id='para2'>Second paragraph here!"
            + "</div>";
    doc = Jsoup.parseBodyFragment(fragment);

    // Append a third paragraph behind the current last one.
    doc.select("p").last().after("<p id='para3'>Third paragraph I just added</p>");

    // Select all paragraphs, then detach the first one by id.
    org.jsoup.select.Elements elements = doc.select("p");
    elements = doc.select("#para1").remove();

    // Whitelist allowing only div (with class) and p (with id).
    org.jsoup.safety.Whitelist myWhitelist = new org.jsoup.safety.Whitelist();
    myWhitelist.addTags("div", "p");
    myWhitelist.addAttributes("div", "class");
    myWhitelist.addAttributes("p", "id");
}
Example 22
Project: jblog-master  File: ArticleServiceImpl.java View source code
/**
 * Creates a new article or updates an existing one from the submitted
 * values, including its text record and labels.
 *
 * @param vo   the submitted article values; an empty or {@code null} id
 *             means a new article is created
 * @param type the save type; "release" (case-insensitive) refreshes the
 *             update time of an existing article
 * @param user the user stored on the article and on new labels
 * @param ws   the web site the article and its labels belong to
 * @return the id of the saved article
 * @throws Exception propagated from the called helpers and DAOs
 */
@Override
public String saveArticle(ArticleVo vo, String type, User user, WebSite ws) throws Exception {
    boolean add = false;
    Article a = null;
    ArticleText at = null;
    Set<Lable> labes = new HashSet<Lable>();
    if (null == vo.getId() || vo.getId().isEmpty()) {
        // No id submitted: start a fresh article with default counters.
        a = new Article();
        at = new ArticleText();
        a.setArticleViewcount(0);
        a.setArticleStatus((short) 1);
        a.setArticleSort(0);
        a.setArticleLikes(0);
        a.setWebsiteid(ws.getId());
        if (null == vo.getArticlePubtime() || vo.getArticlePubtime().isEmpty()) {
            a.setArticlePubtime(TimeStampUtil.getCurrentDate());
        } else {
            a.setArticlePubtime(TimeStampUtil.convertStringToTimeStamp(vo.getArticlePubtime()));
        }
        a.setArticleUpdatetime(TimeStampUtil.getCurrentDate());
        add = true;
    } else {
        a = article_dao.getArticleByID(vo.getId());
        at = a.getArticleText();
        // If this is a release (publish), set the update time
        if (null != type && type.equalsIgnoreCase("release")) {
            a.setArticleUpdatetime(TimeStampUtil.getCurrentDate());
        }
    }
    a.setArticleCover(vo.getArticleCover());
    a.setArticleAllowcomments(null == vo.getArticleAllowcomments() ? false : vo.getArticleAllowcomments());
    // The title is stored as plain text (markup removed by parsing it).
    a.setArticleTitle(Jsoup.parse(vo.getArticleTitle()).text());
    a.setCategory(category_dao.getObj(vo.getCategory().getId()));
    a.setArticlePrivate(null == vo.getArticlePrivate() ? false : vo.getArticlePrivate());
    a.setArticleLinkurl(null == vo.getArticleLinkurl() ? "" : vo.getArticleLinkurl());
    a.setArticlePassword(vo.getArticlePassword());
    a.setArticleEditor(vo.getArticleEditor());
    if (null != vo.getArticleLableStr()) {
        String[] lables = vo.getArticleLableStr().replaceAll("\\s", "").split(",");
        Map<String, String> map = new HashMap<String, String>();
        // Deduplicate the values passed from the front end
        for (String str : lables) {
            if (!str.isEmpty()) {
                map.put(str, null);
            }
        }
        if (!add) {
            // Put the already existing labels into the map
            for (Lable la : a.getLables()) {
                map.put(la.getLableName(), "");
            }
        }
        // Only names still mapped to null (submitted, not already existing)
        // become new labels; names made of exactly 32 word characters are
        // skipped. NOTE(review): presumably those are ids/hashes - confirm.
        for (String keyword : map.keySet()) {
            String key = keyword.trim();
            if (map.containsKey(key)) {
                if (null == map.get(key) && !key.matches("\\w{32}")) {
                    labes.add(new Lable(ws, a, key, user.getId(), TimeStampUtil.getCurrentDate()));
                }
            }
        }
    }
    at.setArticleContent(vo.getArticleContent());
    at.setArticleSummary(vo.getArticleSummary());
    a.setUser(user);
    // The MD5 is taken over the content as submitted, before the HTML
    // cleaning below.
    a.setArticleMd5(CommomEncrypt.MD5(at.getArticleContent()));
    a.setLables(labes);
    if (a.getArticleEditor().equals("html")) {
        // HTML editor content is cleaned with a relaxed whitelist that also
        // keeps target on links and class on table rows.
        String _html = Jsoup.clean(at.getArticleContent(), Whitelist.relaxed().addAttributes("a", "target").addAttributes("tr", "class"));
        at.setArticleContent(_html);
    }
    if (add) {
        article_dao.addObj(a);
        // The text record takes over the article's id.
        at.setId(a.getId());
        article_dao.save(at);
    } else {
        article_dao.updateObj(a);
        article_dao.update(at);
    }
    return a.getId();
}
Example 23
Project: ORCID-Source-master  File: OrcidStringUtils.java View source code
/**
 * Cleans {@code s} with jsoup's {@code Whitelist.simpleText()} and then turns the
 * entities jsoup emitted for {@code <}, {@code >}, {@code &}, {@code '} and
 * {@code "} back into their literal characters.
 *
 * @param s the text to clean
 * @return the cleaned text with the basic entities restored
 */
public static String simpleHtml(String s) {
    String output = Jsoup.clean(s, "", Whitelist.simpleText(), outputSettings);
    // According to
    // http://jsoup.org/apidocs/org/jsoup/nodes/Entities.EscapeMode.html#xhtml
    // jsoup escapes lt, gt, amp, apos and quot for xhtml, so we restore them here.
    output = output.replace(LT, DECODED_LT);
    output = output.replace(GT, DECODED_GT);
    output = output.replace(APOS, DECODED_APOS);
    output = output.replace(QUOT, DECODED_QUOT);
    // The ampersand must be restored last. Presumably AMP is "&amp;" (the constant is
    // declared elsewhere): decoding it earlier turns text such as "&amp;quot;" into
    // "&quot;", which the following replacement would then decode a second time.
    output = output.replace(AMP, DECODED_AMP);
    return output;
}
Example 24
Project: site-master  File: RegistrationService.java View source code
/**
 * Converts HTML to plain text: every tag except <code>br</code> is stripped,
 * entities such as <code>&amp;quot;</code> are unescaped, and each
 * <code>br</code> is replaced by a CRLF line break.
 *
 * @param htmlText the HTML to convert
 * @return the plain-text form of {@code htmlText}
 */
String htmlTextToPlainText(final String htmlText) {
    // Only line breaks survive the cleaning pass.
    final Cleaner brOnlyCleaner = new Cleaner(Whitelist.none().addTags("br"));
    final Document stripped = brOnlyCleaner.clean(Jsoup.parse(htmlText));
    stripped.outputSettings()
            .prettyPrint(false)
            .escapeMode(EscapeMode.xhtml)
            .charset(StandardCharsets.UTF_8);
    final String bodyMarkup = stripped.body().html().trim();
    final String unescaped = Parser.unescapeEntities(bodyMarkup, true);
    return unescaped.replaceAll("<br(?: ?/)?>", "\r\n");
}
Example 25
Project: TorrentFreak-Reader-master  File: ArticleContentHttpTask.java View source code
/**
 * Downloads the page at {@code params[0]}, cleans it with an extended relaxed
 * whitelist and hands the cleaned document to the article provider for scraping.
 *
 * @param params the URL to fetch is taken from the first element
 * @return the values scraped by the provider, or {@code null} when fetching or
 *         scraping failed (the exception is then stored in {@code error})
 */
@Override
public Map<String, String> doInBackground(final String... params) {
    final Document fetched;
    try {
        // retrieve the document
        fetched = Jsoup.connect(params[0])
                .timeout(20000)
                .ignoreHttpErrors(true)
                .followRedirects(true)
                .get();
    } catch (IOException e) {
        error = e;
        return null;
    }

    // elements and attributes allowed in addition to the relaxed defaults
    final Whitelist allowed = Whitelist.relaxed()
            .addTags("abbr", "address", "area", "article", "aside", "embed", "footer", "header", "hr", "iframe", "label", "legend", "nav", "object", "param", "s", "section", "summary", "time", "video", "track", "wbr", "center")
            .addAttributes("a", "rel")
            .addAttributes("ul", "id")
            .addAttributes("li", "class")
            .addAttributes("img", "class")
            .addAttributes("img", "align")
            .addAttributes("span", "class")
            .addAttributes("table", "class")
            .addAttributes("p", "class")
            .addAttributes("iframe", "src", "scrolling", "width", "height", "frameborder");

    // strip everything the whitelist does not allow
    final Document cleaned = new Cleaner(allowed).clean(fetched);

    try {
        // scrape the required values from the document using the article provider
        return provider.scrape(cleaned);
    } catch (Exception e) {
        error = e;
        return null;
    }
}
Example 26
Project: XCoLab-master  File: HtmlUtil.java View source code
/**
 * Strips HTML from the input, keeping only what the given whitelist permits.
 *
 * @param text the unsafe input text
 * @param whitelist the tags and attributes that may remain
 * @param baseUri used to evaluate relative links
 * @return the body HTML of the cleaned document, or an empty string when
 *         {@code text} is empty according to {@code StringUtils.isEmpty}
 */
public static String clean(String text, Whitelist whitelist, String baseUri) {
    if (StringUtils.isEmpty(text)) {
        return "";
    }
    final Document dirty = Jsoup.parse(text, baseUri);
    final Document sanitized = new Cleaner(whitelist).clean(dirty);
    // Adjust escape mode, see
    // http://stackoverflow.com/questions/8683018/jsoup-clean-without-adding-html-entities
    sanitized.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
    return sanitized.body().html();
}
Example 27
Project: ajah-master  File: RomeUtils.java View source code
/**
 * Convert a Rome entry to an Ajah entry.
 *
 * <p>
 * Copies the basic fields, takes content and content type from the Rome contents
 * (the last one wins when there are several), converts the description to HTML
 * where needed, joins the category names with {@code |}, and derives a title from
 * the description or content when the entry has none.
 * </p>
 *
 * @param syndEntry
 *            The entry to convert.
 * @param feed
 *            The feed to create the entry in.
 * @return The converted entry. Will not be null.
 */
private static FeedEntry createEntry(final SyndEntry syndEntry, final Feed feed) {
    final FeedEntry entry = new FeedEntry();
    entry.setFeedId(feed.getId());
    entry.setFeedSourceId(feed.getFeedSourceId());
    entry.setAuthor(syndEntry.getAuthor());
    entry.setTitle(syndEntry.getTitle());
    entry.setHtmlUrl(syndEntry.getLink());
    // NOTE(review): the hash is computed from getUri() while the stored URL is getLink() - confirm this is intended.
    entry.setHtmlUrlSha1(HashUtils.sha1Hex(syndEntry.getUri()));
    entry.setPublished(syndEntry.getPublishedDate());
    entry.setUpdated(syndEntry.getUpdatedDate());
    @SuppressWarnings("unchecked") final List<SyndContent> contents = syndEntry.getContents();
    if (contents.size() < 1) {
        log.finest("Contents are empty");
        entry.setContentType(AjahMimeType.TEXT_PLAIN);
    } else if (contents.size() > 1) {
        log.warning(contents.size() + " contents in one entry");
    }
    for (final SyndContent content : contents) {
        if (content.getType() == null) {
            // TODO see if it's actually html
            entry.setContentType(AjahMimeType.TEXT_PLAIN);
        } else {
            entry.setContentType(AjahMimeType.get(content.getType()));
        }
        entry.setContent(content.getValue());
        if (!entry.getContentType().isText()) {
            log.warning("Non-text type of content: " + content.getType());
        }
    }
    if (syndEntry.getDescription() != null) {
        AjahMimeType descriptionType = null;
        if (syndEntry.getDescription().getType() == null) {
            // TODO see if it's actually html
            descriptionType = AjahMimeType.TEXT_PLAIN;
        } else {
            descriptionType = AjahMimeType.get(syndEntry.getDescription().getType());
        }
        switch(descriptionType) {
            case TEXT_PLAIN:
                entry.setDescription(HtmlUtils.toBodyHtml(syndEntry.getDescription().getValue()));
                break;
            case TEXT_HTML:
                entry.setDescription(syndEntry.getDescription().getValue());
                break;
            default:
                entry.setDescription(HtmlUtils.toBodyHtml(syndEntry.getDescription().getValue()));
                log.warning("Non-text type of description: " + descriptionType + " [" + syndEntry.getDescription().getType() + "]");
        }
    }
    @SuppressWarnings("unchecked") final List<SyndCategory> categories = syndEntry.getCategories();
    if (!CollectionUtils.isEmpty(categories)) {
        final StringBuilder categoryString = new StringBuilder();
        categoryString.append("|");
        for (final SyndCategory category : categories) {
            categoryString.append(category.getName());
            categoryString.append("|");
        }
        log.fine(categoryString.toString());
        entry.setCategories(categoryString.toString());
    }
    if (StringUtils.isBlank(entry.getTitle())) {
        if (!StringUtils.isBlank(entry.getDescription())) {
            entry.setTitle(Jsoup.clean(StringUtils.truncate(entry.getDescription(), 100), Whitelist.simpleText()));
        } else if (!StringUtils.isBlank(entry.getContent())) {
            // Clamp the end index: substring(0, 100) throws when the content is shorter than 100 characters.
            final String fallbackTitle = entry.getContent();
            entry.setTitle(fallbackTitle.substring(0, Math.min(fallbackTitle.length(), 100)));
        }
    }
    if (StringUtils.isBlank(entry.getContent()) && StringUtils.isBlank(entry.getDescription()) && StringUtils.isBlank(entry.getTitle())) {
        log.warning("Title, contents and description are all null");
    }
    entry.setContentSha1(HashUtils.sha1Hex(entry.getContent() + entry.getDescription() + entry.getCategories()));
    return entry;
}
Example 28
Project: NewSumServer-master  File: Article.java View source code
/**
     * Cleans up extra whitespace from the given text
     * @param sText the Text to cleanup
     * @return the text without any extra whitespace
     */
private String cleanUp(String sText) {
    if (sText != null) {
        sText = Jsoup.clean(sText, Whitelist.none());
        sText = sText.replaceAll("«|»", "");
        sText = sText.replaceAll(""", "");
        sText = sText.replaceAll(" ", "");
        sText = sText.replaceAll(">", "");
        sText = sText.replaceAll("&[lr]aquo;", "");
        return sText;
    } else {
        return "";
    }
}
Example 29
Project: ripme-master  File: DeviantartRipper.java View source code
/**
 * Attempts to download description for image.
 * Comes in handy when people put entire stories in their description.
 * If no description was found, returns null.
 *
 * @param url The URL the description will be retrieved from
 * @param page The gallery page the URL was found on
 * @return A String[] with first object being the description, and the second object being
 *         the image file name (without extension) if found; null when this is a test run
 *         or the description could not be fetched.
 */
@Override
public String[] getDescription(String url, Document page) {
    if (isThisATest()) {
        return null;
    }
    try {
        // Fetch the image page
        Response resp = Http.url(url).referrer(this.url).cookies(cookies).response();
        cookies.putAll(resp.cookies());
        // Try to find the description
        Document documentz = resp.parse();
        Element ele = documentz.select("div.dev-description").first();
        if (ele == null) {
            throw new IOException("No description found");
        }
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
        // Mark line breaks with a literal "\n" so they survive the tag stripping below.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");
        // TODO Make this not make a newline if someone just types \n into the description.
        String description = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));

        // Work out the image file name from the thumbnail on the gallery page.
        // first() is used so a missing thumbnail yields "no file name" instead of
        // an uncaught IndexOutOfBoundsException.
        String fullSize = null;
        Element thumb = page.select("div.zones-container span.thumb[href=\"" + url + "\"]").first();
        if (thumb != null) {
            if (!thumb.attr("data-super-full-img").isEmpty()) {
                fullSize = thumb.attr("data-super-full-img");
            } else {
                String spanUrl = thumb.attr("href");
                fullSize = jsonToImage(page, spanUrl.substring(spanUrl.lastIndexOf('-') + 1));
            }
        }
        if (fullSize == null) {
            return new String[] { description };
        }
        // Keep only the last path segment.
        String[] split = fullSize.split("/");
        fullSize = split[split.length - 1];
        // Drop the extension, if there is one.
        int extensionStart = fullSize.lastIndexOf(".");
        if (extensionStart >= 0) {
            fullSize = fullSize.substring(0, extensionStart);
        }
        return new String[] { description, fullSize };
    } catch (IOException ioe) {
        logger.info("Failed to get description at " + url + ": '" + ioe.getMessage() + "'");
        return null;
    }
}
Example 30
Project: jHTML2Md-master  File: HTML2Md.java View source code
/**
 * Cleans the document with the relaxed whitelist and returns the text produced by
 * {@code getTextContent}, prefixed with a {@code "# title"} heading line when the
 * document has a non-blank title. Resets {@code indentation} to -1 first.
 *
 * @param dirtyDoc the document to convert
 * @return the converted text
 */
private static String parseDocument(Document dirtyDoc) {
    indentation = -1;
    final String heading = dirtyDoc.title();
    final Document cleaned = new Cleaner(Whitelist.relaxed()).clean(dirtyDoc);
    cleaned.outputSettings().escapeMode(EscapeMode.xhtml);
    final String body = getTextContent(cleaned);
    if (heading.trim().isEmpty()) {
        return body;
    }
    return "# " + heading + "\n\n" + body;
}
Example 31
Project: mbox_tools-master  File: MessageParser.java View source code
/**
 * Parse given Message into Mail.
 *
 * <p>
 * Header fields are read first (a Message-ID header is mandatory), then the body is
 * parsed. The snippet is taken from the first text part without quotes, else the
 * first text part, else the text of the first HTML part; it is cut to at most 250
 * characters and its whitespace is collapsed.
 * </p>
 *
 * @param message   the message to parse
 * @param idsuffix  This value gets appended to the message-id.
 * @return the parsed mail
 * @throws MessageParseException if the Message-ID header is missing or empty, the
 *         date header cannot be parsed, or the body cannot be read
 */
public static Mail parse(Message message, String idsuffix) throws MessageParseException {
    String author_name = null;
    String author_email = null;
    String[] to = null;
    String subject_original = null;
    String subject = null;
    String date = null;
    String message_id_original = null;
    String message_id = null;
    String[] references = null;
    String in_reply_to = null;
    Map<String, Field> headers = getMessageHeaders(message);
    boolean messageIdPresent = false;
    for (Field f : headers.values()) {
        switch(MessageHeader.getValue(f.getName())) {
            case FROM:
                Author author = extractValue((MailboxListField) f);
                author_name = author.name;
                author_email = author.email;
                break;
            case TO:
                List<String> tos = new ArrayList<>();
                for (String recipient : extractValue((AddressListField) f)) {
                    tos.add(recipient);
                }
                to = tos.toArray(new String[tos.size()]);
                break;
            case SUBJECT:
                subject_original = extractValue((UnstructuredField) f);
                subject = normalizeSubject(subject_original);
                break;
            case DATE:
                Date d = extractValue((DateTimeField) f);
                if (d != null) {
                    date = defaultDatePrinter.print(d.getTime());
                } else {
                    // The Message-ID header may be absent; do not let that mask the date problem with an NPE.
                    Field midField = headers.get(MessageHeader.MESSAGE_ID.toString());
                    String mid = midField != null ? midField.getBody() : null;
                    log.warn("Unable to parse header field '{}' for message-id: '{}'", f, mid);
                    throw new MessageParseException("Unable to parsed a date field. Skipping message [" + mid + "]");
                }
                break;
            case MESSAGE_ID:
                String id = extractValue((UnstructuredField) f);
                if (isNullOrEmpty(id)) {
                    throw new MessageParseException("Message-ID header is null or empty.");
                }
                message_id_original = id;
                message_id = id;
                if (!isNullOrEmpty(idsuffix)) {
                    message_id += idsuffix;
                }
                messageIdPresent = true;
                break;
            case REFERENCES:
                List<String> _references = new ArrayList<>();
                for (String value : extractValue((UnstructuredField) f).trim().split("\\s+")) {
                    _references.add(value);
                }
                references = _references.toArray(new String[_references.size()]);
                break;
            case IN_REPLY_TO:
                in_reply_to = extractValue((UnstructuredField) f);
                break;
        }
    }
    if (!messageIdPresent) {
        throw new MessageParseException("Message-ID header not found.");
    }
    MessageBodyParser.MailBodyContent content;
    try {
        content = MessageBodyParser.parse(message);
    } catch (IOException e) {
        throw new MessageParseException(e);
    }
    String snippet = "";
    if (content.getFirstTextContentWithoutQuotes() != null) {
        snippet = content.getFirstTextContentWithoutQuotes();
    } else if (content.getFirstTextContent() != null) {
        snippet = content.getFirstTextContent();
    } else if (content.getFirstHtmlContent() != null) {
        snippet = Jsoup.parse(Jsoup.clean(content.getFirstHtmlContent(), Whitelist.relaxed())).text();
    } else {
        // TODO get text snippet from other fields
    }
    // Keep at most 250 characters. substring's end index is exclusive, so length()
    // (not length() - 1) keeps the whole of a short snippet, and 0 is valid for "".
    snippet = snippet.substring(0, Math.min(snippet.length(), 250))
            .replaceAll("^>From", "From")
            .replaceAll("\\s+", " ")
            .trim();
    String message_snippet = snippet;
    String first_text_message = content.getFirstTextContent();
    String first_text_message_without_quotes = content.getFirstTextContentWithoutQuotes();
    String first_html_message = content.getFirstHtmlContent();
    List<String> textMessages = new ArrayList<>();
    for (String part : content.getTextMessages()) {
        textMessages.add(part);
    }
    String[] text_messages = textMessages.toArray(new String[textMessages.size()]);
    Integer text_messages_cnt = content.getTextMessages().size();
    List<String> htmlMessages = new ArrayList<>();
    for (String part : content.getHtmlMessages()) {
        htmlMessages.add(part);
    }
    String[] html_messages = htmlMessages.toArray(new String[htmlMessages.size()]);
    Integer html_messages_cnt = content.getHtmlMessages().size();
    // The attachment array stays null when there are no attachments; only the count is set.
    MailAttachment[] message_attachments = null;
    Integer message_attachments_cnt;
    if (content.getAttachments().size() > 0) {
        message_attachments_cnt = content.getAttachments().size();
        message_attachments = content.getAttachments().toArray(new MailAttachment[message_attachments_cnt]);
    } else {
        message_attachments_cnt = 0;
    }
    return new Mail(message_id, message_id_original, to, subject_original, subject, author_name, author_email, date, in_reply_to, references, message_snippet, first_text_message, first_text_message_without_quotes, first_html_message, text_messages, text_messages_cnt, html_messages, html_messages_cnt, message_attachments, message_attachments_cnt);
}
Example 32
Project: solo-master  File: CommentMgmtService.java View source code
/**
 * Validates a request to add a comment and sanitizes its name and content.
 *
 * <p>
 * On success the comment name is stripped of all HTML, and the content is rendered
 * from Markdown, cleaned with the relaxed whitelist and passed through
 * {@code Emotions.toAliases}; both values are written back into the request object.
 * </p>
 *
 * @param requestJSONObject the specified comment adding request, for example,      <pre>
 * {
 *     "type": "", // "article"/"page"
 *     "oId": "",
 *     "commentName": "",
 *     "commentEmail": "",
 *     "commentURL": "",
 *     "commentContent": "",
 * }
 * </pre>
 *
 * @return check result, for example,      <pre>
 * {
 *     "sc": boolean,
 *     "msg": "" // Exists if "sc" equals to false
 * }
 * </pre>
 */
public JSONObject checkAddCommentRequest(final JSONObject requestJSONObject) {
    final JSONObject result = new JSONObject();
    try {
        result.put(Keys.STATUS_CODE, false);

        // Commenting must be enabled site-wide ...
        final JSONObject prefs = preferenceQueryService.getPreference();
        final boolean siteAllowsComments = null != prefs && prefs.optBoolean(Option.ID_C_COMMENTABLE);
        if (!siteAllowsComments) {
            result.put(Keys.MSG, langPropsService.get("notAllowCommentLabel"));
            return result;
        }

        // ... and on the article or page the comment targets.
        final String targetId = requestJSONObject.optString(Keys.OBJECT_ID);
        final String targetType = requestJSONObject.optString(Common.TYPE);
        final JSONObject target;
        final String commentableKey;
        if (Article.ARTICLE.equals(targetType)) {
            target = articleRepository.get(targetId);
            commentableKey = Article.ARTICLE_COMMENTABLE;
        } else {
            target = pageRepository.get(targetId);
            commentableKey = Page.PAGE_COMMENTABLE;
        }
        if (null == target || !target.optBoolean(commentableKey)) {
            result.put(Keys.MSG, langPropsService.get("notAllowCommentLabel"));
            return result;
        }

        String name = requestJSONObject.getString(Comment.COMMENT_NAME);
        final int nameLength = name.length();
        if (MAX_COMMENT_NAME_LENGTH < nameLength || MIN_COMMENT_NAME_LENGTH > nameLength) {
            LOGGER.log(Level.WARN, "Comment name is too long[{0}]", name);
            result.put(Keys.MSG, langPropsService.get("nameTooLongLabel"));
            return result;
        }

        final String email = requestJSONObject.getString(Comment.COMMENT_EMAIL).trim().toLowerCase();
        if (!Strings.isEmail(email)) {
            LOGGER.log(Level.WARN, "Comment email is invalid[{0}]", email);
            result.put(Keys.MSG, langPropsService.get("mailInvalidLabel"));
            return result;
        }

        final String url = requestJSONObject.optString(Comment.COMMENT_URL);
        if (!Strings.isURL(url) || StringUtils.contains(url, "<")) {
            LOGGER.log(Level.WARN, "Comment URL is invalid[{0}]", url);
            result.put(Keys.MSG, langPropsService.get("urlInvalidLabel"));
            return result;
        }

        String content = requestJSONObject.optString(Comment.COMMENT_CONTENT);
        final int contentLength = content.length();
        if (MAX_COMMENT_CONTENT_LENGTH < contentLength || MIN_COMMENT_CONTENT_LENGTH > contentLength) {
            LOGGER.log(Level.WARN, "Comment conent length is invalid[{0}]", content.length());
            result.put(Keys.MSG, langPropsService.get("commentContentCannotEmptyLabel"));
            return result;
        }

        result.put(Keys.STATUS_CODE, true);

        // name XSS process
        name = Jsoup.clean(name, Whitelist.none());
        requestJSONObject.put(Comment.COMMENT_NAME, name);

        // content Markdown & XSS process, then emoji
        content = Markdowns.toHTML(content);
        content = Jsoup.clean(content, Whitelist.relaxed());
        content = Emotions.toAliases(content);
        requestJSONObject.put(Comment.COMMENT_CONTENT, content);

        return result;
    } catch (final Exception e) {
        LOGGER.log(Level.WARN, "Checks add comment request[" + requestJSONObject.toString() + "] failed", e);
        result.put(Keys.STATUS_CODE, false);
        result.put(Keys.MSG, langPropsService.get("addFailLabel"));
        return result;
    }
}
Example 33
Project: commafeed-master  File: FeedUtils.java View source code
/**
 * Builds the whitelist of tags, attributes and URL protocols allowed in feed
 * content. Every link is forced to {@code target="_blank"} and
 * {@code rel="noreferrer"}.
 *
 * @return a newly created whitelist
 */
private static synchronized Whitelist buildWhiteList() {
    final Whitelist allowed = new Whitelist()
            .addTags("a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup", "dd", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "i", "iframe", "img", "li", "ol", "p", "pre", "q", "small", "strike", "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u", "ul");

    // text direction is allowed on the block-level containers
    for (final String tag : new String[] { "div", "pre", "code", "table", "p" }) {
        allowed.addAttributes(tag, "dir");
    }

    // per-tag attributes
    allowed.addAttributes("a", "href", "title")
            .addAttributes("blockquote", "cite")
            .addAttributes("col", "span", "width")
            .addAttributes("colgroup", "span", "width")
            .addAttributes("iframe", "src", "height", "width", "allowfullscreen", "frameborder", "style")
            .addAttributes("img", "align", "alt", "height", "src", "title", "width", "style")
            .addAttributes("ol", "start", "type")
            .addAttributes("q", "cite")
            .addAttributes("table", "border", "bordercolor", "summary", "width")
            .addAttributes("td", "border", "bordercolor", "abbr", "axis", "colspan", "rowspan", "width")
            .addAttributes("th", "border", "bordercolor", "abbr", "axis", "colspan", "rowspan", "scope", "width")
            .addAttributes("ul", "type");

    // URL protocols accepted in link-like attributes
    allowed.addProtocols("a", "href", "ftp", "http", "https", "magnet", "mailto")
            .addProtocols("blockquote", "cite", "http", "https")
            .addProtocols("img", "src", "http", "https")
            .addProtocols("q", "cite", "http", "https");

    // attributes forced onto every link
    allowed.addEnforcedAttribute("a", "target", "_blank")
            .addEnforcedAttribute("a", "rel", "noreferrer");

    return allowed;
}
Example 34
Project: enzymeportal-master  File: SearchController.java View source code
/**
 * Handles the search form submission.
 *
 * <p>
 * Results cached under the sanitized search key are reused (e.g. when paginating);
 * otherwise a new search is run according to the search type. When there are
 * results they are cached, filtered and exposed to the view.
 * </p>
 *
 * @param searchModel the submitted search form
 * @param model the view model to populate
 * @param searchTerm the search text (optional request parameter)
 * @param ec optional request parameter passed to the keyword search
 * @param searchId optional request parameter passed to the keyword search and the history
 * @param keywordType optional request parameter passed to the keyword search and the history
 * @param session the HTTP session
 * @param request the HTTP request
 * @param response the HTTP response; CORS headers are set on it
 * @return {@code "search"} when there are results to show, {@code "error"} otherwise
 */
@RequestMapping(value = "/search", method = RequestMethod.POST)
public String postSearchResult(SearchModel searchModel, Model model, @RequestParam(required = false, value = "searchTerm") String searchTerm, @RequestParam(required = false, value = "ec") String ec, @RequestParam(required = false, value = "searchId") String searchId, @RequestParam(required = false, value = "keywordType") String keywordType, HttpSession session, HttpServletRequest request, HttpServletResponse response) {
    response.setHeader("Access-Control-Allow-Origin", "*");
    response.setHeader("Access-Control-Allow-Methods", "POST, GET, OPTIONS, DELETE");
    response.setHeader("Access-Control-Max-Age", "3600");
    response.setHeader("Access-Control-Allow-Headers", "x-requested-with");

    String searchKey = null;
    try {
        // See if it is already there, perhaps we are paginating:
        final Map<String, SearchResults> cachedSearches = getPreviousSearches(session.getServletContext());
        searchKey = Jsoup.clean(getSearchKey(searchModel.getSearchparams()), Whitelist.basic());
        SearchResults found = cachedSearches.get(searchKey);

        if (found == null) {
            // New search:
            clearHistory(session);
            switch(searchModel.getSearchparams().getType()) {
                case KEYWORD:
                    found = searchKeyword(ec, searchTerm, searchId, keywordType, ASSOCIATED_PROTEIN_LIMIT);
                    model.addAttribute(SEARCH_VIDEO, SEARCH_VIDEO);
                    break;
                case SEQUENCE:
                    model.addAttribute(SEQUENCE_VIDEO, SEQUENCE_VIDEO);
                    break;
                case COMPOUND:
                    found = searchCompound(model, searchModel);
                    break;
                default:
            }
        }

        if (found == null) {
            // nothing to show
            return "error";
        }

        cacheSearch(session.getServletContext(), searchKey, found);
        setLastSummaries(session, found.getSummaryentries());
        searchModel.setSearchresults(found);
        applyFilters(searchModel, request);
        model.addAttribute("searchConfig", searchConfig);
        model.addAttribute("searchModel", searchModel);
        model.addAttribute("pagination", getPagination(searchModel));
        request.setAttribute("searchTerm", searchModel.getSearchparams().getText());
        clearHistory(session);
        addToHistory(session, searchModel.getSearchparams().getType(), searchKey, searchId, keywordType);
        return "search";
    } catch (Exception e) {
        logger.error("one of the search params (Text or Sequence is :" + searchKey, e);
    }
    return "error";
}
Example 35
Project: jw-community-master  File: StringUtil.java View source code
/**
 * Removes all HTML tags that are not in the allowed list from the content.
 * The attributes style, class, title, id, src, href and target are kept on every
 * allowed tag.
 *
 * @param content the HTML to strip; returned unchanged when null or empty
 * @param allowedTag names of the tags to keep; null or an empty array keeps no tags,
 *                   and null or empty entries are ignored
 * @return the content with all non-allowed tags removed
 */
public static String stripHtmlTag(String content, String[] allowedTag) {
    if (content == null || content.isEmpty()) {
        return content;
    }
    Whitelist whitelist = Whitelist.none().addAttributes(":all", "style", "class", "title", "id", "src", "href", "target");
    if (allowedTag != null) {
        for (String tag : allowedTag) {
            // addTags rejects null/empty names, so skip them instead of failing the whole call
            if (tag != null && !tag.isEmpty()) {
                whitelist.addTags(tag);
            }
        }
    }
    // Reset the whitelist's private "protocols" map so no protocol restriction applies.
    // NOTE(review): assumes ReflectionUtils.findField returns null when the field does not
    // exist (e.g. after a jsoup upgrade) - confirm; in that case the reset is skipped.
    java.lang.reflect.Field field = ReflectionUtils.findField(whitelist.getClass(), "protocols");
    if (field != null) {
        ReflectionUtils.makeAccessible(field);
        ReflectionUtils.setField(field, whitelist, new HashMap<Object, Object>());
    }
    return Jsoup.clean(content, whitelist);
}
Example 36
Project: muikku-master  File: WorkspaceForumRESTService.java View source code
/**
 * Creates a new thread in a workspace forum area.
 *
 * <p>
 * Responds with NOT_FOUND when the workspace or forum area does not exist, the area is
 * not a workspace forum area or belongs to another workspace; FORBIDDEN when the user
 * may not write workspace messages; and BAD_REQUEST when a sticky or locked thread is
 * requested without the lock/stickify permission. The message is cleaned with the
 * relaxed whitelist (plus {@code target} on links) before it is stored.
 * </p>
 *
 * @param workspaceEntityId id of the workspace entity
 * @param areaId id of the forum area
 * @param newThread the thread to create
 * @return the created thread on success, otherwise one of the error responses above
 */
@POST
@Path("/workspaces/{WORKSPACEENTITYID}/forumAreas/{AREAID}/threads")
@RESTPermit(handling = Handling.INLINE)
public Response createThread(@PathParam("WORKSPACEENTITYID") Long workspaceEntityId, @PathParam("AREAID") Long areaId, ForumThreadRESTModel newThread) {
    WorkspaceEntity workspaceEntity = workspaceEntityController.findWorkspaceEntityById(workspaceEntityId);
    if (workspaceEntity == null) {
        return Response.status(Status.NOT_FOUND).entity(String.format("Workspace entity %d not found", workspaceEntityId)).build();
    }
    ForumArea forumArea = forumController.getForumArea(areaId);
    if (forumArea == null) {
        return Response.status(Status.NOT_FOUND).entity("Forum area not found").build();
    }
    if (!(forumArea instanceof WorkspaceForumArea)) {
        logger.severe(String.format("Trying to access forum %d via incorrect REST endpoint", forumArea.getId()));
        return Response.status(Status.NOT_FOUND).build();
    }
    if (!workspaceEntity.getId().equals(((WorkspaceForumArea) forumArea).getWorkspace())) {
        return Response.status(Status.NOT_FOUND).entity(String.format("WorkspaceForumArea %d does not belong to workspace entity %d", forumArea.getId(), workspaceEntity.getId())).build();
    }
    if (sessionController.hasWorkspacePermission(ForumResourcePermissionCollection.FORUM_WRITE_WORKSPACE_MESSAGES, workspaceEntity)) {
        if (Boolean.TRUE.equals(newThread.getSticky()) || Boolean.TRUE.equals(newThread.getLocked())) {
            if (!sessionController.hasWorkspacePermission(ForumResourcePermissionCollection.FORUM_LOCK_OR_STICKIFY_WORKSPACE_MESSAGES, workspaceEntity)) {
                return Response.status(Status.BAD_REQUEST).build();
            }
        }
        Document message = Jsoup.parse(Jsoup.clean(newThread.getMessage(), Whitelist.relaxed().addAttributes("a", "target")));
        message.outputSettings().escapeMode(EscapeMode.xhtml);
        // Links that open a new browsing context must not expose window.opener or the referrer.
        // The keyword is "noreferrer" (double r); the misspelt "noreferer" is ignored by browsers.
        message.select("a[target]").attr("rel", "noopener noreferrer");
        ForumThread thread = forumController.createForumThread(forumArea, newThread.getTitle(), message.body().toString(), newThread.getSticky(), newThread.getLocked());
        ForumThreadRESTModel result = new ForumThreadRESTModel(thread.getId(), thread.getTitle(), thread.getMessage(), thread.getCreator(), thread.getCreated(), thread.getForumArea().getId(), thread.getSticky(), thread.getLocked(), thread.getUpdated(), 1L, thread.getLastModified());
        return Response.ok(result).build();
    } else {
        return Response.status(Status.FORBIDDEN).build();
    }
}
Example 37
Project: streamflow-core-master  File: CreateCaseFromEmailService.java View source code
/**
 * Handles a received email by turning it into a new case, unless
 * {@code hasStreamflowReference} accepts the email's "References" header, in which
 * case no case is created here and the unit of work is simply completed.
 *
 * Outcomes visible in this method:
 * - access point lookup throws IllegalArgumentException: a failure case with an
 *   "Unknown accesspoint: ..." subject is created, the unit of work is discarded;
 * - an auto-reply header is present: a "Possible Mail Loop: ..." failure case is
 *   created, the unit of work is discarded;
 * - otherwise a case, a note, a conversation, a message and attachments are created
 *   and the case is sent to the access point;
 * - any other exception: a "General error: ..." failure case is created, the unit
 *   of work is discarded and the exception is rethrown wrapped.
 *
 * @param event the application event that delivered the email; only used when
 *              wrapping a failure in ApplicationEventReplayException
 * @param email the received email
 * @throws ApplicationEventReplayException wrapping any exception raised while processing
 */
public void receivedEmail(ApplicationEvent event, EmailValue email) {
    UnitOfWork uow = module.unitOfWorkFactory().newUnitOfWork(UsecaseBuilder.newUsecase("Create case from email"));
    try {
        String references = email.headers().get().get("References");
        if (!hasStreamflowReference(references)) {
            Organizations.Data organizations = uow.get(Organizations.Data.class, OrganizationsEntity.ORGANIZATIONS_ID);
            Organization organization = organizations.organization().get();
            EmailAccessPoint ap = null;
            try {
                ap = organization.getEmailAccessPoint(email.to().get());
            } catch (IllegalArgumentException e) {
                // No access point for the recipient address: report it as a failure case.
                // The subject is capped at 50 characters, as everywhere else in this method.
                ValueBuilder<EmailValue> builder = module.valueBuilderFactory().newValueBuilder(EmailValue.class).withPrototype(email);
                String subj = "Unknown accesspoint: " + builder.prototype().to().get() + " - " + builder.prototype().subject().get();
                builder.prototype().subject().set(subj.length() > 50 ? subj.substring(0, 50) : subj);
                systemDefaults.createCaseOnEmailFailure(builder.newInstance());
                uow.discard();
                return;
            }
            // NOTE(review): if getEmailAccessPoint can return null without throwing, the
            // ap.createCase(user) call below raises a NullPointerException, which ends up in
            // the "General error" handler of the outer catch — confirm whether that is intended.
            if (ap != null && hasAutoReplyHeader(email.headers().get())) {
                // Possible mail loop - auto reply header present but no References - create support case.
                ValueBuilder<EmailValue> builder = module.valueBuilderFactory().newValueBuilder(EmailValue.class).withPrototype(email);
                String subj = "Possible Mail Loop: " + builder.prototype().to().get() + " - " + builder.prototype().subject().get();
                builder.prototype().subject().set(subj.length() > 50 ? subj.substring(0, 50) : subj);
                systemDefaults.createCaseOnEmailFailure(builder.newInstance());
                uow.discard();
                return;
            }
            Drafts user = systemDefaults.getUser(email);
            ConversationParticipant participant = (ConversationParticipant) user;
            // Populate a fresh role map with the objects involved; it is cleared in the finally block.
            RoleMap.newCurrentRoleMap();
            RoleMap.current().set(organization);
            RoleMap.current().set(ap);
            RoleMap.current().set(user);
            CaseEntity caze = ap.createCase(user);
            RoleMap.current().set(caze);
            caze.caselog().get().addTypedEntry("{accesspoint,description=" + ap.getDescription() + "}", CaseLogEntryTypes.system);
            // STREAMFLOW-714
            // The case description is the email subject, capped at 50 characters.
            String subject = email.subject().get();
            caze.changeDescription(subject.length() > 50 ? subject.substring(0, 50) : subject);
            if (Translator.HTML.equalsIgnoreCase(email.contentType().get())) {
                // HTML content is sanitized with jsoup's basic whitelist before being stored as the note.
                caze.addNote(Jsoup.clean(email.content().get(), Whitelist.basic()), Translator.HTML);
            //caze.addNote( Translator.cleanHtml( email.content().get() ), Translator.HTML );
            } else {
                caze.addNote(email.content().get(), Translator.PLAIN);
            }
            // Create conversation
            Conversation conversation = caze.createConversation(email.subject().get(), (Creator) user);
            Message message = null;
            // NOTE(review): the message body is cleaned with Translator.cleanHtml while the note
            // above uses Jsoup.clean with Whitelist.basic() — confirm the difference is intended.
            if (Translator.HTML.equalsIgnoreCase(email.contentType().get())) {
                message = conversation.createMessage(Translator.cleanHtml(email.content().get()), MessageType.HTML, participant);
            } else {
                message = conversation.createMessage(email.content().get(), MessageType.PLAIN, participant);
            }
            // Create attachments
            for (AttachedFileValue attachedFileValue : email.attachments().get()) {
                // vCard attachments are added as contact data; everything else becomes a message attachment.
                if (attachedFileValue.mimeType().get().contains("text/x-vcard") || attachedFileValue.mimeType().get().contains("text/directory")) {
                    addVCardAsContact((Contactable.Data) user, attachedFileValue);
                } else {
                    Attachment attachment = conversation.createAttachment(attachedFileValue.uri().get());
                    attachment.changeName(attachedFileValue.name().get());
                    attachment.changeMimeType(attachedFileValue.mimeType().get());
                    attachment.changeModificationDate(attachedFileValue.modificationDate().get());
                    attachment.changeSize(attachedFileValue.size().get());
                    attachment.changeUri(attachedFileValue.uri().get());
                    message.addAttachment(attachment);
                    // remove attachment from conversation attachments data so AttachmentEntity does not get
                    // removed for real - we just moved it to message attachments where it actually belongs after
                    // message creation.
                    ((Attachments.Data) conversation).attachments().remove(attachment);
                }
            }
            // Add contact info
            caze.updateContact(0, ((Contactable.Data) user).contact().get());
            // Open the case
            ap.sendTo(caze);
        }
        // NOTE(review): debug output goes to stdout rather than a logger — consider replacing.
        System.out.println("CreateCaseFromEmailService before uow complete");
        uow.complete();
        System.out.println("CreateCaseFromEmailService after uow complete");
    } catch (Exception ex) {
        // Anything that went wrong above is reported as a failure case, then rethrown
        // wrapped in ApplicationEventReplayException together with the originating event.
        ValueBuilder<EmailValue> builder = module.valueBuilderFactory().newValueBuilder(EmailValue.class).withPrototype(email);
        String subj = "General error: " + builder.prototype().to().get() + " - " + builder.prototype().subject().get();
        builder.prototype().subject().set(subj.length() > 50 ? subj.substring(0, 50) : subj);
        systemDefaults.createCaseOnEmailFailure(builder.newInstance());
        uow.discard();
        throw new ApplicationEventReplayException(event, ex);
    } finally {
        // Runs on every path, including the early returns above.
        RoleMap.clearCurrentRoleMap();
    }
}
Example 38
Project: jeboorker-master  File: EbookTableCellRenderer.java View source code
/**
	 * Removes all HTML tags and decodes entities.
	 * Entity decoding only runs when the text contains an ampersand, and tag
	 * stripping only runs when it contains a '<'.
	 * @param toClean The text to be cleaned, may be <code>null</code>.
	 * @return The cleaned and trimmed text, or EMPTY if the input was <code>null</code>.
	 */
private String cleanString(String toClean) {
    if (toClean == null) {
        return EMPTY;
    }
    String result = toClean;
    final boolean mayContainEntities = result.indexOf('&') >= 0;
    if (mayContainEntities) {
        final HTMLEntityConverter converter = new HTMLEntityConverter(result, HTMLEntityConverter.ENCODE_EIGHT_BIT_ASCII);
        result = converter.decodeEntities();
    }
    // Checked after decoding, so a '<' produced by an entity is stripped as well.
    final boolean mayContainTags = result.indexOf('<') >= 0;
    if (mayContainTags) {
        result = Jsoup.clean(result, Whitelist.none());
    }
    return result.trim();
}
Example 39
Project: TGFC-master  File: NetworkUtils.java View source code
/**
 * Parses a thread page into post items and collects the image URLs they contain.
 *
 * The first post is recognised by a regular expression applied to the raw HTML; the
 * remaining posts are read by pairing ".message" elements with ".infobar" elements.
 * Every parsed post is appended to {@code pageData.dataList}, and
 * {@code pageData.imgURLList} is replaced with the image URLs found in all items.
 *
 * @param html     raw HTML of the page
 * @param pageData result holder; its dataList is appended to and its imgURLList is set
 * @param tid      not used inside this method
 * @param referer  passed to Jsoup.parse as the base URI of the document
 */
public static void contentListParser(String html, ContentListPageData pageData, int tid, String referer) {
    List<ContentListItemData> dataList = pageData.dataList;
    Document htmlDoc = Jsoup.parse(html, referer);
    // Drop ".lightmessage" elements from the document before selecting posts.
    Elements lightMessageEmelents = htmlDoc.select(".lightmessage");
    lightMessageEmelents.remove();
    Elements messageElements = htmlDoc.select(".message");
    Elements infobarElements = htmlDoc.select(".infobar");
    // Index of the first ".message" element that is not the main post.
    int messageStart = 0;
    // Groups used below: 2 = post time, 3 = poster uid, 4 = poster name, 5 = optional closing </b>.
    Pattern mainPostPattern = Pattern.compile("标题:<b>(.+)<\\/b><br \\/>时间:(.+)<br \\/>作者:<a href=\".+uid=(\\d+).*\">(?:<b>)?(.+?)(<\\/b>)?<\\/a>");
    Matcher mainPostMatcher = mainPostPattern.matcher(html);
    // Matches anchors whose text contains " ... " surrounded by whitespace; group 1 is the href.
    Pattern urlReplacePattern = Pattern.compile("<a\\s*.*?href=\"(.*?)\"\\s*.*?>.*?\\s\\.\\.\\.\\s.*?<\\/a>");
    if (mainPostMatcher.find()) {
        messageStart++;
        ContentListItemData itemData = new ContentListItemData();
        itemData.floorNum = 1;
        itemData.posterTime = mainPostMatcher.group(2);
        itemData.posterUID = Integer.parseInt(mainPostMatcher.group(3));
        itemData.posterName = mainPostMatcher.group(4);
        // canEdit is derived from whether the poster name was wrapped in <b>...</b>.
        itemData.canEdit = mainPostMatcher.group(5) != null;
        Pattern ratingPattern = Pattern.compile("评分记录\\(.+?=(\\d+)\\)");
        Matcher ratingMatcher = ratingPattern.matcher(html);
        if (ratingMatcher.find()) {
            itemData.ratings = Integer.parseInt(ratingMatcher.group(1));
        }
        Pattern pidPattern = Pattern.compile("作者:<a href=\".*?pid=(\\d+)[^\\>]*?>");
        Matcher pidMatcher = pidPattern.matcher(html);
        if (pidMatcher.find()) {
            itemData.pid = Integer.parseInt(pidMatcher.group(1));
        }
        // NOTE(review): assumes at least one ".message" element exists whenever the main
        // post pattern matches; get(0) throws otherwise — confirm.
        itemData.mainText = messageElements.get(0).html();
        extractPlatform(itemData);
        dataList.add(itemData);
    }
    // i walks the remaining ".message" elements, j walks the ".infobar" elements from the start.
    for (int i = messageStart, j = 0; i < messageElements.size(); i++, j++) {
        ContentListItemData itemData = new ContentListItemData();
        Element msgElement = messageElements.get(i);
        // NOTE(review): assumes there is an ".infobar" element for every remaining message;
        // get(j) throws if there are fewer — confirm against the page structure.
        Element barElement = infobarElements.get(j);
        String infoString = StringEscapeUtils.unescapeHtml(barElement.html());
        // Groups: 1 = pid, 2 = floor number, 3 = poster uid, 4 = poster name,
        // 5 = optional rating count, 6 = post time, 7 = optional closing </b>.
        // NOTE(review): this pattern is recompiled on every iteration; it is loop-invariant.
        Pattern barPattern = Pattern.compile("<a href=\".*?pid=(\\d+).*?>#(\\d+)[\\s\\S]*?<a href=\".*?uid=(\\d+).*?>(?:<b>)?(.+?)(?:<\\/b>)?<\\/a>[\\s\\S]*?(?:骚\\((\\d+)\\)[\\s\\S]*?)?<span class=\"nf\">(?:<font \\S*>)? (?:<b>)?(.*?)(<\\/b>)?(?:<\\/font>)?<\\/span>");
        Matcher barMatcher = barPattern.matcher(infoString);
        if (barMatcher.find()) {
            itemData.pid = Integer.parseInt(barMatcher.group(1));
            itemData.floorNum = Integer.parseInt(barMatcher.group(2));
            itemData.posterUID = Integer.parseInt(barMatcher.group(3));
            itemData.posterName = barMatcher.group(4);
            if (barMatcher.group(5) != null) {
                itemData.ratings = Integer.parseInt(barMatcher.group(5));
            }
            itemData.posterTime = barMatcher.group(6);
            itemData.canEdit = barMatcher.group(7) != null;
        }
        Elements quotedElements = msgElement.select(".quote-bd");
        if (quotedElements.size() > 0) {
            // Only the first quote block is used; it is split at its first "<br>" into
            // a header part (quotedInfo) and the quoted body (quotedText).
            String quoteString = quotedElements.get(0).html();
            String divider = "<br>";
            int t = quoteString.indexOf(divider);
            if (t != -1) {
                itemData.quotedInfo = quoteString.substring(0, t);
                itemData.quotedInfo = cleanQuote(itemData.quotedInfo);
                itemData.quotedText = quoteString.substring(t + divider.length());
                itemData.quotedText = getPlainText(cleanText(itemData.quotedText)).trim();
            }
            // Remove ".ui-topic-content" from the message so it is not part of mainText below.
            msgElement.select(".ui-topic-content").remove();
        }
        itemData.mainText = msgElement.html();
        extractPlatform(itemData);
        dataList.add(itemData);
    }
    // Group 1 is the value of the src attribute of an <img> tag.
    Pattern imgURLPattern = Pattern.compile("<img\\s*[^>]*?src\\s*=\\s*['\\\"]([^'\\\"]*?)['\\\"][^>]*?\\s*\\/?>");
    List<String> imgURLList = new ArrayList<String>();
    // First pass: rewrite anchors matched by urlReplacePattern so the link text is the full href.
    for (int i = 0; i < dataList.size(); i++) {
        ContentListItemData itemData = dataList.get(i);
        //			itemData.mainText = Jsoup.clean(itemData.mainText, Whitelist.basicWithImages());
        Matcher urlReplaceMatcher = urlReplacePattern.matcher(itemData.mainText);
        //			Log.w("Matcher", String.valueOf(urlReplaceMatcher.find()));
        itemData.mainText = urlReplaceMatcher.replaceAll("<a href=\"$1\">$1</a>");
    }
    // Second pass: collect every image URL from the (rewritten) item texts, in item order.
    for (int i = 0; i < dataList.size(); i++) {
        String itemHTML = dataList.get(i).mainText;
        Matcher imgURLMatcher = imgURLPattern.matcher(itemHTML);
        while (imgURLMatcher.find()) {
            imgURLList.add(imgURLMatcher.group(1));
        }
    }
    pageData.imgURLList = imgURLList;
}
Example 40
Project: tika-wrapper-master  File: TikaWrapper.java View source code
/**
 * Extracts text and metadata from a PDF by running the external pdftotext tool.
 *
 * The input is copied to a temporary PDF file, converted to a temporary HTML file,
 * and the HTML is parsed to fill {@code meta2} (content size, content type, title,
 * author, creation date) and {@code text}. Nothing happens when the input is null
 * or no pdftotext path is configured.
 *
 * On any exception the stack trace is printed and both {@code text} and
 * {@code meta2} are reset to null. The temporary files are removed on every exit
 * path, and the reader on the HTML file is always closed.
 *
 * @param input stream with the PDF content, may be null
 */
private void processWithPdfToText(InputStream input) {
    File tempFile = null;
    File tempFile2 = null;
    try {
        if (input != null && pdfToTextPath != null && !"".equals(pdfToTextPath)) {
            // Get a local copy of the file
            tempFile = createTempFile("tmp", ".pdf", tmpPath);
            if (!writeToFile(tempFile, input))
                return;
            meta2 = new HashMap<String, String>();
            meta2.put(META_CONTENTSIZE, String.valueOf(tempFile.length()));
            tempFile2 = createTempFile("tmp", ".html", tmpPath);
            Shell sh = new Shell();
            // Convert with PDFTOTEXT - pdftotext -enc UTF-8 -raw -q -htmlmeta -eol unix in.pdf out.html
            sh.exec(pdfToTextPath, "-enc", "UTF-8", "-raw", "-q", "-htmlmeta", "-eol", "unix", tempFile.getAbsolutePath(), tempFile2.getAbsolutePath()).consumeAsString();
            tempFile.delete();
            // Load in string and add the <meta http-equiv='Content-Type' content='text/html; charset=utf-8'> line
            StringBuilder sb = new StringBuilder();
            BufferedReader br1 = new BufferedReader(new InputStreamReader(new FileInputStream(tempFile2), "UTF-8"));
            try {
                // readLine() returning null is the reliable end-of-stream signal; ready() only
                // says whether a read would block.
                String line;
                while ((line = br1.readLine()) != null) {
                    sb.append(line).append("\n");
                    if ("</head>".equals(line)) {
                        sb.append("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>").append("\n");
                    }
                }
            } finally {
                // Close even when reading fails, so the file handle is released before the
                // temporary file is deleted.
                br1.close();
            }
            tempFile2.delete();
            meta2.put(META_CONTENTTYPE, CONTENT_TYPE_PDF);
            text = sb.toString();
            Document doc = Jsoup.parse(text);
            if (doc != null) {
                meta2.put(META_TITLE, doc.select("title").text());
                meta2.put(META_AUTHOR, getMetaContent(doc, "Author"));
                String creationDate = getMetaContent(doc, "CreationDate");
                if (creationDate != null) {
                    // 20130322143113Z00'00' -> 2013-03-22T14:31:13Z
                    // 20130322143113+02'00' -> 2013-03-22T14:31:13Z
                    // Both accepted forms yield the same value, so one pattern covers them.
                    // NOTE(review): the time zone offset is dropped and the value is always
                    // labelled 'Z' — confirm this is acceptable.
                    Pattern p = Pattern.compile("[0-9]{14}[Z+][0-9]{2}'[0-9]{2}'");
                    Matcher m = p.matcher(creationDate);
                    if (m.find()) {
                        String value = String.format("%1$s-%2$s-%3$sT%4$s:%5$s:%6$sZ", creationDate.substring(0, 4), creationDate.substring(4, 6), creationDate.substring(6, 8), creationDate.substring(8, 10), creationDate.substring(10, 12), creationDate.substring(12, 14));
                        meta2.put(META_CREATED, value);
                    }
                }
                if (OUTPUT_FORMAT_TEXT.equals(outputFormat)) {
                    Document doc2 = new Cleaner(Whitelist.basic()).clean(doc);
                    text = doc2.body().text();
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        text = null;
        meta2 = null;
    } finally {
        // Covers the early return and the exception path as well as normal completion.
        if (tempFile != null && tempFile.exists())
            tempFile.delete();
        if (tempFile2 != null && tempFile2.exists())
            tempFile2.delete();
    }
}
Example 41
Project: ttr-master  File: ArticleFragment.java View source code
/**
 * Renders the current article into the WebView.
 *
 * Configures cache mode and media playback, then (only once per WebView, guarded by
 * webviewInitialized) builds the HTML page from string templates and loads it.
 * Returns early when webView is null, when the WebView was already initialized, or
 * when the connector reports a pending error (ErrorActivity is started in that case).
 * Exceptions are logged and not propagated; the progress indicator is always removed.
 */
@SuppressLint({ "SetJavaScriptEnabled", "AddJavascriptInterface" })
private void doRefresh() {
    if (webView == null)
        return;
    try {
        ProgressBarManager.getInstance().addProgress((MenuActivity) getActivity());
        // Serve from cache only when working offline or when media loading is disabled.
        if (Controller.getInstance().workOffline() || !Controller.getInstance().loadMedia()) {
            webView.getSettings().setCacheMode(WebSettings.LOAD_CACHE_ONLY);
        } else {
            webView.getSettings().setCacheMode(WebSettings.LOAD_DEFAULT);
        }
        // NOTE(review): the user-gesture requirement is switched off when loadMedia() is
        // false — confirm the condition is not inverted.
        if (!Controller.getInstance().loadMedia() && Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR1)
            webView.getSettings().setMediaPlaybackRequiresUserGesture(false);
        // No need to reload everything
        if (webviewInitialized)
            return;
        // Check for errors
        if (Controller.getInstance().getConnector().hasLastError()) {
            Intent i = new Intent(getActivity(), ErrorActivity.class);
            i.putExtra(ErrorActivity.ERROR_MESSAGE, Controller.getInstance().getConnector().pullLastError());
            startActivityForResult(i, ErrorActivity.ACTIVITY_SHOW_ERROR);
            return;
        }
        // Build a comma-separated list of the checked labels; labels that have both a
        // foreground and a background color are formatted with LABEL_COLOR_STRING.
        StringBuilder labels = new StringBuilder();
        for (Label label : article.labels) {
            if (label.checked) {
                if (labels.length() > 0)
                    labels.append(", ");
                String labelString = label.caption;
                if (label.foregroundColor != null && label.backgroundColor != null)
                    labelString = String.format(LABEL_COLOR_STRING, label.foregroundColor, label.backgroundColor, label.caption);
                labels.append(labelString);
            }
        }
        // Remove all html tags and content that doesn't meet this set of allowed stuff
        // NOTE(review): on JELLY_BEAN_MR1 and newer the content is used as-is; only older
        // versions are passed through Jsoup.clean.
        final String contentClean;
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR1)
            contentClean = article.content;
        else
            contentClean = Jsoup.clean(article.content, Whitelist.relaxed());
        // Load the HTML template from the string resources and fill in its attributes
        ST htmlTmpl = new ST(getString(R.string.HTML_TEMPLATE), '$', '$');
        // Styles
        if (Controller.getInstance().allowHyphenation()) {
            ST javascriptST = new ST(getString(R.string.JAVASCRIPT_HYPHENATION_TEMPLATE), '$', '$');
            javascriptST.add("LANGUAGE", Controller.getInstance().hyphenationLanguage());
            htmlTmpl.add("HYPHENATION", javascriptST.render());
        }
        // Replace alignment-marker: align:left or align:justify
        ST stylesST = new ST(getString(R.string.STYLE_TEMPLATE), '$', '$');
        if (Controller.getInstance().alignFlushLeft()) {
            stylesST.add("TEXT_ALIGN", getString(R.string.ALIGN_LEFT));
        } else {
            stylesST.add("TEXT_ALIGN", getString(R.string.ALIGN_JUSTIFY));
        }
        htmlTmpl.add("STYLE", stylesST.render());
        // General values
        htmlTmpl.add("THEME", getResources().getString(Controller.getInstance().getThemeHTML()));
        htmlTmpl.add("CACHE_DIR", Controller.getInstance().cacheFolder());
        htmlTmpl.add("LANGUAGE", Controller.getInstance().hyphenationLanguage());
        // Special values for this article
        htmlTmpl.add("article", article);
        htmlTmpl.add("feed", feed);
        htmlTmpl.add("CACHED_IMAGES", cachedImages);
        htmlTmpl.add("LABELS", labels.toString());
        htmlTmpl.add("UPDATED", DateUtils.getDateTimeCustom(getActivity(), article.updated));
        htmlTmpl.add("ATTACHMENTS", getAttachmentsMarkup(article.attachments));
        htmlTmpl.add("CONTENT", contentClean);
        // Hyphenation Javascript
        // NOTE(review): "HYPHENATION" was already added in the "Styles" section above, so
        // with hyphenation enabled the attribute receives the rendered script twice —
        // confirm whether the template is meant to get both values.
        if (Controller.getInstance().allowHyphenation()) {
            ST javascriptST = new ST(getString(R.string.JAVASCRIPT_HYPHENATION_TEMPLATE), '$', '$');
            javascriptST.add("LANGUAGE", Controller.getInstance().hyphenationLanguage());
            htmlTmpl.add("HYPHENATION", javascriptST.render());
        }
        // Navigation buttons
        if (Controller.getInstance().showButtonsMode() == Constants.SHOW_BUTTONS_MODE_HTML) {
            htmlTmpl.add("NAVIGATION", getString(R.string.BOTTOM_NAVIGATION_TEMPLATE));
        }
        // Note of the article
        if (article.note != null && article.note.length() > 0) {
            ST noteST = new ST(getResources().getString(R.string.NOTE_TEMPLATE), '$', '$');
            noteST.add("NOTE", getResources().getString(R.string.Commons_HtmlPrefixNote) + " " + article.note);
            htmlTmpl.add("NOTE_TEMPLATE", noteST.render());
        }
        content = htmlTmpl.render();
        /* JavaScript should be safe since we use JSoup to remove all unwanted stuff from article.content */
        // NOTE(review): the statement above only holds for the Jsoup.clean branch; on
        // JELLY_BEAN_MR1+ article.content reaches the WebView uncleaned while JavaScript and
        // the articleController interface are enabled — review.
        webView.getSettings().setJavaScriptEnabled(true);
        webView.addJavascriptInterface(articleJSInterface, "articleController");
        webView.loadDataWithBaseURL("file:///android_asset/", content, "text/html", "utf-8", null);
        // Articles with fewer than 3 characters of content are treated as empty; the link
        // is opened at most once (linkAutoOpened) and only if the setting allows it.
        if (!linkAutoOpened && article.content.length() < 3) {
            if (Controller.getInstance().openUrlEmptyArticle()) {
                Log.i(TAG, "Article-Content is empty, opening URL in browser");
                linkAutoOpened = true;
                openLink();
            }
        }
        // Everything did load, we dont have to do this again.
        webviewInitialized = true;
    } catch (Exception e) {
        Log.w(TAG, e.getClass().getSimpleName() + " in doRefresh(): " + e.getMessage() + " (" + e.getCause() + ")", e);
    } finally {
        // Runs on the early returns inside the try block as well.
        ProgressBarManager.getInstance().removeProgress((MenuActivity) getActivity());
    }
}
Example 42
Project: ttrss-reader-fork-master  File: ArticleFragment.java View source code
/**
 * Renders the current article into the WebView.
 *
 * Configures cache mode and media playback, then (only once per WebView, guarded by
 * webviewInitialized) builds the HTML page from string templates and loads it.
 * Returns early when webView is null, when the WebView was already initialized, or
 * when the connector reports a pending error (ErrorActivity is started in that case).
 * Exceptions are logged and not propagated; the progress indicator is always removed.
 */
@SuppressLint({ "SetJavaScriptEnabled", "AddJavascriptInterface" })
private void doRefresh() {
    if (webView == null)
        return;
    try {
        ProgressBarManager.getInstance().addProgress((MenuActivity) getActivity());
        // Serve from cache only when working offline or when media loading is disabled.
        if (Controller.getInstance().workOffline() || !Controller.getInstance().loadMedia()) {
            webView.getSettings().setCacheMode(WebSettings.LOAD_CACHE_ONLY);
        } else {
            webView.getSettings().setCacheMode(WebSettings.LOAD_DEFAULT);
        }
        // NOTE(review): the user-gesture requirement is switched off when loadMedia() is
        // false — confirm the condition is not inverted.
        if (!Controller.getInstance().loadMedia() && Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR1)
            webView.getSettings().setMediaPlaybackRequiresUserGesture(false);
        // No need to reload everything
        if (webviewInitialized)
            return;
        // Check for errors
        if (Controller.getInstance().getConnector().hasLastError()) {
            Intent i = new Intent(getActivity(), ErrorActivity.class);
            i.putExtra(ErrorActivity.ERROR_MESSAGE, Controller.getInstance().getConnector().pullLastError());
            startActivityForResult(i, ErrorActivity.ACTIVITY_SHOW_ERROR);
            return;
        }
        // Build a comma-separated list of the checked labels; labels that have both a
        // foreground and a background color are formatted with LABEL_COLOR_STRING.
        StringBuilder labels = new StringBuilder();
        for (Label label : article.labels) {
            if (label.checked) {
                if (labels.length() > 0)
                    labels.append(", ");
                String labelString = label.caption;
                if (label.foregroundColor != null && label.backgroundColor != null)
                    labelString = String.format(LABEL_COLOR_STRING, label.foregroundColor, label.backgroundColor, label.caption);
                labels.append(labelString);
            }
        }
        // Remove all html tags and content that doesn't meet this set of allowed stuff
        // NOTE(review): on JELLY_BEAN_MR1 and newer the content is used as-is; only older
        // versions are passed through Jsoup.clean.
        final String contentClean;
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR1)
            contentClean = article.content;
        else
            contentClean = Jsoup.clean(article.content, Whitelist.relaxed());
        // Load the HTML template from the string resources and fill in its attributes
        ST htmlTmpl = new ST(getString(R.string.HTML_TEMPLATE), '$', '$');
        // Styles
        if (Controller.getInstance().allowHyphenation()) {
            ST javascriptST = new ST(getString(R.string.JAVASCRIPT_HYPHENATION_TEMPLATE), '$', '$');
            javascriptST.add("LANGUAGE", Controller.getInstance().hyphenationLanguage());
            htmlTmpl.add("HYPHENATION", javascriptST.render());
        }
        // Replace alignment-marker: align:left or align:justify
        ST stylesST = new ST(getString(R.string.STYLE_TEMPLATE), '$', '$');
        if (Controller.getInstance().alignFlushLeft()) {
            stylesST.add("TEXT_ALIGN", getString(R.string.ALIGN_LEFT));
        } else {
            stylesST.add("TEXT_ALIGN", getString(R.string.ALIGN_JUSTIFY));
        }
        htmlTmpl.add("STYLE", stylesST.render());
        // General values
        htmlTmpl.add("THEME", getResources().getString(Controller.getInstance().getThemeHTML()));
        htmlTmpl.add("CACHE_DIR", Controller.getInstance().cacheFolder());
        htmlTmpl.add("LANGUAGE", Controller.getInstance().hyphenationLanguage());
        // Special values for this article
        htmlTmpl.add("article", article);
        htmlTmpl.add("feed", feed);
        htmlTmpl.add("CACHED_IMAGES", cachedImages);
        htmlTmpl.add("LABELS", labels.toString());
        htmlTmpl.add("UPDATED", DateUtils.getDateTimeCustom(getActivity(), article.updated));
        htmlTmpl.add("ATTACHMENTS", getAttachmentsMarkup(article.attachments));
        htmlTmpl.add("CONTENT", contentClean);
        // Hyphenation Javascript
        // NOTE(review): "HYPHENATION" was already added in the "Styles" section above, so
        // with hyphenation enabled the attribute receives the rendered script twice —
        // confirm whether the template is meant to get both values.
        if (Controller.getInstance().allowHyphenation()) {
            ST javascriptST = new ST(getString(R.string.JAVASCRIPT_HYPHENATION_TEMPLATE), '$', '$');
            javascriptST.add("LANGUAGE", Controller.getInstance().hyphenationLanguage());
            htmlTmpl.add("HYPHENATION", javascriptST.render());
        }
        // Navigation buttons
        if (Controller.getInstance().showButtonsMode() == Constants.SHOW_BUTTONS_MODE_HTML) {
            htmlTmpl.add("NAVIGATION", getString(R.string.BOTTOM_NAVIGATION_TEMPLATE));
        }
        // Note of the article
        if (article.note != null && article.note.length() > 0) {
            ST noteST = new ST(getResources().getString(R.string.NOTE_TEMPLATE), '$', '$');
            noteST.add("NOTE", getResources().getString(R.string.Commons_HtmlPrefixNote) + " " + article.note);
            htmlTmpl.add("NOTE_TEMPLATE", noteST.render());
        }
        content = htmlTmpl.render();
        /* JavaScript should be safe since we use JSoup to remove all unwanted stuff from article.content */
        // NOTE(review): the statement above only holds for the Jsoup.clean branch; on
        // JELLY_BEAN_MR1+ article.content reaches the WebView uncleaned while JavaScript and
        // the articleController interface are enabled — review.
        webView.getSettings().setJavaScriptEnabled(true);
        webView.addJavascriptInterface(articleJSInterface, "articleController");
        webView.loadDataWithBaseURL("file:///android_asset/", content, "text/html", "utf-8", null);
        // Articles with fewer than 3 characters of content are treated as empty; the link
        // is opened at most once (linkAutoOpened) and only if the setting allows it.
        if (!linkAutoOpened && article.content.length() < 3) {
            if (Controller.getInstance().openUrlEmptyArticle()) {
                Log.i(TAG, "Article-Content is empty, opening URL in browser");
                linkAutoOpened = true;
                openLink();
            }
        }
        // Everything did load, we dont have to do this again.
        webviewInitialized = true;
    } catch (Exception e) {
        Log.w(TAG, e.getClass().getSimpleName() + " in doRefresh(): " + e.getMessage() + " (" + e.getCause() + ")", e);
    } finally {
        // Runs on the early returns inside the try block as well.
        ProgressBarManager.getInstance().removeProgress((MenuActivity) getActivity());
    }
}
Example 43
Project: Portofino-master  File: AbstractCrudAction.java View source code
//**************************************************************************
// Form handling
//**************************************************************************
/**
     * Writes the contents of the create or edit form into the persistent object.
     * Assumes that the form has already been validated.
     * Also processes rich-text (HTML) fields by cleaning the submitted HTML according
     * to the {@link #getWhitelist() whitelist}. Fields whose value is <code>null</code>
     * are left untouched. If cleaning fails for any reason, the value is replaced by
     * its HTML-escaped form so that no unsanitized markup is stored.
     */
protected void writeFormToObject() {
    form.writeToObject(object);
    for (TextField textField : FormUtil.collectEditableRichTextFields(form)) {
        //TODO in bulk edit mode, the field should be skipped altogether if the checkbox is not checked.
        PropertyAccessor propertyAccessor = textField.getPropertyAccessor();
        String stringValue = (String) propertyAccessor.get(object);
        if (stringValue == null) {
            // Nothing to sanitize. Passing null to Jsoup.clean would only raise an exception
            // that gets logged as an error below and then falls back to null again.
            continue;
        }
        String cleanText;
        try {
            Whitelist whitelist = getWhitelist();
            cleanText = Jsoup.clean(stringValue, whitelist);
        } catch (Throwable t) {
            // Deliberately broad: whatever goes wrong, never store the raw HTML.
            logger.error("Could not clean HTML, falling back to escaped text", t);
            cleanText = StringEscapeUtils.escapeHtml(stringValue);
        }
        propertyAccessor.set(object, cleanText);
    }
}
Example 44
Project: email-master  File: HtmlHelper.java View source code
/**
 * Runs the given HTML through jsoup's cleaner with an empty whitelist,
 * so that no tags are kept in the result.
 *
 * @param html the HTML to clean
 * @return the output of the cleaner
 */
public static String extractText(String html) {
    final String withoutTags = Jsoup.clean(html, Whitelist.none());
    return withoutTags;
}
Example 45
Project: k-9-master  File: HtmlHelper.java View source code
/**
 * Runs the given HTML through jsoup's cleaner with an empty whitelist,
 * so that no tags are kept in the result.
 *
 * @param html the HTML to clean
 * @return the output of the cleaner
 */
public static String extractText(String html) {
    final String withoutTags = Jsoup.clean(html, Whitelist.none());
    return withoutTags;
}
Example 46
Project: NoticEditor-master  File: HtmlImportMode.java View source code
/**
 * @return the whitelist held by this instance
 */
public Whitelist getWhitelist() {
    return this.whitelist;
}
Example 47
Project: iis-master  File: HtmlToPlaintextIngester.java View source code
/**
 * Cleans the input with an empty whitelist (no tags kept), using an empty base URI
 * and the shared output settings, then removes every occurrence of the literal
 * passed to {@code replace}.
 *
 * NOTE(review): the replaced literal appears as a single space character here; it may
 * have been a non-breaking space in the original source — confirm.
 */
private static String cleanNoMarkup(String input) {
    final String withoutTags = Jsoup.clean(input, "", Whitelist.none(), outputSettings);
    return withoutTags.replace(" ", "");
}
Example 48
Project: new-android-daisy-reader-master  File: FullText.java View source code
/**
     * Returns the inner HTML of the element with the given id, cleaned with the
     * simple-text whitelist.
     * 
     * @param reference the id of the element, e.g. "id_224"
     * @return the cleaned inner HTML of that element
     */
public String getHtmlFor(String reference) {
    final String innerHtml = documentContents.getElementById(reference).html();
    final String cleaned = Jsoup.clean(innerHtml, Whitelist.simpleText());
    return cleaned;
}
Example 49
Project: btpka3.github.com-master  File: Test123.java View source code
/**
 * Cleans the sample input with the basic whitelist and prints the result to stdout.
 */
public static void testJsoup() {
    System.out.println(Jsoup.clean(drityInput, Whitelist.basic()));
}
Example 50
Project: lutece-core-master  File: JSoupHtmlCleaner.java View source code
/**
     * {@inheritDoc}
     */
@Override
public String clean(String strSource) throws HtmlCleanerException {
    // Cleaning is delegated to jsoup with its relaxed whitelist.
    return Jsoup.clean(strSource, Whitelist.relaxed());
}
Example 51
Project: smartly-master  File: HtmlParser.java View source code
/**
 * Cleans the wrapped document with jsoup's basic whitelist.
 *
 * @return the document produced by the cleaner
 */
public Document cleanBasic() {
    return new Cleaner(Whitelist.basic()).clean(_document);
}
Example 52
Project: structured-content-tools-master  File: StripHtmlPreprocessor.java View source code
/**
 * Converts an HTML value to text: the value is cleaned with the relaxed whitelist,
 * parsed again, and its body is handed to convertNodeToText.
 *
 * @param value the value to process; null or blank values are returned unchanged
 * @return the text produced by convertNodeToText, or the input itself when it is null or blank
 */
protected String stripHtml(String value) {
    final boolean nothingToStrip = value == null || value.trim().isEmpty();
    if (nothingToStrip)
        return value;
    final String sanitized = Jsoup.clean(value, Whitelist.relaxed());
    return convertNodeToText(Jsoup.parse(sanitized).body());
}
Example 53
Project: CN1ML-NetbeansModule-master  File: Jsoup.java View source code
/**
     Produce safe HTML from untrusted input: the input is parsed as a body fragment and then filtered through
     a white-list of permitted tags and attributes.

     @param bodyHtml  input untrusted HTML (body fragment)
     @param baseUri   URL to resolve relative URLs against
     @param whitelist white-list of permitted HTML elements
     @return safe HTML (body fragment)

     @see Cleaner#clean(Document)
     */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    // Parse before constructing the cleaner, keeping the original order of evaluation.
    final Document untrusted = parseBodyFragment(bodyHtml, baseUri);
    return new Cleaner(whitelist).clean(untrusted).body().html();
}
Example 54
Project: FudanBBS-master  File: Jsoup.java View source code
/**
     Produce safe HTML from untrusted input: the input is parsed as a body fragment and then filtered through
     a white-list of permitted tags and attributes.

     @param bodyHtml  input untrusted HTML (body fragment)
     @param baseUri   URL to resolve relative URLs against
     @param whitelist white-list of permitted HTML elements
     @return safe HTML (body fragment)

     @see Cleaner#clean(Document)
     */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    // Parse before constructing the cleaner, keeping the original order of evaluation.
    final Document untrusted = parseBodyFragment(bodyHtml, baseUri);
    return new Cleaner(whitelist).clean(untrusted).body().html();
}
Example 55
Project: jsoup-master  File: Jsoup.java View source code
/**
     Produce safe HTML from untrusted input: the input is parsed as a body fragment and then filtered through
     a white-list of permitted tags and attributes.

     @param bodyHtml  input untrusted HTML (body fragment)
     @param baseUri   URL to resolve relative URLs against
     @param whitelist white-list of permitted HTML elements
     @return safe HTML (body fragment)

     @see Cleaner#clean(Document)
     */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    // Parse before constructing the cleaner, keeping the original order of evaluation.
    final Document untrusted = parseBodyFragment(bodyHtml, baseUri);
    return new Cleaner(whitelist).clean(untrusted).body().html();
}
Example 56
Project: validadorAcessibilidade-master  File: Jsoup.java View source code
/**
 * Produces safe HTML from untrusted input by filtering it through a white-list.
 * <p>
 * Steps: parse {@code bodyHtml} as a body fragment relative to {@code baseUri}, run the
 * parsed document through a {@link Cleaner} configured with {@code whitelist}, then
 * serialize the body of the cleaned document.
 *
 * @param bodyHtml  untrusted input HTML (body fragment)
 * @param baseUri   URL that relative URLs are resolved against
 * @param whitelist white-list of permitted HTML elements
 * @return the cleaned HTML (body fragment)
 *
 * @see Cleaner#clean(Document)
 */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    final Document rawDocument = parseBodyFragment(bodyHtml, baseUri);
    final Document safeDocument = new Cleaner(whitelist).clean(rawDocument);
    return safeDocument.body().html();
}
Example 57
Project: zafu_jwc-master  File: Jsoup.java View source code
/**
 * Sanitizes untrusted HTML against a white-list of permitted tags and attributes.
 * <p>
 * The input is parsed as a body fragment, passed through a {@link Cleaner} built from the
 * supplied white-list, and the inner HTML of the resulting document body is returned.
 *
 * @param bodyHtml  untrusted input HTML (body fragment)
 * @param baseUri   URL that relative URLs are resolved against
 * @param whitelist white-list of permitted HTML elements
 * @return the cleaned HTML (body fragment)
 *
 * @see Cleaner#clean(Document)
 */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    // Parse before constructing the cleaner so the order of operations matches the long form.
    final Document suspect = parseBodyFragment(bodyHtml, baseUri);
    return new Cleaner(whitelist).clean(suspect).body().html();
}
Example 58
Project: ese2010-team3-master  File: Tools.java View source code
/**
 * Converts Markdown to sanitized HTML.
 * <p>
 * Content that begins with the literal {@code <h3>} is treated as already being HTML and is
 * only sanitized. Any other content is first rendered with a fresh {@code PegDownProcessor}
 * and the rendered HTML is then sanitized. Both paths sanitize with {@code Whitelist.basic()}.
 *
 * @param content
 *            some Markdown content (or HTML starting with {@code <h3>})
 * @return that content as HTML cleaned through the basic whitelist
 * @throws NullPointerException
 *             if {@code content} is {@code null}
 */
public static String markdownToHtml(String content) {
    // A leading <h3> is the marker for "already HTML": skip the Markdown rendering step.
    final boolean alreadyHtml = content.startsWith("<h3>");
    final String html = alreadyHtml ? content : new PegDownProcessor().markdownToHtml(content);
    return Jsoup.clean(html, Whitelist.basic());
}
Example 59
Project: SmartAndroidSource-master  File: SmartTag.java View source code
/**
 * Produces safe HTML from untrusted input by filtering it through a white-list.
 * <p>
 * Steps: parse {@code bodyHtml} as a body fragment relative to {@code baseUri}, run the
 * parsed document through a {@link Cleaner} configured with {@code whitelist}, then
 * serialize the body of the cleaned document.
 *
 * @param bodyHtml  untrusted input HTML (body fragment)
 * @param baseUri   URL that relative URLs are resolved against
 * @param whitelist white-list of permitted HTML elements
 * @return the cleaned HTML (body fragment)
 *
 * @see Cleaner#clean(Document)
 */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    final Document unfiltered = parseBodyFragment(bodyHtml, baseUri);
    final Document sanitized = new Cleaner(whitelist).clean(unfiltered);
    return sanitized.body().html();
}
Example 60
Project: activeweb-master  File: HttpSupport.java View source code
/**
 * Cleans user-supplied HTML by passing it through {@code Jsoup.clean} with the basic whitelist.
 * <p>For example, input like this:</p>
 *
 * <pre>{@code
 *      <html><script> alert('hello');</script><div>this is a clean part</div></html>
 * }</pre>
 *
 * Will produce output like this:
 *
 * <pre>{@code
 *     this is a clean part
 * }</pre>
 *
 * @param unsafeContent unsafe content. Something that an end user typed into a text area, or input that may include
 *                      a script tag or other garbage.
 * @return sanitized version of input
 */
protected String sanitize(String unsafeContent) {
    final Whitelist permitted = Whitelist.basic();
    return Jsoup.clean(unsafeContent, permitted);
}
Example 61
Project: agile-itsm-master  File: SolicitacaoServicoDTO.java View source code
/**
 * Stores the description used for titles after cleaning it with {@code Whitelist.none()}.
 * A {@code null} argument is ignored and leaves the current value untouched.
 *
 * @param descricaoForTitle the raw description, possibly containing HTML; may be {@code null}
 */
public void setDescricaoForTitle(String descricaoForTitle) {
    if (descricaoForTitle == null) {
        // Nothing to clean; keep whatever value is already stored.
        return;
    }
    final String cleaned = Jsoup.clean(descricaoForTitle, Whitelist.none());
    this.descricaoForTitle = cleaned;
}
Example 62
Project: crushpaper-master  File: Servlet.java View source code
/**
 * Builds the whitelist supplied as the initial value: the relaxed whitelist with
 * {@code rel="nofollow"} and {@code target="_blank"} enforced on every {@code a} element.
 *
 * @return the configured whitelist
 */
@Override
protected Whitelist initialValue() {
    Whitelist policy = Whitelist.relaxed();
    // Re-assign on each step so the value returned by the fluent call is the one carried forward.
    policy = policy.addEnforcedAttribute("a", "rel", "nofollow");
    policy = policy.addEnforcedAttribute("a", "target", "_blank");
    return policy;
}