Java Examples for org.jsoup.safety.Whitelist
The following Java examples will help you understand the usage of org.jsoup.safety.Whitelist. These source code samples are taken from different open source projects.
Example 1
| Project: android-opensource-library-56-master File: SanitizeActivity.java View source code |
/**
 * Builds the sanitize demo screen: pre-fills the input field with a sample
 * snippet that carries an inline {@code onclick} handler and, whenever the
 * sanitize button is pressed, writes the input cleaned with
 * {@code Whitelist.basic()} into the output field.
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_sanitize);

    final EditText rawField = (EditText) findViewById(R.id.input_text);
    rawField.setText("<p><a href='http://example.com/' onclick='doAttack()'>Link</a></p>");
    final EditText cleanField = (EditText) findViewById(R.id.sanitized_text);

    final View trigger = findViewById(R.id.sanitize_button);
    trigger.setOnClickListener(new OnClickListener() {
        @Override
        public void onClick(View v) {
            // Re-read the field on every click so edits made by the user are honoured
            final String rawHtml = rawField.getText().toString();
            cleanField.setText(Jsoup.clean(rawHtml, Whitelist.basic()));
        }
    });
}Example 2
| Project: dataverse-master File: MarkupChecker.java View source code |
/**
 * Cleans untrusted HTML with jsoup, starting from the "basic with images"
 * whitelist and extending it with headings, image maps and a few more tags.
 * See http://jsoup.org/cookbook/cleaning-html/whitelist-sanitizer
 *
 * @param unsafe the markup to clean; may be {@code null}
 * @return the cleaned markup, or {@code null} when {@code unsafe} is {@code null}
 */
public static String sanitizeBasicHTML(String unsafe) {
    if (unsafe != null) {
        // basic includes: a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li, ol, p, pre, q, small, span, strike, strong, sub, sup, u, ul
        Whitelist allowed = Whitelist.basicWithImages();
        allowed.addTags("h1", "h2", "h3", "kbd", "hr", "s", "del", "map", "area");
        allowed.addAttributes("img", "usemap");
        allowed.addAttributes("map", "name");
        allowed.addAttributes("area", "shape", "coords", "href", "title", "alt");
        // Every link that survives cleaning is given target="_blank"
        allowed.addEnforcedAttribute("a", "target", "_blank");
        return Jsoup.clean(unsafe, allowed);
    }
    return null;
}Example 3
| Project: voj-master File: HtmlTextFilter.java View source code |
/**
 * Filters a string that contains HTML, removing all tags.
 * Before the tags are stripped, a line-break marker is attached to every
 * {@code br} and {@code p} element and then turned into a real newline.
 *
 * @param text the string to filter; may be {@code null}
 * @return the filtered string, or {@code null} when {@code text} is {@code null}
 */
public static String filter(String text) {
    if (text == null) {
        return text;
    }
    Document parsed = Jsoup.parse(text);
    parsed.outputSettings(new Document.OutputSettings().prettyPrint(false));

    // Attach a literal backslash-n marker to line-breaking elements ...
    parsed.select("br").append("\\n");
    parsed.select("p").prepend("\\n\\n");
    // ... and turn each marker into an actual newline character
    String withNewlines = parsed.html().replaceAll("\\\\n", "\n");

    Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
    return Jsoup.clean(withNewlines, "", Whitelist.none(), rawOutput);
}Example 4
| Project: quadriga-master File: HTMLContentValidator.java View source code |
/**
 * Validates the title and description of an about text or a project blog
 * entry. Both fields are rejected when empty or whitespace-only; in addition
 * the description must be valid against the extended whitelist and the title
 * against the simple-text whitelist.
 *
 * @param obj the object to validate; only {@code IAboutText} and
 *            {@code ProjectBlogEntry} instances have their fields read
 * @param err receives the validation errors
 */
@Override
public void validate(Object obj, Errors err) {
    String bodyText = null;
    String heading = null;
    if (obj instanceof IAboutText) {
        IAboutText aboutText = (IAboutText) obj;
        bodyText = aboutText.getDescription();
        heading = aboutText.getTitle();
    } else if (obj instanceof ProjectBlogEntry) {
        ProjectBlogEntry blogEntry = (ProjectBlogEntry) obj;
        bodyText = blogEntry.getDescription();
        heading = blogEntry.getTitle();
    }

    Whitelist bodyRules = ExtendedWhitelist.extendedWhiteListWithBase64();
    Whitelist headingRules = Whitelist.simpleText();

    // Presence checks first
    ValidationUtils.rejectIfEmptyOrWhitespace(err, "title", "about_title.required");
    ValidationUtils.rejectIfEmptyOrWhitespace(err, "description", "about_description.required");

    // Then markup checks, description before title
    boolean bodyAcceptable = Jsoup.isValid(bodyText, bodyRules);
    if (!bodyAcceptable) {
        err.rejectValue("description", "about_description.proper");
    }
    boolean headingAcceptable = Jsoup.isValid(heading, headingRules);
    if (!headingAcceptable) {
        err.rejectValue("title", "about_title.proper");
    }
}Example 5
| Project: symphony-master File: ArticleQueryService.java View source code |
/**
 * Gets preview content of the article specified with the given article id.
 *
 * <p>Returns {@code null} when the article does not exist or is of the
 * "thought" type. Returns a block label when the author or the article is
 * marked invalid, and a discussion notice when the article is a discussion
 * that the current user neither wrote nor is named in. Otherwise the content
 * is converted to HTML, stripped of all tags and cut to 150 characters.</p>
 *
 * @param articleId the given article id
 * @param request the specified request
 * @return preview content, or {@code null} as described above
 * @throws ServiceException service exception
 */
public String getArticlePreviewContent(final String articleId, final HttpServletRequest request) throws ServiceException {
    final JSONObject article = getArticle(articleId);
    if (null == article) {
        return null;
    }
    final int articleType = article.optInt(Article.ARTICLE_TYPE);
    if (Article.ARTICLE_TYPE_C_THOUGHT == articleType) {
        return null;
    }
    Stopwatchs.start("Get preview content");
    try {
        // Maximum number of characters kept in the preview
        final int length = 150;
        String ret = article.optString(Article.ARTICLE_CONTENT);
        final String authorId = article.optString(Article.ARTICLE_AUTHOR_ID);
        final JSONObject author = userQueryService.getUser(authorId);
        // Parentheses spell out the precedence the original relied on (&& binds tighter than ||)
        if ((null != author && UserExt.USER_STATUS_C_INVALID == author.optInt(UserExt.USER_STATUS))
                || Article.ARTICLE_STATUS_C_INVALID == article.optInt(Article.ARTICLE_STATUS)) {
            return langPropsService.get("articleContentBlockLabel");
        }
        final Set<String> userNames = userQueryService.getUserNames(ret);
        final JSONObject currentUser = userQueryService.getCurrentUser(request);
        final String currentUserName = null == currentUser ? "" : currentUser.optString(User.USER_NAME);
        // NOTE(review): author is dereferenced here without a null check although the
        // condition above allows for a null author - confirm getUser never returns null here
        final String authorName = author.optString(User.USER_NAME);
        if (Article.ARTICLE_TYPE_C_DISCUSSION == articleType && !authorName.equals(currentUserName)) {
            // The current user counts as invited when their name is among the names found in the content
            boolean invited = false;
            for (final String userName : userNames) {
                if (userName.equals(currentUserName)) {
                    invited = true;
                    break;
                }
            }
            if (!invited) {
                String blockContent = langPropsService.get("articleDiscussionLabel");
                blockContent = blockContent.replace("{user}", "<a href='" + Latkes.getServePath() + "/member/" + authorName + "'>" + authorName + "</a>");
                return blockContent;
            }
        }
        ret = Emotions.convert(ret);
        ret = Markdowns.toHTML(ret);
        // Drop every tag so the preview is text only
        ret = Jsoup.clean(ret, Whitelist.none());
        if (ret.length() >= length) {
            ret = StringUtils.substring(ret, 0, length) + " ....";
        }
        return ret;
    } finally {
        Stopwatchs.end();
    }
}Example 6
| Project: viritin-master File: RichTextExample.java View source code |
/**
 * Assembles the demo layout: three {@code RichText} samples (default
 * whitelist, a relaxed whitelist that also keeps {@code style} on
 * {@code h1}, and Markdown input) plus a {@code LabelField} whose label is
 * a {@code RichText} using the same relaxed whitelist.
 */
@Override
public Component getTestComponent() {
    // Default settings; the original author notes that styles are stripped by default
    RichText defaultSample = new RichText("<h1 style='color:red' class='foobar'>Jou!</h1>");

    // Relaxed whitelist plus the style attribute on h1
    RichText relaxedSample = new RichText("<h1 style='color:red'>Jou!</h1>") {
        @Override
        public Whitelist getWhitelist() {
            return Whitelist.relaxed().addAttributes("h1", "style");
        }
    };

    // Content supplied as Markdown
    RichText markdownSample = new RichText().withMarkDown("This is **Markdown** formatted *text*");

    // RichText can also serve as the label of a LabelField
    RichText relaxedLabel = new RichText() {
        @Override
        public Whitelist getWhitelist() {
            return Whitelist.relaxed().addAttributes("h1", "style");
        }
    };
    LabelField<Integer> labelField = new LabelField<>();
    labelField.setLabel(relaxedLabel);
    labelField.setCaptionGenerator(number -> "<h1 style='color:blue'>" + number + "</h1>");
    labelField.setValue(69);

    return new MVerticalLayout(defaultSample, relaxedSample, markdownSample, labelField);
}Example 7
| Project: calendula-master File: LeafletHtmlPostProcessor.java View source code |
/**
 * Post-processes leaflet HTML for display: removes {@code nav} elements and
 * the {@code div#pdfurl} element, cleans the rest against an extended relaxed
 * whitelist and finally drops childless elements that hold no visible text.
 *
 * @param html the HTML fragment to process
 * @return the cleaned document's HTML
 */
@Override
public String process(String html) {
    Document parsed = Jsoup.parseBodyFragment(html);
    parsed.select("nav").remove();
    parsed.select("div#pdfurl").remove();

    // Whitelist used for cleaning
    Whitelist allowed = Whitelist.relaxed();
    allowed.addTags("div", "span", "p", "h1", "h2", "h3", "ul", "ol", "li", "a", "img");
    allowed.preserveRelativeLinks(true);
    allowed.addAttributes("img", "src");
    allowed.addAttributes("a", "href");

    Document cleaned = new Cleaner(allowed).clean(parsed);
    cleaned.outputSettings().escapeMode(Entities.EscapeMode.xhtml);

    // Tags that may be dropped when they turn out to be empty
    Set<String> removable = new HashSet<>(Arrays.asList("div", "span", "strong", "p", "h1", "h2", "h3", "ul", "ol", "li", "a"));
    cleaned.select("p:matchesOwn((?is) )").remove();

    for (Element candidate : cleaned.getAllElements()) {
        if (!candidate.children().isEmpty()) {
            continue;
        }
        boolean blank = !candidate.hasText() || candidate.text().replaceAll(" ", "").trim().equals("");
        if (blank && removable.contains(candidate.tagName())) {
            candidate.remove();
        }
    }
    return cleaned.html();
}Example 8
| Project: chukwa-master File: XssFilter.java View source code |
/**
 * Strips potential XSS content from a value: the value is canonicalized
 * with ESAPI, NUL characters are removed and all HTML tags are dropped.
 *
 * @param value the string to filter; may be {@code null}
 * @return the filtered string, or {@code null} when {@code value} is {@code null}
 */
public String filter(String value) {
    if (value == null) {
        return null;
    }
    // Canonicalize first so encoded payloads are decoded before cleaning
    final String canonical = ESAPI.encoder().canonicalize(value);
    final String withoutNulls = canonical.replaceAll("\0", "");
    return Jsoup.clean(withoutNulls, Whitelist.none());
}Example 9
| Project: hibernate-validator-master File: SafeHtmlValidator.java View source code |
/**
 * Configures this validator from the annotation: stores the base URI, picks
 * the built-in whitelist named by {@code whitelistType()} and then extends it
 * with the additional tags and tag attributes declared on the annotation.
 *
 * @param safeHtmlAnnotation the constraint annotation to read settings from
 */
@Override
public void initialize(SafeHtml safeHtmlAnnotation) {
    baseURI = safeHtmlAnnotation.baseURI();

    switch (safeHtmlAnnotation.whitelistType()) {
        case BASIC:
            whitelist = Whitelist.basic();
            break;
        case BASIC_WITH_IMAGES:
            whitelist = Whitelist.basicWithImages();
            break;
        case NONE:
            whitelist = Whitelist.none();
            break;
        case RELAXED:
            whitelist = Whitelist.relaxed();
            break;
        case SIMPLE_TEXT:
            whitelist = Whitelist.simpleText();
            break;
    }

    whitelist.addTags(safeHtmlAnnotation.additionalTags());
    final SafeHtml.Tag[] extraTags = safeHtmlAnnotation.additionalTagsWithAttributes();
    for (SafeHtml.Tag extraTag : extraTags) {
        whitelist.addAttributes(extraTag.name(), extraTag.attributes());
    }
}Example 10
| Project: MusicDNA-master File: LyricWiki.java View source code |
/**
 * Downloads a LyricWiki page and extracts the lyrics from its
 * {@code div.lyricBox} element.
 *
 * @param url    the page URL; an edit link (ending in {@code action=edit}) yields {@code NO_RESULT}
 * @param artist the artist name, or {@code null} to take it from the page title (text before the first ':')
 * @param song   the song title, or {@code null} to take it from the page title (text between the first ':' and the last "Lyrics")
 * @return a {@code Lyrics} flagged {@code ERROR} when the page cannot be fetched or parsed,
 *         {@code NEGATIVE_RESULT} for unlicensed or instrumental songs, {@code NO_RESULT}
 *         when fewer than three characters of text remain, otherwise {@code POSITIVE_RESULT}
 *         with text, source and URL filled in
 */
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }
    String text;
    String originalArtist = artist;
    String originalTitle = song;
    try {
        //url = URLDecoder.decode(url, "utf-8");
        Document lyricsPage = Jsoup.connect(url).get();
        Element lyricbox = lyricsPage.select("div.lyricBox").get(0);
        lyricbox.getElementsByClass("references").remove();
        String lyricsHtml = lyricbox.html();
        final Document.OutputSettings outputSettings = new Document.OutputSettings().prettyPrint(false);
        // Keep only <br> tags so line breaks survive
        text = Jsoup.clean(lyricsHtml, "", new Whitelist().addTags("br"), outputSettings);
        // NOTE(review): contains("") is always true; the intended marker may have been lost - confirm
        if (text.contains("")) {
            text = Parser.unescapeEntities(text, true);
        }
        // Drop single-digit bracketed markers such as "[1]"
        text = text.replaceAll("\\[\\d\\]", "").trim();
        String title = lyricsPage.getElementsByTag("title").get(0).text();
        int colon = title.indexOf(':');
        if (artist == null) {
            artist = title.substring(0, colon).trim();
        }
        if (song == null) {
            int end = title.lastIndexOf("Lyrics");
            song = title.substring(colon + 1, end).trim();
        }
    } catch (IndexOutOfBoundsException | IOException e) {
        // Missing lyric box or title, unexpected title format, or network failure
        return new Lyrics(ERROR);
    }
    try {
        artist = URLDecoder.decode(artist, "UTF-8");
        song = URLDecoder.decode(song, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }
    if (text.contains("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") || text.equals("Instrumental <br />")) {
        Lyrics result = new Lyrics(NEGATIVE_RESULT);
        result.setArtist(artist);
        result.setTitle(song);
        return result;
    } else if (text.equals("") || text.length() < 3) {
        return new Lyrics(NO_RESULT);
    } else {
        Lyrics lyrics = new Lyrics(POSITIVE_RESULT);
        lyrics.setArtist(artist);
        lyrics.setTitle(song);
        lyrics.setOriginalArtist(originalArtist);
        lyrics.setOriginalTitle(originalTitle);
        lyrics.setText(text);
        lyrics.setSource("LyricsWiki");
        lyrics.setURL(url);
        return lyrics;
    }
}Example 11
| Project: skalli-master File: TimelineResource.java View source code |
/**
 * Builds a syndication feed for a project's timeline.
 * The feed title and description use the project name with all HTML removed;
 * every timeline entry is mapped to a feed entry carrying its title, source,
 * link, publication date and content.
 *
 * @param project the project whose timeline is published
 * @param host    prefix of the feed link
 * @param entries timeline entries to include, in order
 * @return the populated feed
 */
private SyndFeed getFeed(Project project, String host, List<Entry> entries) {
    String plainName = HtmlUtils.clean(project.getName(), Whitelist.none());

    SyndFeed result = new SyndFeedImpl();
    result.setTitle(MessageFormat.format("{0} | Timeline", plainName));
    result.setDescription(MessageFormat.format("Latest changes to project ''{0}''.", plainName));
    //$NON-NLS-1$
    String timelineUrl = host + RestUtils.URL_PROJECTS + project.getProjectId() + "/timeline";
    result.setLink(timelineUrl);

    List<SyndEntry> converted = new ArrayList<SyndEntry>();
    for (Entry source : entries) {
        SyndContent body = new SyndContentImpl();
        body.setType(source.getContent().getType());
        body.setValue(source.getContent().getValue());

        SyndEntry item = new SyndEntryImpl();
        item.setTitle(MessageFormat.format("{0} | {1}", source.getTitle(), source.getSource()));
        item.setLink(source.getLink().getHref());
        item.setPublishedDate(source.getPublished());
        item.setDescription(body);
        converted.add(item);
    }
    result.setEntries(converted);
    return result;
}Example 12
| Project: appverse-web-master File: ESAPIHelper.java View source code |
/**
 * Strips potential XSS content from a value: the value is canonicalized
 * with ESAPI, NUL characters are removed and every HTML tag is dropped.
 *
 * @param value the string to clean; may be {@code null}
 * @return the cleaned string, or {@code null} when {@code value} is {@code null}
 */
public static String stripXSS(String value) {
    if (value == null) {
        return value;
    }
    // Canonicalize with ESAPI so encoded attacks are decoded before cleaning
    String canonical = ESAPI.encoder().canonicalize(value);
    // Remove NUL characters
    String withoutNulls = canonical.replaceAll("\0", "");
    // An empty whitelist removes all tags, e.g. <script> is dropped rather than escaped
    return Jsoup.clean(withoutNulls, Whitelist.none());
}Example 13
| Project: coprhd-controller-master File: SecurityUtils.java View source code |
/**
 * Removes potential XSS content from the given value.
 * Relies on the OWASP ESAPI (owasp.org) and jsoup (jsoup.org) libraries.
 *
 * @param value data to be cleaned; may be {@code null}
 * @return cleaned data, or {@code null} when {@code value} is {@code null}
 */
public static String stripXSS(String value) {
    if (value != null) {
        // ESAPI canonicalizes the input first, then jsoup removes every HTML tag (including <script>)
        final String canonical = ESAPI.encoder().canonicalize(value, false, false);
        final String withoutNulls = canonical.replaceAll("\0", "");
        final Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
        return Jsoup.clean(withoutNulls, "", Whitelist.none(), rawOutput);
    }
    return null;
}Example 14
| Project: gitblit-master File: JSoupXssFilter.java View source code |
/**
 * Builds and returns a loose HTML whitelist similar to the one GitHub uses.
 *
 * https://github.com/github/markup/tree/master#html-sanitization
 *
 * @return a loose HTML whitelist
 */
protected Whitelist getRelaxedWhiteList() {
    Whitelist relaxed = new Whitelist();

    // Permitted elements
    relaxed.addTags(
            "a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup", "dd",
            "del", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i",
            "img", "ins", "kbd", "li", "ol", "p", "pre", "q", "samp", "small", "strike",
            "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr",
            "tt", "u", "ul", "var");

    // Permitted attributes per element
    relaxed.addAttributes("a", "class", "href", "style", "target", "title");
    relaxed.addAttributes("blockquote", "cite");
    relaxed.addAttributes("col", "span", "width");
    relaxed.addAttributes("colgroup", "span", "width");
    relaxed.addAttributes("div", "class", "style");
    relaxed.addAttributes("img", "align", "alt", "height", "src", "title", "width");
    relaxed.addAttributes("ol", "start", "type");
    relaxed.addAttributes("q", "cite");
    relaxed.addAttributes("span", "class", "style");
    relaxed.addAttributes("table", "class", "style", "summary", "width");
    relaxed.addAttributes("td", "abbr", "axis", "class", "colspan", "rowspan", "style", "width");
    relaxed.addAttributes("th", "abbr", "axis", "class", "colspan", "rowspan", "scope", "style", "width");
    relaxed.addAttributes("ul", "type");

    // Every link is forced to carry rel="nofollow"
    relaxed.addEnforcedAttribute("a", "rel", "nofollow");
    return relaxed;
}
Example 15
| Project: HERD-master File: HerdStringUtils.java View source code |
/**
 * Strips HTML tags from a given input String, allowing some tags to be retained via a whitelist.
 * Retained tags also keep their {@code class} attribute.
 *
 * @param fragment the specified String
 * @param whitelistTags the specified whitelist tags; every non-alphabetic character
 *            (angle brackets, slashes, digits, ...) is removed to obtain the tag name
 *
 * @return cleaned String with allowed tags
 */
public static String stripHtml(String fragment, String... whitelistTags) {
    // Parse out html tags except those from a given list of whitelist tags
    Document dirty = Jsoup.parseBodyFragment(fragment);
    Whitelist whitelist = new Whitelist();
    for (String whitelistTag : whitelistTags) {
        // Get the actual tag name from the whitelist tag
        // this is vulnerable in general to complex tags but will suffice for our simple needs.
        // The previous pattern "[^\\{IsAlphabetic}]" lacked the "\p" prefix, so it kept only
        // the literal characters of "{IsAlphabetic}" and turned e.g. "hr" into "h".
        whitelistTag = StringUtils.removePattern(whitelistTag, "[^\\p{IsAlphabetic}]");
        // Add all specified tags to the whitelist and keep their class attribute
        whitelist.addTags(whitelistTag).addAttributes(whitelistTag, "class");
    }
    Cleaner cleaner = new Cleaner(whitelist);
    Document clean = cleaner.clean(dirty);
    clean.outputSettings()
            .escapeMode(Entities.EscapeMode.base)
            // Set character encoding to UTF-8
            .charset(CharEncoding.UTF_8)
            // Make sure no line-breaks are added
            .prettyPrint(false);
    // return 'cleaned' html body
    return clean.body().html();
}Example 16
| Project: kune-master File: ContentUnrenderer.java View source code |
/**
 * Turns an HTML document back into a set of text, elements and annotations.
 * The content is first sanitized with jsoup's basic whitelist and only the
 * sanitized markup is parsed and unrendered.
 *
 * @param content the content
 * @return the unrendered blip
 */
public static UnrenderedBlip unrender(final String content) {
    // Sanitize before anything is parsed
    final Document sanitizedDoc = Jsoup.parse(Jsoup.clean(content, Whitelist.basic()));

    final StringBuilder textOut = new StringBuilder();
    final Map<Integer, com.google.wave.api.Element> elementsOut = Maps.newHashMap();
    final Annotations annotationsOut = new Annotations();
    unrender(sanitizedDoc.body(), textOut, elementsOut, annotationsOut);

    return new UnrenderedBlip(textOut.toString(), elementsOut, annotationsOut);
}Example 17
| Project: lyrics-master File: LyricWiki.java View source code |
/**
 * Downloads a LyricWiki page and extracts the lyrics from its
 * {@code div.lyricBox} element.
 *
 * @param url    the page URL; an edit link (ending in {@code action=edit}) yields {@code NO_RESULT}
 * @param artist the artist name, or {@code null} to take it from the page title (text before the first ':')
 * @param song   the song title, or {@code null} to take it from the page title (text between the first ':' and the last "Lyrics")
 * @return a {@code Lyrics} flagged {@code ERROR} when the page cannot be fetched or parsed,
 *         {@code NEGATIVE_RESULT} for unlicensed or instrumental songs, {@code NO_RESULT}
 *         when fewer than three characters of text remain, otherwise {@code POSITIVE_RESULT}
 *         with text, source and URL filled in
 */
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }
    String text;
    String originalArtist = artist;
    String originalTitle = song;
    try {
        //url = URLDecoder.decode(url, "utf-8");
        Document lyricsPage = Jsoup.connect(url).get();
        Element lyricbox = lyricsPage.select("div.lyricBox").get(0);
        lyricbox.getElementsByClass("references").remove();
        String lyricsHtml = lyricbox.html();
        final Document.OutputSettings outputSettings = new Document.OutputSettings().prettyPrint(false);
        // Keep only <br> tags so line breaks survive
        text = Jsoup.clean(lyricsHtml, "", new Whitelist().addTags("br"), outputSettings);
        // NOTE(review): contains("") is always true; the intended marker may have been lost - confirm
        if (text.contains("")) {
            text = Parser.unescapeEntities(text, true);
        }
        // Drop single-digit bracketed markers such as "[1]"
        text = text.replaceAll("\\[\\d\\]", "").trim();
        String title = lyricsPage.getElementsByTag("title").get(0).text();
        int colon = title.indexOf(':');
        if (artist == null) {
            artist = title.substring(0, colon).trim();
        }
        if (song == null) {
            int end = title.lastIndexOf("Lyrics");
            song = title.substring(colon + 1, end).trim();
        }
    } catch (IndexOutOfBoundsException | IOException e) {
        // Missing lyric box or title, unexpected title format, or network failure
        return new Lyrics(ERROR);
    }
    try {
        artist = URLDecoder.decode(artist, "UTF-8");
        song = URLDecoder.decode(song, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }
    if (text.contains("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") || text.equals("Instrumental <br />")) {
        Lyrics result = new Lyrics(NEGATIVE_RESULT);
        result.setArtist(artist);
        result.setTitle(song);
        return result;
    } else if (text.equals("") || text.length() < 3) {
        return new Lyrics(NO_RESULT);
    } else {
        Lyrics lyrics = new Lyrics(POSITIVE_RESULT);
        lyrics.setArtist(artist);
        lyrics.setTitle(song);
        lyrics.setOriginalArtist(originalArtist);
        lyrics.setOriginalTitle(originalTitle);
        lyrics.setText(text);
        lyrics.setSource("LyricsWiki");
        lyrics.setURL(url);
        return lyrics;
    }
}Example 18
| Project: patientview-master File: XssUtils.java View source code |
/**
 * Cleans every String property of the given object in place: for each public
 * String setter that has a matching String getter, the current value is read,
 * stripped of all HTML with jsoup and written back through the setter.
 * Failures on individual properties are logged and do not stop the loop.
 *
 * @param object the object whose String properties are cleaned
 * @param <T> the object's type
 */
public <T> void cleanObjectForXss(T object) {
    Method[] methods = object.getClass().getMethods();
    if (methods != null && methods.length > 0) {
        Map<String, Method> methodNameToMethod = new HashMap<String, Method>();
        // add all the methods to the map (overloads share one entry, the last one seen wins)
        for (Method method : methods) {
            methodNameToMethod.put(method.getName(), method);
        }
        // find the string setters
        for (Method method : methods) {
            if (isStringSetter(method)) {
                // check if there is a matching getter; only the leading "set" is swapped so
                // that names such as "setAsset" map to "getAsset" rather than "getAsget"
                String setterName = method.getName();
                String getterName = setterName.startsWith("set") ? "get" + setterName.substring(3) : setterName;
                Method getter = methodNameToMethod.get(getterName);
                if (getter != null && isStringGetter(getter)) {
                    try {
                        // get the data, clean it and invoke the setter with the new data
                        String dirtyString = (String) getter.invoke(object);
                        if (dirtyString != null) {
                            String cleanString = Jsoup.clean(dirtyString, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
                            // set the clean string
                            method.invoke(object, cleanString);
                        }
                    } catch (Exception e) {
                        LOGGER.error(e.getMessage());
                        LOGGER.debug(e.getMessage(), e);
                    }
                }
            }
        }
    }
}Example 19
| Project: QuickLyric-master File: LyricWiki.java View source code |
/**
 * Downloads a LyricWiki page and extracts the lyrics from its
 * {@code div.lyricBox} element.
 *
 * @param url    the page URL; an edit link (ending in {@code action=edit}) yields {@code NO_RESULT}
 * @param artist the artist name, or {@code null} to take it from the page title (text before the first ':')
 * @param song   the song title, or {@code null} to take it from the page title (text between the first ':' and the last "Lyrics")
 * @return a {@code Lyrics} flagged {@code ERROR} when the page cannot be fetched or parsed,
 *         {@code NEGATIVE_RESULT} for unlicensed or instrumental songs, {@code NO_RESULT}
 *         when fewer than three characters of text remain, otherwise {@code POSITIVE_RESULT}
 *         with text, source and URL filled in
 */
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }
    String text;
    String originalArtist = artist;
    String originalTitle = song;
    try {
        //url = URLDecoder.decode(url, "utf-8");
        Document lyricsPage = Jsoup.connect(url).get();
        Element lyricbox = lyricsPage.select("div.lyricBox").get(0);
        lyricbox.getElementsByClass("references").remove();
        String lyricsHtml = lyricbox.html();
        final Document.OutputSettings outputSettings = new Document.OutputSettings().prettyPrint(false);
        // Keep only <br> tags so line breaks survive
        text = Jsoup.clean(lyricsHtml, "", new Whitelist().addTags("br"), outputSettings);
        // NOTE(review): contains("") is always true; the intended marker may have been lost - confirm
        if (text.contains("")) {
            text = Parser.unescapeEntities(text, true);
        }
        // Drop single-digit bracketed markers such as "[1]"
        text = text.replaceAll("\\[\\d\\]", "").trim();
        String title = lyricsPage.getElementsByTag("title").get(0).text();
        int colon = title.indexOf(':');
        if (artist == null) {
            artist = title.substring(0, colon).trim();
        }
        if (song == null) {
            int end = title.lastIndexOf("Lyrics");
            song = title.substring(colon + 1, end).trim();
        }
    } catch (IndexOutOfBoundsException | IOException e) {
        // Missing lyric box or title, unexpected title format, or network failure
        return new Lyrics(ERROR);
    }
    try {
        artist = URLDecoder.decode(artist, "UTF-8");
        song = URLDecoder.decode(song, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }
    if (text.contains("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") || text.equals("Instrumental <br />")) {
        Lyrics result = new Lyrics(NEGATIVE_RESULT);
        result.setArtist(artist);
        result.setTitle(song);
        return result;
    } else if (text.equals("") || text.length() < 3) {
        return new Lyrics(NO_RESULT);
    } else {
        Lyrics lyrics = new Lyrics(POSITIVE_RESULT);
        lyrics.setArtist(artist);
        lyrics.setTitle(song);
        lyrics.setOriginalArtist(originalArtist);
        lyrics.setOriginalTitle(originalTitle);
        lyrics.setText(text);
        lyrics.setSource("LyricsWiki");
        lyrics.setURL(url);
        return lyrics;
    }
}Example 20
| Project: tinymce-renderer-plugin-master File: TinyMCERendererReversePlugin.java View source code |
/**
 * Converts rendered HTML back into editable text. Spaces and block-ending
 * tags are swapped for placeholder tokens, the markup is cleaned with jsoup,
 * and the tokens are then turned into newlines and spaces.
 *
 * @param obj the rendered value, cast to {@code String} when processed
 * @return {@code obj} unchanged when it is {@code null} or when
 *         {@code stripNoTags()} is true; otherwise the cleaned text
 */
public Object transformForEdit(Object obj) {
    if (obj == null || stripNoTags()) {
        return obj;
    }
    final String breakToken = "tinymce-renderer-reverse_br";
    final String spaceToken = "tinymce-renderer-reverse_sp";

    // NOTE(review): the space literals below may originally have been non-breaking spaces - confirm
    String markup = ((String) obj).replaceAll(" ", spaceToken);
    if (stripAllTags()) {
        markup = markup.replaceAll("</tr>", breakToken);
    }
    // Order matters: "<p> </p>" has to be handled before the bare "</p>"
    final String[] lineEnders = {"<p> </p>", "</li>", "<br />", "</p>", "</ul>", "</ol>", "</blockquote>"};
    for (String lineEnder : lineEnders) {
        markup = markup.replaceAll(lineEnder, breakToken);
    }

    Whitelist allowed;
    if (stripAllTags()) {
        allowed = Whitelist.none();
    } else {
        allowed = this.whitelist;
    }
    return Jsoup.clean(markup, allowed)
            .replaceAll(breakToken, "\n")
            .replaceAll(spaceToken, " ");
}Example 21
| Project: androidStuff-master File: ExperimentalTests.java View source code |
/**
 * Scratch test exercising a few jsoup calls: parsing a body fragment,
 * inserting and removing elements, and building a custom whitelist.
 * It makes no assertions; all output statements are commented out.
 */
@SuppressWarnings("unused")
public void testJsoup() throws Exception {
    // Earlier experiments, kept for reference:
    // doc = Jsoup.connect("http://en.wikipedia.org/wiki/Main_Page").get();
    // org.jsoup.select.Elements newsHeadlines = doc.select("#mp-itn b a");
    // for (int i = 0; i < newsHeadlines.size(); i++) {
    // org.jsoup.nodes.Element headline = newsHeadlines.get(i);
    // System.out.println(headline.text());
    // }
    // doc = Jsoup.connect("http://espn.go.com/mens-college-basketball/conferences/standings/_/id/2/year/2012/acc-conference").get();
    // for (org.jsoup.nodes.Element table : doc.select("table.tablehead")) {
    // for (org.jsoup.nodes.Element row : table.select("tr")) {
    // org.jsoup.select.Elements tds = row.select("td");
    // if (tds.size() > 6) {
    // System.out.println(tds.get(0).text() + ":" + tds.get(1).text());
    // }
    // }
    // }
    String fragment = "<div id='div1'>"
            + "<p id='para1'>This is the first paragraph</p>"
            + "<p id='para2'>Second paragraph here!"
            + "</div>";
    org.jsoup.nodes.Document doc = Jsoup.parseBodyFragment(fragment);
    // System.out.println(doc.toString());

    // Insert a third paragraph after the last existing one
    doc.select("p").last().after("<p id='para3'>Third paragraph I just added</p>");
    // System.out.println(doc.body().children().toString());
    // System.out.println(doc.select("#para1").toString());

    org.jsoup.select.Elements elements = doc.select("p");
    // System.out.println(elements.toString());

    // Remove the first paragraph from the document
    elements = doc.select("#para1").remove();
    // System.out.println(doc.body().children().toString());
    // System.out.println("---------------------------------");
    // System.out.println(elements.toString());
    // System.out.println(Jsoup.clean(fragment, org.jsoup.safety.Whitelist.basic()));

    // Custom whitelist: div (with class) and p (with id)
    org.jsoup.safety.Whitelist myWhitelist = new org.jsoup.safety.Whitelist();
    myWhitelist.addTags("div", "p");
    myWhitelist.addAttributes("div", "class");
    myWhitelist.addAttributes("p", "id");
    // System.out.println(Jsoup.clean(fragment, myWhitelist));
    // URL url = new URL("http://gosmarter.net?query=cars");
    // doc = Jsoup.parse(url, 3000);
    // Iterator<Element> productList = doc.select("div[class=productList]").iterator();
    // assertTrue(productList.hasNext());
    // Element product = productList.next();
    // Element productLink = product.select("a").first();
    // String href = productLink.attr("abs:href");
    // System.out.println(href);
}Example 22
| Project: jblog-master File: ArticleServiceImpl.java View source code |
/**
 * Creates or updates an article from the submitted view object and persists
 * it together with its text. When the view object has no id, a new article
 * is created with initial counters; otherwise the existing article is loaded
 * through article_dao and modified.
 *
 * @param vo the submitted article data
 * @param type the save type; "release" (case-insensitive) refreshes the update time of an existing article
 * @param user the user set on the article and on newly created labels
 * @param ws the web site set on a new article and on newly created labels
 * @return the id of the saved article
 * @throws Exception declared by the signature; no exception is caught inside this method
 */
@Override
public String saveArticle(ArticleVo vo, String type, User user, WebSite ws) throws Exception {
boolean add = false;
Article a = null;
ArticleText at = null;
Set<Lable> labes = new HashSet<Lable>();
// No id on the view object: build a brand-new article with initial values
if (null == vo.getId() || vo.getId().isEmpty()) {
a = new Article();
at = new ArticleText();
a.setArticleViewcount(0);
a.setArticleStatus((short) 1);
a.setArticleSort(0);
a.setArticleLikes(0);
a.setWebsiteid(ws.getId());
// Use the submitted publish time when present, otherwise the current time
if (null == vo.getArticlePubtime() || vo.getArticlePubtime().isEmpty()) {
a.setArticlePubtime(TimeStampUtil.getCurrentDate());
} else {
a.setArticlePubtime(TimeStampUtil.convertStringToTimeStamp(vo.getArticlePubtime()));
}
a.setArticleUpdatetime(TimeStampUtil.getCurrentDate());
add = true;
} else {
a = article_dao.getArticleByID(vo.getId());
at = a.getArticleText();
// If this is a release, set the update time
if (null != type && type.equalsIgnoreCase("release")) {
a.setArticleUpdatetime(TimeStampUtil.getCurrentDate());
}
}
a.setArticleCover(vo.getArticleCover());
a.setArticleAllowcomments(null == vo.getArticleAllowcomments() ? false : vo.getArticleAllowcomments());
// The title is stored as plain text: any markup is dropped by parsing and taking text()
a.setArticleTitle(Jsoup.parse(vo.getArticleTitle()).text());
a.setCategory(category_dao.getObj(vo.getCategory().getId()));
a.setArticlePrivate(null == vo.getArticlePrivate() ? false : vo.getArticlePrivate());
a.setArticleLinkurl(null == vo.getArticleLinkurl() ? "" : vo.getArticleLinkurl());
a.setArticlePassword(vo.getArticlePassword());
a.setArticleEditor(vo.getArticleEditor());
if (null != vo.getArticleLableStr()) {
String[] lables = vo.getArticleLableStr().replaceAll("\\s", "").split(",");
Map<String, String> map = new HashMap<String, String>();
// De-duplicate the values passed from the front end (submitted names map to null)
for (String str : lables) {
if (!str.isEmpty()) {
map.put(str, null);
}
}
if (!add) {
// Put the labels that already exist on the article into the map (mapped to "")
for (Lable la : a.getLables()) {
map.put(la.getLableName(), "");
}
}
// Create labels only for names still mapped to null (submitted, not already on
// the article) that do not consist of exactly 32 word characters
for (String keyword : map.keySet()) {
String key = keyword.trim();
if (map.containsKey(key)) {
if (null == map.get(key) && !key.matches("\\w{32}")) {
labes.add(new Lable(ws, a, key, user.getId(), TimeStampUtil.getCurrentDate()));
}
}
}
}
at.setArticleContent(vo.getArticleContent());
at.setArticleSummary(vo.getArticleSummary());
a.setUser(user);
// The MD5 is computed on the submitted content, before the HTML cleaning below
a.setArticleMd5(CommomEncrypt.MD5(at.getArticleContent()));
a.setLables(labes);
// Content written with the "html" editor is sanitized with the relaxed whitelist
if (a.getArticleEditor().equals("html")) {
String _html = Jsoup.clean(at.getArticleContent(), Whitelist.relaxed().addAttributes("a", "target").addAttributes("tr", "class"));
at.setArticleContent(_html);
}
if (add) {
article_dao.addObj(a);
// The text row takes the id of the article that was just added
at.setId(a.getId());
article_dao.save(at);
} else {
article_dao.updateObj(a);
article_dao.update(at);
}
return a.getId();
}Example 23
| Project: ORCID-Source-master File: OrcidStringUtils.java View source code |
/**
 * Cleans a string with jsoup's simple-text whitelist and then substitutes
 * the decoded constants for the {@code LT}, {@code GT}, {@code AMP},
 * {@code APOS} and {@code QUOT} constants in the result.
 *
 * @param s the text to clean
 * @return the cleaned text with those substitutions applied
 */
public static String simpleHtml(String s) {
    // According to
    // http://jsoup.org/apidocs/org/jsoup/nodes/Entities.EscapeMode.html#xhtml
    // jsoup escapes lt, gt, amp, apos and quot for xhtml, so they are restored afterwards
    return Jsoup.clean(s, "", Whitelist.simpleText(), outputSettings)
            .replace(LT, DECODED_LT)
            .replace(GT, DECODED_GT)
            .replace(AMP, DECODED_AMP)
            .replace(APOS, DECODED_APOS)
            .replace(QUOT, DECODED_QUOT);
}Example 24
| Project: site-master File: RegistrationService.java View source code |
/**
 * Converts HTML text to plain text: all tags except <code>br</code> are
 * stripped, entities are unescaped, and every <code>br</code> is then
 * replaced by a CR/LF line break.
 *
 * @param htmlText the HTML to convert
 * @return the plain-text rendering
 */
String htmlTextToPlainText(final String htmlText) {
    // Start from an empty whitelist and let only <br> through
    final Whitelist brOnly = Whitelist.none();
    brOnly.addTags("br");

    final Document stripped = new Cleaner(brOnly).clean(Jsoup.parse(htmlText));
    stripped.outputSettings()
            .prettyPrint(false)
            .escapeMode(EscapeMode.xhtml)
            .charset(StandardCharsets.UTF_8);

    final String bodyHtml = stripped.body().html().trim();
    final String unescaped = Parser.unescapeEntities(bodyHtml, true);
    return unescaped.replaceAll("<br(?: ?/)?>", "\r\n");
}Example 25
| Project: TorrentFreak-Reader-master File: ArticleContentHttpTask.java View source code |
/**
 * Downloads the article at {@code params[0]}, cleans it against an extended
 * relaxed whitelist and lets the article provider scrape the values.
 * Any failure is stored in {@code error}.
 *
 * @param params the article URL as the first element
 * @return the scraped values, or {@code null} when fetching or scraping failed
 */
@Override
public Map<String, String> doInBackground(final String... params) {
    final Document fetched;
    try {
        // retrieve the document
        fetched = Jsoup.connect(params[0])
                .timeout(20000)
                .ignoreHttpErrors(true)
                .followRedirects(true)
                .get();
    } catch (IOException e) {
        error = e;
        return null;
    }

    // elements and attributes allowed on top of the relaxed defaults
    final Whitelist allowed = Whitelist.relaxed();
    allowed.addTags("abbr", "address", "area", "article", "aside", "embed", "footer", "header", "hr", "iframe", "label", "legend", "nav", "object", "param", "s", "section", "summary", "time", "video", "track", "wbr", "center");
    allowed.addAttributes("a", "rel");
    allowed.addAttributes("ul", "id");
    allowed.addAttributes("li", "class");
    allowed.addAttributes("img", "class");
    allowed.addAttributes("img", "align");
    allowed.addAttributes("span", "class");
    allowed.addAttributes("table", "class");
    allowed.addAttributes("p", "class");
    allowed.addAttributes("iframe", "src", "scrolling", "width", "height", "frameborder");

    // clean the retrieved document with the whitelist
    final Document cleaned = new Cleaner(allowed).clean(fetched);

    try {
        // scrape the required values from the document using the article provider
        return provider.scrape(cleaned);
    } catch (Exception e) {
        error = e;
        return null;
    }
}Example 26
| Project: XCoLab-master File: HtmlUtil.java View source code |
/**
 * Removes HTML from the input string, keeping only what the whitelist allows.
 *
 * @param text the unsafe input text
 * @param whitelist the tags and attributes that may remain
 * @param baseUri used to evaluate relative links
 * @return the body HTML of the cleaned document, or an empty string when
 *         {@code text} is empty according to {@code StringUtils.isEmpty}
 */
public static String clean(String text, Whitelist whitelist, String baseUri) {
    if (!StringUtils.isEmpty(text)) {
        final Document parsed = Jsoup.parse(text, baseUri);
        final Document cleaned = new Cleaner(whitelist).clean(parsed);
        // Adjust escape mode, http://stackoverflow.com/questions/8683018/jsoup-clean-without-adding-html-entities
        cleaned.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
        return cleaned.body().html();
    }
    return "";
}Example 27
| Project: ajah-master File: RomeUtils.java View source code |
/**
 * Convert a Rome entry to an Ajah entry.
 *
 * <p>
 * Copies ids, author, title, link, dates, contents, description and
 * categories from the Rome entry. When the entry has no title, one is derived
 * from the description (truncated to 100 characters and cleaned with the
 * simple-text whitelist) or, failing that, from at most the first 100
 * characters of the content.
 * </p>
 *
 * @param syndEntry
 *            The entry to convert.
 * @param feed
 *            The feed to create the entry in.
 * @return The converted entry. Will not be null.
 */
private static FeedEntry createEntry(final SyndEntry syndEntry, final Feed feed) {
    final FeedEntry entry = new FeedEntry();
    entry.setFeedId(feed.getId());
    entry.setFeedSourceId(feed.getFeedSourceId());
    entry.setAuthor(syndEntry.getAuthor());
    entry.setTitle(syndEntry.getTitle());
    entry.setHtmlUrl(syndEntry.getLink());
    // NOTE(review): the hash is taken from getUri() while the URL above comes from getLink() - confirm this is intended.
    entry.setHtmlUrlSha1(HashUtils.sha1Hex(syndEntry.getUri()));
    entry.setPublished(syndEntry.getPublishedDate());
    entry.setUpdated(syndEntry.getUpdatedDate());

    @SuppressWarnings("unchecked") final List<SyndContent> contents = syndEntry.getContents();
    if (contents.size() < 1) {
        log.finest("Contents are empty");
        entry.setContentType(AjahMimeType.TEXT_PLAIN);
    } else if (contents.size() > 1) {
        log.warning(contents.size() + " contents in one entry");
    }
    // When several contents are present each one overwrites the previous, so the last one wins.
    for (final SyndContent content : contents) {
        if (content.getType() == null) {
            // TODO see if it's actually html
            entry.setContentType(AjahMimeType.TEXT_PLAIN);
        } else {
            entry.setContentType(AjahMimeType.get(content.getType()));
        }
        entry.setContent(content.getValue());
        if (!entry.getContentType().isText()) {
            log.warning("Non-text type of content: " + content.getType());
        }
    }

    if (syndEntry.getDescription() != null) {
        AjahMimeType descriptionType = null;
        if (syndEntry.getDescription().getType() == null) {
            // TODO see if it's actually html
            descriptionType = AjahMimeType.TEXT_PLAIN;
        } else {
            descriptionType = AjahMimeType.get(syndEntry.getDescription().getType());
        }
        switch(descriptionType) {
            case TEXT_PLAIN:
                entry.setDescription(HtmlUtils.toBodyHtml(syndEntry.getDescription().getValue()));
                break;
            case TEXT_HTML:
                entry.setDescription(syndEntry.getDescription().getValue());
                break;
            default:
                entry.setDescription(HtmlUtils.toBodyHtml(syndEntry.getDescription().getValue()));
                log.warning("Non-text type of description: " + descriptionType + " [" + syndEntry.getDescription().getType() + "]");
        }
    }

    @SuppressWarnings("unchecked") final List<SyndCategory> categories = syndEntry.getCategories();
    if (!CollectionUtils.isEmpty(categories)) {
        // Categories are stored as a single pipe-delimited string: |a|b|c|
        final StringBuilder categoryString = new StringBuilder();
        categoryString.append("|");
        for (final SyndCategory category : categories) {
            categoryString.append(category.getName());
            categoryString.append("|");
        }
        log.fine(categoryString.toString());
        entry.setCategories(categoryString.toString());
    }

    if (StringUtils.isBlank(entry.getTitle())) {
        if (!StringUtils.isBlank(entry.getDescription())) {
            entry.setTitle(Jsoup.clean(StringUtils.truncate(entry.getDescription(), 100), Whitelist.simpleText()));
        } else if (!StringUtils.isBlank(entry.getContent())) {
            // Clamp the end index: substring(0, 100) throws StringIndexOutOfBoundsException
            // when the content is shorter than 100 characters.
            final String content = entry.getContent();
            entry.setTitle(content.substring(0, Math.min(content.length(), 100)));
        }
    }
    if (StringUtils.isBlank(entry.getContent()) && StringUtils.isBlank(entry.getDescription()) && StringUtils.isBlank(entry.getTitle())) {
        log.warning("Title, contents and description are all null");
    }
    entry.setContentSha1(HashUtils.sha1Hex(entry.getContent() + entry.getDescription() + entry.getCategories()));
    return entry;
}Example 28
| Project: NewSumServer-master File: Article.java View source code |
/**
 * Strips all HTML markup from the given text and removes leftover quote and
 * entity fragments from the result.
 *
 * @param sText the text to clean up, may be {@code null}
 * @return the cleaned text, or an empty string when {@code sText} is {@code null}
 */
private String cleanUp(String sText) {
    if (sText == null) {
        return "";
    }
    // Whitelist.none() allows no tags, so only text nodes remain.
    sText = Jsoup.clean(sText, Whitelist.none());
    sText = sText.replaceAll("«|»", "");
    // The three entity literals below had been HTML-decoded in this listing
    // (leaving an uncompilable """ and patterns that stripped every space);
    // they are restored to the escaped entity forms.
    sText = sText.replaceAll("&quot;", "");
    sText = sText.replaceAll("&nbsp;", "");
    sText = sText.replaceAll("&gt;", "");
    sText = sText.replaceAll("&[lr]aquo;", "");
    return sText;
}Example 29
| Project: ripme-master File: DeviantartRipper.java View source code |
/**
 * Attempts to download description for image.
 * Comes in handy when people put entire stories in their description.
 * If no description was found, returns null.
 *
 * @param url The URL the description will be retrieved from
 * @param page The gallery page the URL was found on
 * @return A String[] with first object being the description, and the second object being image file name
 *         (without extension) if found; a one-element array when no file name could be determined;
 *         null when running as a test or when fetching/parsing the description fails.
 */
@Override
public String[] getDescription(String url, Document page) {
    if (isThisATest()) {
        return null;
    }
    try {
        // Fetch the image page
        Response resp = Http.url(url).referrer(this.url).cookies(cookies).response();
        cookies.putAll(resp.cookies());
        // Try to find the description
        Document documentz = resp.parse();
        Element ele = documentz.select("div.dev-description").first();
        if (ele == null) {
            throw new IOException("No description found");
        }
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
        // Mark line/paragraph breaks with a literal backslash-n; the markers are turned into
        // real line separators just before the markup is stripped below.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");

        String fullSize = null;
        // first() yields null when the gallery page has no matching thumbnail, where get(0)
        // would throw an IndexOutOfBoundsException that the IOException handler does not catch.
        Element thumb = page.select("div.zones-container span.thumb[href=\"" + url + "\"]").first();
        if (thumb != null) {
            if (!thumb.attr("data-super-full-img").isEmpty()) {
                fullSize = thumb.attr("data-super-full-img");
                String[] split = fullSize.split("/");
                fullSize = split[split.length - 1];
            } else {
                String spanUrl = thumb.attr("href");
                fullSize = jsonToImage(page, spanUrl.substring(spanUrl.lastIndexOf('-') + 1));
                if (fullSize != null) {
                    String[] split = fullSize.split("/");
                    fullSize = split[split.length - 1];
                }
            }
        }

        // TODO Make this not make a newline if someone just types \n into the description.
        String description = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
        if (fullSize == null) {
            return new String[] { description };
        }
        // Drop the file extension only when there is one; substring(0, -1) would throw.
        int dot = fullSize.lastIndexOf(".");
        if (dot >= 0) {
            fullSize = fullSize.substring(0, dot);
        }
        return new String[] { description, fullSize };
    } catch (IOException ioe) {
        logger.info("Failed to get description at " + url + ": '" + ioe.getMessage() + "'");
        return null;
    }
}Example 30
| Project: jHTML2Md-master File: HTML2Md.java View source code |
/**
 * Cleans the given document with the relaxed whitelist and renders its text content.
 * Resets the shared {@code indentation} counter to -1 before rendering. When the
 * document title is not empty after trimming, the output is prefixed with a
 * {@code "# title"} heading line followed by a blank line.
 *
 * @param dirtyDoc the parsed, not yet sanitized document
 * @return the rendered text, optionally preceded by the title heading
 */
private static String parseDocument(Document dirtyDoc) {
    indentation = -1;
    final String heading = dirtyDoc.title();
    final Document cleaned = new Cleaner(Whitelist.relaxed()).clean(dirtyDoc);
    cleaned.outputSettings().escapeMode(EscapeMode.xhtml);
    final String rendered = getTextContent(cleaned);
    if (heading.trim().isEmpty()) {
        return rendered;
    }
    return "# " + heading + "\n\n" + rendered;
}Example 31
| Project: mbox_tools-master File: MessageParser.java View source code |
/**
 * Parse given Message into Mail.
 *
 * <p>
 * Header fields (From, To, Subject, Date, Message-ID, References, In-Reply-To) are
 * mapped first, then the body is parsed into text/html parts and attachments. A
 * snippet of at most 250 characters is taken from the first text content without
 * quotes, else the first text content, else the text of the first html content.
 * </p>
 *
 * @param message the message to parse
 * @param idsuffix This value gets appended to the message-id.
 * @return the populated Mail
 * @throws MessageParseException if the Message-ID header is missing or empty, the date
 *         header cannot be parsed, or reading the message body fails
 */
public static Mail parse(Message message, String idsuffix) throws MessageParseException {
    String author_name = null;
    String author_email = null;
    String[] to = null;
    String subject_original = null;
    String subject = null;
    String date = null;
    String message_id_original = null;
    String message_id = null;
    String[] references = null;
    String in_reply_to = null;
    String message_snippet = null;
    String first_text_message = null;
    String first_text_message_without_quotes = null;
    String first_html_message = null;
    String[] text_messages = null;
    Integer text_messages_cnt = null;
    String[] html_messages = null;
    Integer html_messages_cnt = null;
    MailAttachment[] message_attachments = null;
    Integer message_attachments_cnt = null;

    Map<String, Field> headers = getMessageHeaders(message);
    boolean messageIdPresent = false;
    for (String fieldName : headers.keySet()) {
        Field f = headers.get(fieldName);
        switch(MessageHeader.getValue(f.getName())) {
            case FROM:
                Author author = extractValue((MailboxListField) f);
                author_name = author.name;
                author_email = author.email;
                break;
            case TO:
                List<String> tos = new ArrayList<>();
                for (String recipient : extractValue((AddressListField) f)) {
                    tos.add(recipient);
                }
                to = tos.toArray(new String[tos.size()]);
                break;
            case SUBJECT:
                subject_original = extractValue((UnstructuredField) f);
                subject = normalizeSubject(subject_original);
                break;
            case DATE:
                Date d = extractValue((DateTimeField) f);
                if (d != null) {
                    date = defaultDatePrinter.print(d.getTime());
                } else {
                    // The Message-ID header may itself be absent; do not let an NPE here
                    // mask the date parse failure that is being reported.
                    Field midField = headers.get(MessageHeader.MESSAGE_ID.toString());
                    String mid = midField != null ? midField.getBody() : null;
                    log.warn("Unable to parse header field '{}' for message-id: '{}'", f, mid);
                    throw new MessageParseException("Unable to parsed a date field. Skipping message [" + mid + "]");
                }
                break;
            case MESSAGE_ID:
                String id = extractValue((UnstructuredField) f);
                if (isNullOrEmpty(id)) {
                    throw new MessageParseException("Message-ID header is null or empty.");
                }
                message_id_original = id;
                message_id = id;
                if (!isNullOrEmpty(idsuffix)) {
                    message_id += idsuffix;
                }
                messageIdPresent = true;
                break;
            case REFERENCES:
                // Whitespace-separated list of message ids.
                references = extractValue((UnstructuredField) f).trim().split("\\s+");
                break;
            case IN_REPLY_TO:
                in_reply_to = extractValue((UnstructuredField) f);
                break;
        }
    }
    if (!messageIdPresent) {
        throw new MessageParseException("Message-ID header not found.");
    }

    MessageBodyParser.MailBodyContent content;
    try {
        content = MessageBodyParser.parse(message);
    } catch (IOException e) {
        throw new MessageParseException(e);
    }

    String snippet = "";
    if (content.getFirstTextContentWithoutQuotes() != null) {
        snippet = content.getFirstTextContentWithoutQuotes();
    } else if (content.getFirstTextContent() != null) {
        snippet = content.getFirstTextContent();
    } else if (content.getFirstHtmlContent() != null) {
        snippet = Jsoup.parse(Jsoup.clean(content.getFirstHtmlContent(), Whitelist.relaxed())).text();
    } else {
        // TODO get text snippet from other fields
    }
    // Keep at most 250 characters. substring's end index is exclusive, so the bound is
    // length(), not length() - 1 (which dropped the last character of short snippets).
    snippet = snippet.substring(0, Math.min(snippet.length(), 250)).replaceAll("^>From", "From").replaceAll("\\s+", " ").trim();
    message_snippet = snippet;

    first_text_message = content.getFirstTextContent();
    first_text_message_without_quotes = content.getFirstTextContentWithoutQuotes();
    first_html_message = content.getFirstHtmlContent();

    List<String> textMessages = new ArrayList<>();
    for (String part : content.getTextMessages()) {
        textMessages.add(part);
    }
    text_messages = textMessages.toArray(new String[textMessages.size()]);
    text_messages_cnt = content.getTextMessages().size();

    List<String> htmlMessages = new ArrayList<>();
    for (String part : content.getHtmlMessages()) {
        htmlMessages.add(part);
    }
    html_messages = htmlMessages.toArray(new String[htmlMessages.size()]);
    html_messages_cnt = content.getHtmlMessages().size();

    // message_attachments stays null when there are no attachments; only the count is set.
    if (content.getAttachments().size() > 0) {
        message_attachments_cnt = content.getAttachments().size();
        message_attachments = content.getAttachments().toArray(new MailAttachment[message_attachments_cnt]);
    } else {
        message_attachments_cnt = 0;
    }
    return new Mail(message_id, message_id_original, to, subject_original, subject, author_name, author_email, date, in_reply_to, references, message_snippet, first_text_message, first_text_message_without_quotes, first_html_message, text_messages, text_messages_cnt, html_messages, html_messages_cnt, message_attachments, message_attachments_cnt);
}Example 32
| Project: solo-master File: CommentMgmtService.java View source code |
/**
 * Validates a comment adding request and sanitizes its name and content in place.
 *
 * <p>
 * Validation covers: commenting enabled in the preference, the target article/page
 * exists and is commentable, name length, email format, URL format and content
 * length. On success the comment name is stripped of all HTML and the content is
 * converted from Markdown to HTML, cleaned with the relaxed whitelist and passed
 * through the emoji alias conversion; both are written back into the request.
 * </p>
 *
 * @param requestJSONObject the specified comment adding request, for example, <pre>
 * {
 *     "type": "", // "article"/"page"
 *     "oId": "",
 *     "commentName": "",
 *     "commentEmail": "",
 *     "commentURL": "",
 *     "commentContent": "",
 * }
 * </pre>
 *
 * @return check result, for example, <pre>
 * {
 *     "sc": boolean,
 *     "msg": "" // Exists if "sc" equals to false
 * }
 * </pre>
 */
public JSONObject checkAddCommentRequest(final JSONObject requestJSONObject) {
    final JSONObject ret = new JSONObject();
    try {
        ret.put(Keys.STATUS_CODE, false);

        // Global switch: commenting must be enabled in the preference.
        final JSONObject preference = preferenceQueryService.getPreference();
        final boolean commentingEnabled = null != preference && preference.optBoolean(Option.ID_C_COMMENTABLE);
        if (!commentingEnabled) {
            ret.put(Keys.MSG, langPropsService.get("notAllowCommentLabel"));
            return ret;
        }

        // Per-target switch: anything that is not an article is looked up as a page.
        final String id = requestJSONObject.optString(Keys.OBJECT_ID);
        final String type = requestJSONObject.optString(Common.TYPE);
        final boolean forArticle = Article.ARTICLE.equals(type);
        final JSONObject target = forArticle ? articleRepository.get(id) : pageRepository.get(id);
        final String commentableKey = forArticle ? Article.ARTICLE_COMMENTABLE : Page.PAGE_COMMENTABLE;
        if (null == target || !target.optBoolean(commentableKey)) {
            ret.put(Keys.MSG, langPropsService.get("notAllowCommentLabel"));
            return ret;
        }

        final String rawName = requestJSONObject.getString(Comment.COMMENT_NAME);
        final int nameLength = rawName.length();
        if (nameLength > MAX_COMMENT_NAME_LENGTH || nameLength < MIN_COMMENT_NAME_LENGTH) {
            LOGGER.log(Level.WARN, "Comment name is too long[{0}]", rawName);
            ret.put(Keys.MSG, langPropsService.get("nameTooLongLabel"));
            return ret;
        }

        final String email = requestJSONObject.getString(Comment.COMMENT_EMAIL).trim().toLowerCase();
        if (!Strings.isEmail(email)) {
            LOGGER.log(Level.WARN, "Comment email is invalid[{0}]", email);
            ret.put(Keys.MSG, langPropsService.get("mailInvalidLabel"));
            return ret;
        }

        final String url = requestJSONObject.optString(Comment.COMMENT_URL);
        final boolean urlAcceptable = Strings.isURL(url) && !StringUtils.contains(url, "<");
        if (!urlAcceptable) {
            LOGGER.log(Level.WARN, "Comment URL is invalid[{0}]", url);
            ret.put(Keys.MSG, langPropsService.get("urlInvalidLabel"));
            return ret;
        }

        final String rawContent = requestJSONObject.optString(Comment.COMMENT_CONTENT);
        final int contentLength = rawContent.length();
        if (contentLength > MAX_COMMENT_CONTENT_LENGTH || contentLength < MIN_COMMENT_CONTENT_LENGTH) {
            LOGGER.log(Level.WARN, "Comment conent length is invalid[{0}]", contentLength);
            ret.put(Keys.MSG, langPropsService.get("commentContentCannotEmptyLabel"));
            return ret;
        }

        ret.put(Keys.STATUS_CODE, true);

        // name XSS process
        requestJSONObject.put(Comment.COMMENT_NAME, Jsoup.clean(rawName, Whitelist.none()));

        // content Markdown & XSS process, then emoji
        final String html = Markdowns.toHTML(rawContent);
        final String safeHtml = Jsoup.clean(html, Whitelist.relaxed());
        requestJSONObject.put(Comment.COMMENT_CONTENT, Emotions.toAliases(safeHtml));

        return ret;
    } catch (final Exception e) {
        LOGGER.log(Level.WARN, "Checks add comment request[" + requestJSONObject.toString() + "] failed", e);
        ret.put(Keys.STATUS_CODE, false);
        ret.put(Keys.MSG, langPropsService.get("addFailLabel"));
        return ret;
    }
}Example 33
| Project: commafeed-master File: FeedUtils.java View source code |
/**
 * Builds the whitelist of tags, attributes and URL protocols used when cleaning
 * HTML. Anchors are forced to {@code target="_blank"} and {@code rel="noreferrer"}.
 *
 * @return a freshly configured whitelist
 */
private static synchronized Whitelist buildWhiteList() {
    final Whitelist allowed = new Whitelist();

    allowed.addTags(
            "a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup", "dd", "div", "dl", "dt", "em",
            "h1", "h2", "h3", "h4", "h5", "h6", "i", "iframe", "img", "li", "ol", "p", "pre", "q", "small", "strike",
            "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u", "ul");

    // Text direction is allowed on the block-level containers.
    for (final String tag : new String[] { "div", "pre", "code", "table", "p" }) {
        allowed.addAttributes(tag, "dir");
    }

    allowed.addAttributes("a", "href", "title")
            .addAttributes("blockquote", "cite")
            .addAttributes("col", "span", "width")
            .addAttributes("colgroup", "span", "width")
            .addAttributes("iframe", "src", "height", "width", "allowfullscreen", "frameborder", "style")
            .addAttributes("img", "align", "alt", "height", "src", "title", "width", "style")
            .addAttributes("ol", "start", "type")
            .addAttributes("q", "cite")
            .addAttributes("table", "border", "bordercolor", "summary", "width")
            .addAttributes("td", "border", "bordercolor", "abbr", "axis", "colspan", "rowspan", "width")
            .addAttributes("th", "border", "bordercolor", "abbr", "axis", "colspan", "rowspan", "scope", "width")
            .addAttributes("ul", "type");

    allowed.addProtocols("a", "href", "ftp", "http", "https", "magnet", "mailto")
            .addProtocols("blockquote", "cite", "http", "https")
            .addProtocols("img", "src", "http", "https")
            .addProtocols("q", "cite", "http", "https");

    allowed.addEnforcedAttribute("a", "target", "_blank")
            .addEnforcedAttribute("a", "rel", "noreferrer");

    return allowed;
}
Example 34
| Project: enzymeportal-master File: SearchController.java View source code |
/**
 * Handles the POSTed search form. Looks the search up among the previously cached
 * searches first; on a miss the history is cleared and a new search is run according
 * to the search type. When results are available they are cached, stored in the
 * session and exposed to the view.
 *
 * @param searchModel the submitted search form
 * @param model the view model to populate
 * @param searchTerm optional request parameter passed to the keyword search
 * @param ec optional request parameter passed to the keyword search
 * @param searchId optional request parameter passed to the keyword search and the history
 * @param keywordType optional request parameter passed to the keyword search and the history
 * @param session the current HTTP session
 * @param request the current request
 * @param response the current response; CORS headers are set on it
 * @return {@code "search"} when there are results to show, {@code "error"} otherwise
 */
@RequestMapping(value = "/search", method = RequestMethod.POST)
public String postSearchResult(SearchModel searchModel, Model model, @RequestParam(required = false, value = "searchTerm") String searchTerm, @RequestParam(required = false, value = "ec") String ec, @RequestParam(required = false, value = "searchId") String searchId, @RequestParam(required = false, value = "keywordType") String keywordType, HttpSession session, HttpServletRequest request, HttpServletResponse response) {
    // Declared outside the try block so the catch handler can report it.
    String searchKey = null;

    response.setHeader("Access-Control-Allow-Origin", "*");
    response.setHeader("Access-Control-Allow-Methods", "POST, GET, OPTIONS, DELETE");
    response.setHeader("Access-Control-Max-Age", "3600");
    response.setHeader("Access-Control-Allow-Headers", "x-requested-with");

    try {
        // See if it is already there, perhaps we are paginating:
        final Map<String, SearchResults> cachedSearches = getPreviousSearches(session.getServletContext());
        searchKey = Jsoup.clean(getSearchKey(searchModel.getSearchparams()), Whitelist.basic());
        SearchResults found = cachedSearches.get(searchKey);

        if (found == null) {
            // New search:
            clearHistory(session);
            switch(searchModel.getSearchparams().getType()) {
                case KEYWORD:
                    found = searchKeyword(ec, searchTerm, searchId, keywordType, ASSOCIATED_PROTEIN_LIMIT);
                    model.addAttribute(SEARCH_VIDEO, SEARCH_VIDEO);
                    break;
                case SEQUENCE:
                    // No results are produced here; only the video attribute is set.
                    model.addAttribute(SEQUENCE_VIDEO, SEQUENCE_VIDEO);
                    break;
                case COMPOUND:
                    found = searchCompound(model, searchModel);
                    break;
                default:
                    break;
            }
        }

        if (found != null) {
            // something to show
            cacheSearch(session.getServletContext(), searchKey, found);
            setLastSummaries(session, found.getSummaryentries());
            searchModel.setSearchresults(found);
            applyFilters(searchModel, request);
            model.addAttribute("searchConfig", searchConfig);
            model.addAttribute("searchModel", searchModel);
            model.addAttribute("pagination", getPagination(searchModel));
            request.setAttribute("searchTerm", searchModel.getSearchparams().getText());
            clearHistory(session);
            addToHistory(session, searchModel.getSearchparams().getType(), searchKey, searchId, keywordType);
            return "search";
        }
    } catch (Exception e) {
        logger.error("one of the search params (Text or Sequence is :" + searchKey, e);
    }
    return "error";
}Example 35
| Project: jw-community-master File: StringUtil.java View source code |
/**
 * Removes all HTML tags that are not in the allowed list from the content.
 * On the tags that remain, the attributes style, class, title, id, src, href
 * and target are kept.
 *
 * @param content the HTML to strip; {@code null} or empty content is returned unchanged
 * @param allowedTag names of the tags to keep; {@code null} is treated as "no tags allowed"
 * @return the stripped content
 */
public static String stripHtmlTag(String content, String[] allowedTag) {
    if (content == null || content.isEmpty()) {
        return content;
    }

    Whitelist whitelist = Whitelist.none().addAttributes(":all", "style", "class", "title", "id", "src", "href", "target");
    // A null array used to fail with a NullPointerException in the for-each loop.
    if (allowedTag != null) {
        whitelist.addTags(allowedTag);
    }

    // Replace the whitelist's private "protocols" map with an empty one via reflection.
    // NOTE(review): presumably so that href/src values are not filtered by URL protocol - confirm.
    java.lang.reflect.Field field = ReflectionUtils.findField(whitelist.getClass(), "protocols");
    if (field != null) {
        ReflectionUtils.makeAccessible(field);
        ReflectionUtils.setField(field, whitelist, new HashMap<Object, Object>());
    }

    return Jsoup.clean(content, whitelist);
}Example 36
| Project: muikku-master File: WorkspaceForumRESTService.java View source code |
/**
 * Creates a new thread in a workspace forum area.
 *
 * <p>
 * Responds with 404 when the workspace or forum area does not exist, the area is
 * not a workspace forum area, or the area belongs to another workspace; with 403
 * when the user may not write workspace forum messages; and with 400 when a
 * sticky or locked thread is requested without the lock/stickify permission.
 * The message is cleaned with the relaxed whitelist (plus {@code target} on
 * anchors) before it is stored.
 * </p>
 *
 * @param workspaceEntityId id of the workspace entity
 * @param areaId id of the forum area
 * @param newThread payload describing the thread to create
 * @return the created thread as a REST model, or an error response
 */
@POST
@Path("/workspaces/{WORKSPACEENTITYID}/forumAreas/{AREAID}/threads")
@RESTPermit(handling = Handling.INLINE)
public Response createThread(@PathParam("WORKSPACEENTITYID") Long workspaceEntityId, @PathParam("AREAID") Long areaId, ForumThreadRESTModel newThread) {
    WorkspaceEntity workspaceEntity = workspaceEntityController.findWorkspaceEntityById(workspaceEntityId);
    if (workspaceEntity == null) {
        return Response.status(Status.NOT_FOUND).entity(String.format("Workspace entity %d not found", workspaceEntityId)).build();
    }
    ForumArea forumArea = forumController.getForumArea(areaId);
    if (forumArea == null) {
        return Response.status(Status.NOT_FOUND).entity("Forum area not found").build();
    }
    if (!(forumArea instanceof WorkspaceForumArea)) {
        logger.severe(String.format("Trying to access forum %d via incorrect REST endpoint", forumArea.getId()));
        return Response.status(Status.NOT_FOUND).build();
    }
    if (!workspaceEntity.getId().equals(((WorkspaceForumArea) forumArea).getWorkspace())) {
        return Response.status(Status.NOT_FOUND).entity(String.format("WorkspaceForumArea %d does not belong to workspace entity %d", forumArea.getId(), workspaceEntity.getId())).build();
    }
    if (sessionController.hasWorkspacePermission(ForumResourcePermissionCollection.FORUM_WRITE_WORKSPACE_MESSAGES, workspaceEntity)) {
        if (Boolean.TRUE.equals(newThread.getSticky()) || Boolean.TRUE.equals(newThread.getLocked())) {
            if (!sessionController.hasWorkspacePermission(ForumResourcePermissionCollection.FORUM_LOCK_OR_STICKIFY_WORKSPACE_MESSAGES, workspaceEntity)) {
                return Response.status(Status.BAD_REQUEST).build();
            }
        }
        Document message = Jsoup.parse(Jsoup.clean(newThread.getMessage(), Whitelist.relaxed().addAttributes("a", "target")));
        message.outputSettings().escapeMode(EscapeMode.xhtml);
        // Links that open a new browsing context must not expose window.opener or the referrer.
        // The link type is spelled "noreferrer"; the previous "noreferer" is not a recognised
        // keyword, so referrer suppression was not applied.
        message.select("a[target]").attr("rel", "noopener noreferrer");
        ForumThread thread = forumController.createForumThread(forumArea, newThread.getTitle(), message.body().toString(), newThread.getSticky(), newThread.getLocked());
        ForumThreadRESTModel result = new ForumThreadRESTModel(thread.getId(), thread.getTitle(), thread.getMessage(), thread.getCreator(), thread.getCreated(), thread.getForumArea().getId(), thread.getSticky(), thread.getLocked(), thread.getUpdated(), 1L, thread.getLastModified());
        return Response.ok(result).build();
    } else {
        return Response.status(Status.FORBIDDEN).build();
    }
}Example 37
| Project: streamflow-core-master File: CreateCaseFromEmailService.java View source code |
/**
 * Creates a case from a received email inside its own unit of work.
 *
 * <p>
 * A new case is only created when {@code hasStreamflowReference} does not accept the
 * email's References header. The case gets the email subject (capped at 50 characters)
 * as description, the email body as a note, a conversation with one message, the
 * email's attachments, and the sender's contact info, and is then sent to the access point.
 * </p>
 *
 * <p>
 * Failure handling: when the access point lookup throws IllegalArgumentException, or an
 * auto-reply header is present, a failure case is created through systemDefaults, the
 * unit of work is discarded and the method returns. Any other exception also creates a
 * failure case and discards the unit of work, and is rethrown wrapped in an
 * ApplicationEventReplayException.
 * </p>
 *
 * @param event the application event that carried the email; passed on to the replay exception
 * @param email the received email
 */
public void receivedEmail(ApplicationEvent event, EmailValue email) {
UnitOfWork uow = module.unitOfWorkFactory().newUnitOfWork(UsecaseBuilder.newUsecase("Create case from email"));
try {
String references = email.headers().get().get("References");
// Emails whose References header is accepted by hasStreamflowReference are skipped here;
// the unit of work is still completed below.
if (!hasStreamflowReference(references)) {
Organizations.Data organizations = uow.get(Organizations.Data.class, OrganizationsEntity.ORGANIZATIONS_ID);
Organization organization = organizations.organization().get();
EmailAccessPoint ap = null;
try {
ap = organization.getEmailAccessPoint(email.to().get());
} catch (IllegalArgumentException e) {
// Lookup rejected the recipient address: report it as a failure case whose subject is
// prefixed and capped at 50 characters, then abandon this unit of work.
ValueBuilder<EmailValue> builder = module.valueBuilderFactory().newValueBuilder(EmailValue.class).withPrototype(email);
String subj = "Unknown accesspoint: " + builder.prototype().to().get() + " - " + builder.prototype().subject().get();
builder.prototype().subject().set(subj.length() > 50 ? subj.substring(0, 50) : subj);
systemDefaults.createCaseOnEmailFailure(builder.newInstance());
uow.discard();
return;
}
if (ap != null && hasAutoReplyHeader(email.headers().get())) {
// Possible mail loop - auto reply header present but no References - create support case.
ValueBuilder<EmailValue> builder = module.valueBuilderFactory().newValueBuilder(EmailValue.class).withPrototype(email);
String subj = "Possible Mail Loop: " + builder.prototype().to().get() + " - " + builder.prototype().subject().get();
builder.prototype().subject().set(subj.length() > 50 ? subj.substring(0, 50) : subj);
systemDefaults.createCaseOnEmailFailure(builder.newInstance());
uow.discard();
return;
}
Drafts user = systemDefaults.getUser(email);
ConversationParticipant participant = (ConversationParticipant) user;
// Start a fresh role map and register the organization, access point, user and (below) the
// new case in it; the role map is cleared again in the finally block.
RoleMap.newCurrentRoleMap();
RoleMap.current().set(organization);
RoleMap.current().set(ap);
RoleMap.current().set(user);
CaseEntity caze = ap.createCase(user);
RoleMap.current().set(caze);
caze.caselog().get().addTypedEntry("{accesspoint,description=" + ap.getDescription() + "}", CaseLogEntryTypes.system);
// STREAMFLOW-714
// The case description is the email subject, capped at 50 characters.
String subject = email.subject().get();
caze.changeDescription(subject.length() > 50 ? subject.substring(0, 50) : subject);
// HTML bodies are cleaned with jsoup's basic whitelist before being stored as a note;
// other bodies are stored as plain text unchanged.
if (Translator.HTML.equalsIgnoreCase(email.contentType().get())) {
caze.addNote(Jsoup.clean(email.content().get(), Whitelist.basic()), Translator.HTML);
//caze.addNote( Translator.cleanHtml( email.content().get() ), Translator.HTML );
} else {
caze.addNote(email.content().get(), Translator.PLAIN);
}
// Create conversation
Conversation conversation = caze.createConversation(email.subject().get(), (Creator) user);
Message message = null;
// Unlike the note above, the conversation message uses Translator.cleanHtml for HTML bodies.
if (Translator.HTML.equalsIgnoreCase(email.contentType().get())) {
message = conversation.createMessage(Translator.cleanHtml(email.content().get()), MessageType.HTML, participant);
} else {
message = conversation.createMessage(email.content().get(), MessageType.PLAIN, participant);
}
// Create attachments
for (AttachedFileValue attachedFileValue : email.attachments().get()) {
// vCard attachments are added to the user as a contact instead of being attached to the message.
if (attachedFileValue.mimeType().get().contains("text/x-vcard") || attachedFileValue.mimeType().get().contains("text/directory")) {
addVCardAsContact((Contactable.Data) user, attachedFileValue);
} else {
Attachment attachment = conversation.createAttachment(attachedFileValue.uri().get());
attachment.changeName(attachedFileValue.name().get());
attachment.changeMimeType(attachedFileValue.mimeType().get());
attachment.changeModificationDate(attachedFileValue.modificationDate().get());
attachment.changeSize(attachedFileValue.size().get());
attachment.changeUri(attachedFileValue.uri().get());
message.addAttachment(attachment);
// remove attachment from conversation attachments data so AttachmentEntity does not get
// removed for real - we just moved it to message attachments where it actually belongs after
// message creation.
((Attachments.Data) conversation).attachments().remove(attachment);
}
}
// Add contact info
caze.updateContact(0, ((Contactable.Data) user).contact().get());
// Open the case
ap.sendTo(caze);
}
// NOTE(review): debug output goes to stdout around the commit - consider using a logger.
System.out.println("CreateCaseFromEmailService before uow complete");
uow.complete();
System.out.println("CreateCaseFromEmailService after uow complete");
} catch (Exception ex) {
// Any other failure: file a failure case, discard the unit of work and rethrow wrapped
// in an ApplicationEventReplayException that carries the event.
ValueBuilder<EmailValue> builder = module.valueBuilderFactory().newValueBuilder(EmailValue.class).withPrototype(email);
String subj = "General error: " + builder.prototype().to().get() + " - " + builder.prototype().subject().get();
builder.prototype().subject().set(subj.length() > 50 ? subj.substring(0, 50) : subj);
systemDefaults.createCaseOnEmailFailure(builder.newInstance());
uow.discard();
throw new ApplicationEventReplayException(event, ex);
} finally {
RoleMap.clearCurrentRoleMap();
}
}Example 38
| Project: jeboorker-master File: EbookTableCellRenderer.java View source code |
/**
 * Decodes HTML entities and strips all HTML tags from the given text.
 * Entity decoding only runs when the text contains an ampersand, and tag
 * stripping only when the (decoded) text contains a '<'.
 *
 * @param toClean The text to be cleaned.
 * @return the trimmed, cleaned text, or {@code EMPTY} when {@code toClean} is {@code null}
 */
private String cleanString(String toClean) {
    if (toClean == null) {
        return EMPTY;
    }
    String cleaned = toClean;
    final boolean mayHoldEntities = cleaned.indexOf('&') >= 0;
    if (mayHoldEntities) {
        cleaned = new HTMLEntityConverter(cleaned, HTMLEntityConverter.ENCODE_EIGHT_BIT_ASCII).decodeEntities();
    }
    // Checked after decoding, so tags produced by decoded entities are stripped as well.
    final boolean mayHoldTags = cleaned.indexOf('<') >= 0;
    if (mayHoldTags) {
        cleaned = Jsoup.clean(cleaned, Whitelist.none());
    }
    return cleaned.trim();
}Example 39
| Project: TGFC-master File: NetworkUtils.java View source code |
/**
 * Parses a thread content page into list items and collects the image URLs found in them.
 *
 * <p>
 * Elements with class {@code lightmessage} are removed first. If the main-post header
 * pattern matches the raw HTML, the first {@code .message} element becomes floor 1.
 * Each remaining {@code .message} element is paired with an {@code .infobar} element
 * (by position) from which pid, floor number, poster and time are extracted. Finally,
 * abbreviated link texts are replaced by the full href and all image sources are
 * stored in {@code pageData.imgURLList}.
 * </p>
 *
 * @param html raw HTML of the page
 * @param pageData receives the parsed items (appended to {@code dataList}) and the image URL list
 * @param tid not used by this method
 * @param referer base URI used when parsing the HTML
 */
public static void contentListParser(String html, ContentListPageData pageData, int tid, String referer) {
    List<ContentListItemData> dataList = pageData.dataList;
    Document htmlDoc = Jsoup.parse(html, referer);
    htmlDoc.select(".lightmessage").remove();
    Elements messageElements = htmlDoc.select(".message");
    Elements infobarElements = htmlDoc.select(".infobar");
    int messageStart = 0;

    // The two labels at the start of this pattern (标题 and 时间) were mojibake in this
    // listing, which made the pattern unmatchable; they are restored to the original text.
    Pattern mainPostPattern = Pattern.compile("标题:<b>(.+)<\\/b><br \\/>时间:(.+)<br \\/>作者:<a href=\".+uid=(\\d+).*\">(?:<b>)?(.+?)(<\\/b>)?<\\/a>");
    Matcher mainPostMatcher = mainPostPattern.matcher(html);
    Pattern urlReplacePattern = Pattern.compile("<a\\s*.*?href=\"(.*?)\"\\s*.*?>.*?\\s\\.\\.\\.\\s.*?<\\/a>");

    if (mainPostMatcher.find()) {
        messageStart++;
        ContentListItemData itemData = new ContentListItemData();
        itemData.floorNum = 1;
        itemData.posterTime = mainPostMatcher.group(2);
        itemData.posterUID = Integer.parseInt(mainPostMatcher.group(3));
        itemData.posterName = mainPostMatcher.group(4);
        // The optional closing </b> group is used as the "can edit" flag.
        itemData.canEdit = mainPostMatcher.group(5) != null;
        Pattern ratingPattern = Pattern.compile("评分记录\\(.+?=(\\d+)\\)");
        Matcher ratingMatcher = ratingPattern.matcher(html);
        if (ratingMatcher.find()) {
            itemData.ratings = Integer.parseInt(ratingMatcher.group(1));
        }
        Pattern pidPattern = Pattern.compile("作者:<a href=\".*?pid=(\\d+)[^\\>]*?>");
        Matcher pidMatcher = pidPattern.matcher(html);
        if (pidMatcher.find()) {
            itemData.pid = Integer.parseInt(pidMatcher.group(1));
        }
        itemData.mainText = messageElements.get(0).html();
        extractPlatform(itemData);
        dataList.add(itemData);
    }

    // Compiled once instead of on every loop iteration.
    // NOTE(review): the single space after the optional <font> tag may originally have been a
    // non-breaking space (the info bar is HTML-unescaped before matching) - confirm against the source.
    Pattern barPattern = Pattern.compile("<a href=\".*?pid=(\\d+).*?>#(\\d+)[\\s\\S]*?<a href=\".*?uid=(\\d+).*?>(?:<b>)?(.+?)(?:<\\/b>)?<\\/a>[\\s\\S]*?(?:骚\\((\\d+)\\)[\\s\\S]*?)?<span class=\"nf\">(?:<font \\S*>)? (?:<b>)?(.*?)(<\\/b>)?(?:<\\/font>)?<\\/span>");
    for (int i = messageStart, j = 0; i < messageElements.size(); i++, j++) {
        ContentListItemData itemData = new ContentListItemData();
        Element msgElement = messageElements.get(i);
        Element barElement = infobarElements.get(j);
        String infoString = StringEscapeUtils.unescapeHtml(barElement.html());
        Matcher barMatcher = barPattern.matcher(infoString);
        if (barMatcher.find()) {
            itemData.pid = Integer.parseInt(barMatcher.group(1));
            itemData.floorNum = Integer.parseInt(barMatcher.group(2));
            itemData.posterUID = Integer.parseInt(barMatcher.group(3));
            itemData.posterName = barMatcher.group(4);
            if (barMatcher.group(5) != null) {
                itemData.ratings = Integer.parseInt(barMatcher.group(5));
            }
            itemData.posterTime = barMatcher.group(6);
            itemData.canEdit = barMatcher.group(7) != null;
        }
        Elements quotedElements = msgElement.select(".quote-bd");
        if (quotedElements.size() > 0) {
            // The quote block is split at its first <br>: header info before, quoted text after.
            String quoteString = quotedElements.get(0).html();
            String divider = "<br>";
            int t = quoteString.indexOf(divider);
            if (t != -1) {
                itemData.quotedInfo = cleanQuote(quoteString.substring(0, t));
                itemData.quotedText = getPlainText(cleanText(quoteString.substring(t + divider.length()))).trim();
            }
            msgElement.select(".ui-topic-content").remove();
        }
        itemData.mainText = msgElement.html();
        extractPlatform(itemData);
        dataList.add(itemData);
    }

    Pattern imgURLPattern = Pattern.compile("<img\\s*[^>]*?src\\s*=\\s*['\\\"]([^'\\\"]*?)['\\\"][^>]*?\\s*\\/?>");
    List<String> imgURLList = new ArrayList<String>();
    for (ContentListItemData itemData : dataList) {
        // Replace abbreviated link texts ("... ") with the full href.
        itemData.mainText = urlReplacePattern.matcher(itemData.mainText).replaceAll("<a href=\"$1\">$1</a>");
        Matcher imgURLMatcher = imgURLPattern.matcher(itemData.mainText);
        while (imgURLMatcher.find()) {
            imgURLList.add(imgURLMatcher.group(1));
        }
    }
    pageData.imgURLList = imgURLList;
}Example 40
| Project: tika-wrapper-master File: TikaWrapper.java View source code |
/**
 * Extracts text and metadata from a PDF by running the external {@code pdftotext} tool and
 * parsing the HTML file it writes.
 *
 * <p>The stream is copied to a temporary {@code .pdf} file, converted into a temporary
 * {@code .html} file, and the result is parsed with Jsoup. On success {@code text} holds the
 * converted document (or only its body text when the output format is
 * {@code OUTPUT_FORMAT_TEXT}) and {@code meta2} holds content size, content type, title, author
 * and, when it can be parsed, the creation date. On any exception both fields are reset to
 * {@code null}. Nothing happens when {@code input} is {@code null} or no {@code pdftotext}
 * path is configured.
 *
 * <p>Both temporary files are removed on every exit path.
 *
 * @param input the PDF content; may be {@code null}
 */
private void processWithPdfToText(InputStream input) {
    File tempFile = null;
    File tempFile2 = null;
    try {
        if (input != null && pdfToTextPath != null && !"".equals(pdfToTextPath)) {
            // Get a local copy of the file
            tempFile = createTempFile("tmp", ".pdf", tmpPath);
            if (!writeToFile(tempFile, input)) {
                return;
            }
            meta2 = new HashMap<String, String>();
            meta2.put(META_CONTENTSIZE, String.valueOf(tempFile.length()));
            tempFile2 = createTempFile("tmp", ".html", tmpPath);
            Shell sh = new Shell();
            // Convert with PDFTOTEXT - pdftotext -enc UTF-8 -raw -q -htmlmeta -eol unix in.pdf out.html
            sh.exec(pdfToTextPath, "-enc", "UTF-8", "-raw", "-q", "-htmlmeta", "-eol", "unix", tempFile.getAbsolutePath(), tempFile2.getAbsolutePath()).consumeAsString();
            // Load in string and add the <meta http-equiv='Content-Type' content='text/html; charset=utf-8'> line
            StringBuilder sb = new StringBuilder();
            FileInputStream htmlStream = new FileInputStream(tempFile2);
            try {
                BufferedReader br1 = new BufferedReader(new InputStreamReader(htmlStream, "UTF-8"));
                String line;
                // readLine() returning null is the reliable end-of-file signal; ready() only says
                // whether a read would block and must not be used to detect EOF.
                while ((line = br1.readLine()) != null) {
                    sb.append(line).append("\n");
                    if ("</head>".equals(line)) {
                        sb.append("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>").append("\n");
                    }
                }
            } finally {
                // Closing the underlying stream releases the file handle even if reading failed.
                htmlStream.close();
            }
            meta2.put(META_CONTENTTYPE, CONTENT_TYPE_PDF);
            text = sb.toString();
            Document doc = Jsoup.parse(text);
            if (doc != null) {
                meta2.put(META_TITLE, doc.select("title").text());
                meta2.put(META_AUTHOR, getMetaContent(doc, "Author"));
                String creationDate = getMetaContent(doc, "CreationDate");
                if (creationDate != null) {
                    // 20130322143113Z00'00' -> 2013-03-22T14:31:13Z
                    // 20130322143113+02'00' -> 2013-03-22T14:31:13Z
                    // The date parts are taken from the regex groups, not from fixed offsets in the
                    // raw string, so a leading prefix before the digits does not corrupt the result.
                    // NOTE(review): as before, a "+hh'mm'" offset is dropped and the value is still
                    // labelled "Z"; negative offsets are not recognised — confirm this is intended.
                    Pattern p = Pattern.compile("([0-9]{4})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})([0-9]{2})[Z+][0-9]{2}'[0-9]{2}'");
                    Matcher m = p.matcher(creationDate);
                    if (m.find()) {
                        String value = String.format("%1$s-%2$s-%3$sT%4$s:%5$s:%6$sZ", m.group(1), m.group(2), m.group(3), m.group(4), m.group(5), m.group(6));
                        meta2.put(META_CREATED, value);
                    }
                }
                if (OUTPUT_FORMAT_TEXT.equals(outputFormat)) {
                    Document doc2 = new Cleaner(Whitelist.basic()).clean(doc);
                    text = doc2.body().text();
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        text = null;
        meta2 = null;
    } finally {
        // Clean up on every path, including the early return above.
        if (tempFile != null && tempFile.exists()) {
            tempFile.delete();
        }
        if (tempFile2 != null && tempFile2.exists()) {
            tempFile2.delete();
        }
    }
}Example 41
| Project: ttr-master File: ArticleFragment.java View source code |
/**
 * Renders the current article into the WebView.
 *
 * <p>Configures the WebView cache mode, then (only on the first successful run) builds the
 * label markup, fills the HTML template with the article data and loads the result. If the
 * connector reports an error, the error activity is started instead. When the article content
 * is shorter than three characters the article link may be opened automatically. The progress
 * indicator is always removed again, whichever way the method exits.
 */
@SuppressLint({ "SetJavaScriptEnabled", "AddJavascriptInterface" })
private void doRefresh() {
    if (webView == null) {
        return;
    }
    try {
        ProgressBarManager.getInstance().addProgress((MenuActivity) getActivity());
        if (Controller.getInstance().workOffline() || !Controller.getInstance().loadMedia()) {
            webView.getSettings().setCacheMode(WebSettings.LOAD_CACHE_ONLY);
        } else {
            webView.getSettings().setCacheMode(WebSettings.LOAD_DEFAULT);
        }
        if (!Controller.getInstance().loadMedia() && Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR1) {
            webView.getSettings().setMediaPlaybackRequiresUserGesture(false);
        }
        // No need to reload everything
        if (webviewInitialized) {
            return;
        }
        // Check for errors
        if (Controller.getInstance().getConnector().hasLastError()) {
            Intent i = new Intent(getActivity(), ErrorActivity.class);
            i.putExtra(ErrorActivity.ERROR_MESSAGE, Controller.getInstance().getConnector().pullLastError());
            startActivityForResult(i, ErrorActivity.ACTIVITY_SHOW_ERROR);
            return;
        }
        // Comma-separated markup for all checked labels, coloured when both colours are known
        StringBuilder labels = new StringBuilder();
        for (Label label : article.labels) {
            if (label.checked) {
                if (labels.length() > 0) {
                    labels.append(", ");
                }
                String labelString = label.caption;
                if (label.foregroundColor != null && label.backgroundColor != null) {
                    labelString = String.format(LABEL_COLOR_STRING, label.foregroundColor, label.backgroundColor, label.caption);
                }
                labels.append(labelString);
            }
        }
        // Remove all html tags and content that doesn't meet this set of allowed stuff.
        // Only done below JELLY_BEAN_MR1; on newer versions the content is passed through as-is.
        final String contentClean;
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR1) {
            contentClean = article.content;
        } else {
            contentClean = Jsoup.clean(article.content, Whitelist.relaxed());
        }
        // Load html from Controller and insert content// Article-Prefetch-Stuff from Raw-Resources and System
        ST htmlTmpl = new ST(getString(R.string.HTML_TEMPLATE), '$', '$');
        // Hyphenation JavaScript. Added exactly once: adding the same attribute a second time
        // turns it into a multi-valued attribute and the script would be rendered twice.
        if (Controller.getInstance().allowHyphenation()) {
            ST javascriptST = new ST(getString(R.string.JAVASCRIPT_HYPHENATION_TEMPLATE), '$', '$');
            javascriptST.add("LANGUAGE", Controller.getInstance().hyphenationLanguage());
            htmlTmpl.add("HYPHENATION", javascriptST.render());
        }
        // Replace alignment-marker: align:left or align:justify
        ST stylesST = new ST(getString(R.string.STYLE_TEMPLATE), '$', '$');
        if (Controller.getInstance().alignFlushLeft()) {
            stylesST.add("TEXT_ALIGN", getString(R.string.ALIGN_LEFT));
        } else {
            stylesST.add("TEXT_ALIGN", getString(R.string.ALIGN_JUSTIFY));
        }
        htmlTmpl.add("STYLE", stylesST.render());
        // General values
        htmlTmpl.add("THEME", getResources().getString(Controller.getInstance().getThemeHTML()));
        htmlTmpl.add("CACHE_DIR", Controller.getInstance().cacheFolder());
        htmlTmpl.add("LANGUAGE", Controller.getInstance().hyphenationLanguage());
        // Special values for this article
        htmlTmpl.add("article", article);
        htmlTmpl.add("feed", feed);
        htmlTmpl.add("CACHED_IMAGES", cachedImages);
        htmlTmpl.add("LABELS", labels.toString());
        htmlTmpl.add("UPDATED", DateUtils.getDateTimeCustom(getActivity(), article.updated));
        htmlTmpl.add("ATTACHMENTS", getAttachmentsMarkup(article.attachments));
        htmlTmpl.add("CONTENT", contentClean);
        // Navigation buttons
        if (Controller.getInstance().showButtonsMode() == Constants.SHOW_BUTTONS_MODE_HTML) {
            htmlTmpl.add("NAVIGATION", getString(R.string.BOTTOM_NAVIGATION_TEMPLATE));
        }
        // Note of the article
        if (article.note != null && article.note.length() > 0) {
            ST noteST = new ST(getResources().getString(R.string.NOTE_TEMPLATE), '$', '$');
            noteST.add("NOTE", getResources().getString(R.string.Commons_HtmlPrefixNote) + " " + article.note);
            htmlTmpl.add("NOTE_TEMPLATE", noteST.render());
        }
        content = htmlTmpl.render();
        // NOTE(review): JavaScript is enabled and a JS interface is exposed, but article.content is
        // only run through JSoup below JELLY_BEAN_MR1 (see above). On newer versions the raw feed
        // content reaches the WebView unfiltered — confirm this is an accepted risk.
        webView.getSettings().setJavaScriptEnabled(true);
        webView.addJavascriptInterface(articleJSInterface, "articleController");
        webView.loadDataWithBaseURL("file:///android_asset/", content, "text/html", "utf-8", null);
        if (!linkAutoOpened && article.content.length() < 3) {
            if (Controller.getInstance().openUrlEmptyArticle()) {
                Log.i(TAG, "Article-Content is empty, opening URL in browser");
                linkAutoOpened = true;
                openLink();
            }
        }
        // Everything did load, we dont have to do this again.
        webviewInitialized = true;
    } catch (Exception e) {
        Log.w(TAG, e.getClass().getSimpleName() + " in doRefresh(): " + e.getMessage() + " (" + e.getCause() + ")", e);
    } finally {
        ProgressBarManager.getInstance().removeProgress((MenuActivity) getActivity());
    }
}Example 42
| Project: ttrss-reader-fork-master File: ArticleFragment.java View source code |
/**
 * Renders the current article into the WebView.
 *
 * <p>Configures the WebView cache mode, then (only on the first successful run) builds the
 * label markup, fills the HTML template with the article data and loads the result. If the
 * connector reports an error, the error activity is started instead. When the article content
 * is shorter than three characters the article link may be opened automatically. The progress
 * indicator is always removed again, whichever way the method exits.
 */
@SuppressLint({ "SetJavaScriptEnabled", "AddJavascriptInterface" })
private void doRefresh() {
    if (webView == null) {
        return;
    }
    try {
        ProgressBarManager.getInstance().addProgress((MenuActivity) getActivity());
        if (Controller.getInstance().workOffline() || !Controller.getInstance().loadMedia()) {
            webView.getSettings().setCacheMode(WebSettings.LOAD_CACHE_ONLY);
        } else {
            webView.getSettings().setCacheMode(WebSettings.LOAD_DEFAULT);
        }
        if (!Controller.getInstance().loadMedia() && Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR1) {
            webView.getSettings().setMediaPlaybackRequiresUserGesture(false);
        }
        // No need to reload everything
        if (webviewInitialized) {
            return;
        }
        // Check for errors
        if (Controller.getInstance().getConnector().hasLastError()) {
            Intent i = new Intent(getActivity(), ErrorActivity.class);
            i.putExtra(ErrorActivity.ERROR_MESSAGE, Controller.getInstance().getConnector().pullLastError());
            startActivityForResult(i, ErrorActivity.ACTIVITY_SHOW_ERROR);
            return;
        }
        // Comma-separated markup for all checked labels, coloured when both colours are known
        StringBuilder labels = new StringBuilder();
        for (Label label : article.labels) {
            if (label.checked) {
                if (labels.length() > 0) {
                    labels.append(", ");
                }
                String labelString = label.caption;
                if (label.foregroundColor != null && label.backgroundColor != null) {
                    labelString = String.format(LABEL_COLOR_STRING, label.foregroundColor, label.backgroundColor, label.caption);
                }
                labels.append(labelString);
            }
        }
        // Remove all html tags and content that doesn't meet this set of allowed stuff.
        // Only done below JELLY_BEAN_MR1; on newer versions the content is passed through as-is.
        final String contentClean;
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN_MR1) {
            contentClean = article.content;
        } else {
            contentClean = Jsoup.clean(article.content, Whitelist.relaxed());
        }
        // Load html from Controller and insert content// Article-Prefetch-Stuff from Raw-Resources and System
        ST htmlTmpl = new ST(getString(R.string.HTML_TEMPLATE), '$', '$');
        // Hyphenation JavaScript. Added exactly once: adding the same attribute a second time
        // turns it into a multi-valued attribute and the script would be rendered twice.
        if (Controller.getInstance().allowHyphenation()) {
            ST javascriptST = new ST(getString(R.string.JAVASCRIPT_HYPHENATION_TEMPLATE), '$', '$');
            javascriptST.add("LANGUAGE", Controller.getInstance().hyphenationLanguage());
            htmlTmpl.add("HYPHENATION", javascriptST.render());
        }
        // Replace alignment-marker: align:left or align:justify
        ST stylesST = new ST(getString(R.string.STYLE_TEMPLATE), '$', '$');
        if (Controller.getInstance().alignFlushLeft()) {
            stylesST.add("TEXT_ALIGN", getString(R.string.ALIGN_LEFT));
        } else {
            stylesST.add("TEXT_ALIGN", getString(R.string.ALIGN_JUSTIFY));
        }
        htmlTmpl.add("STYLE", stylesST.render());
        // General values
        htmlTmpl.add("THEME", getResources().getString(Controller.getInstance().getThemeHTML()));
        htmlTmpl.add("CACHE_DIR", Controller.getInstance().cacheFolder());
        htmlTmpl.add("LANGUAGE", Controller.getInstance().hyphenationLanguage());
        // Special values for this article
        htmlTmpl.add("article", article);
        htmlTmpl.add("feed", feed);
        htmlTmpl.add("CACHED_IMAGES", cachedImages);
        htmlTmpl.add("LABELS", labels.toString());
        htmlTmpl.add("UPDATED", DateUtils.getDateTimeCustom(getActivity(), article.updated));
        htmlTmpl.add("ATTACHMENTS", getAttachmentsMarkup(article.attachments));
        htmlTmpl.add("CONTENT", contentClean);
        // Navigation buttons
        if (Controller.getInstance().showButtonsMode() == Constants.SHOW_BUTTONS_MODE_HTML) {
            htmlTmpl.add("NAVIGATION", getString(R.string.BOTTOM_NAVIGATION_TEMPLATE));
        }
        // Note of the article
        if (article.note != null && article.note.length() > 0) {
            ST noteST = new ST(getResources().getString(R.string.NOTE_TEMPLATE), '$', '$');
            noteST.add("NOTE", getResources().getString(R.string.Commons_HtmlPrefixNote) + " " + article.note);
            htmlTmpl.add("NOTE_TEMPLATE", noteST.render());
        }
        content = htmlTmpl.render();
        // NOTE(review): JavaScript is enabled and a JS interface is exposed, but article.content is
        // only run through JSoup below JELLY_BEAN_MR1 (see above). On newer versions the raw feed
        // content reaches the WebView unfiltered — confirm this is an accepted risk.
        webView.getSettings().setJavaScriptEnabled(true);
        webView.addJavascriptInterface(articleJSInterface, "articleController");
        webView.loadDataWithBaseURL("file:///android_asset/", content, "text/html", "utf-8", null);
        if (!linkAutoOpened && article.content.length() < 3) {
            if (Controller.getInstance().openUrlEmptyArticle()) {
                Log.i(TAG, "Article-Content is empty, opening URL in browser");
                linkAutoOpened = true;
                openLink();
            }
        }
        // Everything did load, we dont have to do this again.
        webviewInitialized = true;
    } catch (Exception e) {
        Log.w(TAG, e.getClass().getSimpleName() + " in doRefresh(): " + e.getMessage() + " (" + e.getCause() + ")", e);
    } finally {
        ProgressBarManager.getInstance().removeProgress((MenuActivity) getActivity());
    }
}Example 43
| Project: Portofino-master File: AbstractCrudAction.java View source code |
//**************************************************************************
// Form handling
//**************************************************************************
/**
 * Writes the contents of the create or edit form into the persistent object.
 * Assumes that the form has already been validated.
 * Also processes rich-text (HTML) fields by cleaning the submitted HTML according
 * to the {@link #getWhitelist() whitelist}. If cleaning fails for any reason, the
 * value is stored HTML-escaped instead. A {@code null} field value is written back
 * unchanged without being passed to the cleaner.
 */
protected void writeFormToObject() {
    form.writeToObject(object);
    for (TextField textField : FormUtil.collectEditableRichTextFields(form)) {
        //TODO in bulk edit mode, the field should be skipped altogether if the checkbox is not checked.
        PropertyAccessor propertyAccessor = textField.getPropertyAccessor();
        String stringValue = (String) propertyAccessor.get(object);
        String cleanText;
        if (stringValue == null) {
            // Jsoup rejects null input; handing it over would only trigger the fallback
            // below and log a misleading error for every empty rich-text field.
            cleanText = null;
        } else {
            try {
                Whitelist whitelist = getWhitelist();
                cleanText = Jsoup.clean(stringValue, whitelist);
            } catch (Throwable t) {
                // Deliberately broad: never store uncleaned HTML, whatever went wrong.
                logger.error("Could not clean HTML, falling back to escaped text", t);
                cleanText = StringEscapeUtils.escapeHtml(stringValue);
            }
        }
        propertyAccessor.set(object, cleanText);
    }
}Example 44
| Project: email-master File: HtmlHelper.java View source code |
/**
 * Runs the given HTML through Jsoup's cleaner with a whitelist that permits no tags.
 *
 * @param html the HTML to clean
 * @return the cleaner's output for {@code html}
 */
public static String extractText(String html) {
    final Whitelist noTagsAllowed = Whitelist.none();
    return Jsoup.clean(html, noTagsAllowed);
}Example 45
| Project: k-9-master File: HtmlHelper.java View source code |
/**
 * Runs the given HTML through Jsoup's cleaner with a whitelist that permits no tags.
 *
 * @param html the HTML to clean
 * @return the cleaner's output for {@code html}
 */
public static String extractText(String html) {
    final Whitelist noTagsAllowed = Whitelist.none();
    return Jsoup.clean(html, noTagsAllowed);
}Example 46
| Project: NoticEditor-master File: HtmlImportMode.java View source code |
/**
 * Returns the whitelist held by this object.
 *
 * @return the current {@code whitelist} value
 */
public Whitelist getWhitelist() {
    return this.whitelist;
}Example 47
| Project: iis-master File: HtmlToPlaintextIngester.java View source code |
/**
 * Cleans {@code input} with {@link Whitelist#none()} and the {@code outputSettings} declared
 * elsewhere in this class, then applies a literal string replacement to the cleaned result.
 *
 * @param input the markup to clean
 * @return the cleaned string after the replacement
 */
private static String cleanNoMarkup(String input) {
// NOTE(review): the first replace() argument may have been altered by text extraction
// (it could originally be "&nbsp;" or a non-breaking space) — confirm against upstream.
return Jsoup.clean(input, "", Whitelist.none(), outputSettings).replace(" ", "");
}Example 48
| Project: new-android-daisy-reader-master File: FullText.java View source code |
/**
 * Returns the inner HTML of the element with the given id, cleaned with the
 * {@link Whitelist#simpleText()} whitelist.
 *
 * @param reference the id of the element to look up, e.g. "id_224"
 * @return the cleaned inner HTML of that element
 */
public String getHtmlFor(String reference) {
    final String innerHtml = documentContents.getElementById(reference).html();
    final Whitelist simpleTextOnly = Whitelist.simpleText();
    return Jsoup.clean(innerHtml, simpleTextOnly);
}Example 49
| Project: btpka3.github.com-master File: Test123.java View source code |
/**
 * Cleans {@code drityInput} with the basic whitelist and prints the result to standard output.
 */
public static void testJsoup() {
    final Whitelist basicPolicy = Whitelist.basic();
    System.out.println(Jsoup.clean(drityInput, basicPolicy));
}Example 50
| Project: lutece-core-master File: JSoupHtmlCleaner.java View source code |
/**
 * {@inheritDoc}
 * <p>This implementation delegates to Jsoup using the relaxed whitelist.</p>
 */
@Override
public String clean(String strSource) throws HtmlCleanerException {
    return Jsoup.clean(strSource, Whitelist.relaxed());
}Example 51
| Project: smartly-master File: HtmlParser.java View source code |
/**
 * Cleans the wrapped document with a {@link Cleaner} built from the basic whitelist.
 *
 * @return the document produced by the cleaner
 */
public Document cleanBasic() {
    return new Cleaner(Whitelist.basic()).clean(_document);
}Example 52
| Project: structured-content-tools-master File: StripHtmlPreprocessor.java View source code |
/**
 * Converts an HTML value to text: the value is cleaned with the relaxed whitelist, parsed
 * again, and its body is handed to {@code convertNodeToText}.
 *
 * @param value the HTML to convert; {@code null} and blank values are returned unchanged
 * @return the text produced from the cleaned document body, or {@code value} itself when it is
 *         {@code null} or blank
 */
protected String stripHtml(String value) {
    if (value == null) {
        return value;
    }
    if (value.trim().isEmpty()) {
        return value;
    }
    final String sanitizedHtml = Jsoup.clean(value, Whitelist.relaxed());
    final Document parsed = Jsoup.parse(sanitizedHtml);
    return convertNodeToText(parsed.body());
}Example 53
| Project: CN1ML-NetbeansModule-master File: Jsoup.java View source code |
/**
 Sanitizes untrusted HTML: the input is parsed as a body fragment and filtered through a white-list of permitted
 tags and attributes.
 @param bodyHtml untrusted input HTML (body fragment)
 @param baseUri base URL against which relative URLs are resolved
 @param whitelist white-list of permitted HTML elements
 @return the filtered HTML (body fragment)
 @see Cleaner#clean(Document)
 */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    final Document untrusted = parseBodyFragment(bodyHtml, baseUri);
    final Document sanitized = new Cleaner(whitelist).clean(untrusted);
    return sanitized.body().html();
}Example 54
| Project: FudanBBS-master File: Jsoup.java View source code |
/**
 Sanitizes untrusted HTML: the input is parsed as a body fragment and filtered through a white-list of permitted
 tags and attributes.
 @param bodyHtml untrusted input HTML (body fragment)
 @param baseUri base URL against which relative URLs are resolved
 @param whitelist white-list of permitted HTML elements
 @return the filtered HTML (body fragment)
 @see Cleaner#clean(Document)
 */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    final Document untrusted = parseBodyFragment(bodyHtml, baseUri);
    final Document sanitized = new Cleaner(whitelist).clean(untrusted);
    return sanitized.body().html();
}Example 55
| Project: jsoup-master File: Jsoup.java View source code |
/**
 Sanitizes untrusted HTML: the input is parsed as a body fragment and filtered through a white-list of permitted
 tags and attributes.
 @param bodyHtml untrusted input HTML (body fragment)
 @param baseUri base URL against which relative URLs are resolved
 @param whitelist white-list of permitted HTML elements
 @return the filtered HTML (body fragment)
 @see Cleaner#clean(Document)
 */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    final Document untrusted = parseBodyFragment(bodyHtml, baseUri);
    final Document sanitized = new Cleaner(whitelist).clean(untrusted);
    return sanitized.body().html();
}Example 56
| Project: validadorAcessibilidade-master File: Jsoup.java View source code |
/**
 Sanitizes untrusted HTML: the input is parsed as a body fragment and filtered through a white-list of permitted
 tags and attributes.
 @param bodyHtml untrusted input HTML (body fragment)
 @param baseUri base URL against which relative URLs are resolved
 @param whitelist white-list of permitted HTML elements
 @return the filtered HTML (body fragment)
 @see Cleaner#clean(Document)
 */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    final Document untrusted = parseBodyFragment(bodyHtml, baseUri);
    final Document sanitized = new Cleaner(whitelist).clean(untrusted);
    return sanitized.body().html();
}Example 57
| Project: zafu_jwc-master File: Jsoup.java View source code |
/**
 Sanitizes untrusted HTML: the input is parsed as a body fragment and filtered through a white-list of permitted
 tags and attributes.
 @param bodyHtml untrusted input HTML (body fragment)
 @param baseUri base URL against which relative URLs are resolved
 @param whitelist white-list of permitted HTML elements
 @return the filtered HTML (body fragment)
 @see Cleaner#clean(Document)
 */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    final Document untrusted = parseBodyFragment(bodyHtml, baseUri);
    final Document sanitized = new Cleaner(whitelist).clean(untrusted);
    return sanitized.body().html();
}Example 58
| Project: ese2010-team3-master File: Tools.java View source code |
/**
 * Converts Markdown content to HTML and cleans the result with the basic whitelist.
 * Content that already starts with {@code <h3>} is treated as HTML and is only cleaned,
 * not run through the Markdown processor.
 *
 * @param content
 *            some Markdown content (or HTML starting with {@code <h3>})
 * @return that content as HTML, cleaned with the basic whitelist
 */
public static String markdownToHtml(String content) {
    final String html;
    if (content.startsWith("<h3>")) {
        // Already HTML: skip the Markdown processor
        html = content;
    } else {
        html = new PegDownProcessor().markdownToHtml(content);
    }
    return Jsoup.clean(html, Whitelist.basic());
}Example 59
| Project: SmartAndroidSource-master File: SmartTag.java View source code |
/**
 Sanitizes untrusted HTML: the input is parsed as a body fragment and filtered through a white-list of permitted
 tags and attributes.
 @param bodyHtml untrusted input HTML (body fragment)
 @param baseUri base URL against which relative URLs are resolved
 @param whitelist white-list of permitted HTML elements
 @return the filtered HTML (body fragment)
 @see Cleaner#clean(Document)
 */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
    final Document untrusted = parseBodyFragment(bodyHtml, baseUri);
    final Document sanitized = new Cleaner(whitelist).clean(untrusted);
    return sanitized.body().html();
}Example 60
| Project: activeweb-master File: HttpSupport.java View source code |
/**
 * Cleans HTML by filtering it through Jsoup's basic whitelist.
 * <p>For example, input like this:</p>
 *
 * <pre>
 * &lt;html&gt;&lt;script&gt; alert('hello');&lt;/script&gt;&lt;div&gt;this is a clean part&lt;/div&gt;&lt;/html&gt;
 * </pre>
 *
 * will produce output like this:
 *
 * <pre>
 * this is a clean part
 * </pre>
 *
 * @param unsafeContent unsafe content, such as text an end user typed into a text area, or input
 *                      that may include a script tag or other unwanted markup
 * @return the cleaned version of the input
 */
protected String sanitize(String unsafeContent) {
    final Whitelist basicPolicy = Whitelist.basic();
    return Jsoup.clean(unsafeContent, basicPolicy);
}Example 61
| Project: agile-itsm-master File: SolicitacaoServicoDTO.java View source code |
/**
 * Stores the given description after cleaning it with a whitelist that permits no tags.
 * A {@code null} argument is ignored and leaves the current value untouched.
 *
 * @param descricaoForTitle the description to clean and store; may be {@code null}
 */
public void setDescricaoForTitle(String descricaoForTitle) {
    if (descricaoForTitle == null) {
        return;
    }
    this.descricaoForTitle = Jsoup.clean(descricaoForTitle, Whitelist.none());
}Example 62
| Project: crushpaper-master File: Servlet.java View source code |
/**
 * Builds the initial whitelist: the relaxed whitelist with {@code rel="nofollow"} and
 * {@code target="_blank"} enforced on every {@code a} element.
 */
@Override
protected Whitelist initialValue() {
    Whitelist policy = Whitelist.relaxed();
    policy = policy.addEnforcedAttribute("a", "rel", "nofollow");
    policy = policy.addEnforcedAttribute("a", "target", "_blank");
    return policy;
}