/**
* Copyright (C) 2011 JTalks.org Team
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.jtalks.jcommune.service.bb2htmlprocessors;
import com.google.common.annotations.VisibleForTesting;
import org.springframework.web.context.request.RequestAttributes;
import org.springframework.web.context.request.RequestContextHolder;
import org.springframework.web.context.request.ServletRequestAttributes;
import javax.servlet.http.HttpServletRequest;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* PostProcessor for bb2html which adds the attribute rel="nofollow" to foreign links. It's done in order to keep our
* Google rating higher.
* <p/>
* Note: "nofollow" is a value of the rel attribute that can be added to links to discourage comment spam. When
* posted links generate no positive benefit for the poster in terms of PageRank (or other search engine value),
* spammers are dissuaded from wasting their time.
*
* @author Andrey Pogorelov
* @see <a href="http://jira.jtalks.org/browse/JC-1421">JIRA</a> for more details.
*/
public class BBForeignLinksPostprocessor implements TextPostProcessor {
    /**
     * Matches the start of an anchor tag up to the end of its quoted {@code href} value, or the start of an image
     * tag up to the end of its quoted {@code src} value. The attributes preceding {@code href}/{@code src} may not
     * contain {@code >}, so a match never swallows a previous tag and the text after it. The closing quote has to
     * be the same character as the opening one. Groups 1 and 2 hold the quote and the URL of an anchor, groups 3
     * and 4 hold the quote and the URL of an image.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
            "<a\\s[^>]*?href=([\"'])(.*?)\\1|<img\\s[^>]*?src=([\"'])(.*?)\\3", Pattern.DOTALL);

    /** Schemes which mark a URL as absolute, i.e. as a candidate for being a foreign link. */
    private static final Pattern ABSOLUTE_URL_SCHEME = Pattern.compile("(http|ftp|https)://");

    /**
     * Rewrites anchor and image tags of the decoded text. Foreign links get the attribute {@code rel="nofollow"}
     * and the prefix returned by {@link #getHrefPrefix()}; this prefix will be excluded from indexing by search
     * engines (robots.txt). The name of the current server is taken from the current servlet request.
     *
     * @param bbDecodedText text (HTML) produced by the BB-code decoder
     * @return resultant text; {@code null} or empty input is returned as is without touching the request
     */
    @Override
    public String postProcess(String bbDecodedText) {
        if (bbDecodedText == null || bbDecodedText.isEmpty()) {
            return bbDecodedText;
        }
        HttpServletRequest httpServletRequest = getServletRequest();
        return addPrefixToForeignLinks(bbDecodedText, httpServletRequest.getServerName());
    }

    /**
     * Rewrites every tag start found by {@link #URL_PATTERN} in a single pass:
     * <ul>
     * <li>foreign anchors become {@code <a rel="nofollow" href="[prefix][url]"};</li>
     * <li>other anchors become {@code <a href="[url]"};</li>
     * <li>images become {@code <img alt=" " class="thumbnail" src="[url]"}.</li>
     * </ul>
     * Attributes which preceded {@code href}/{@code src} are dropped, spaces inside the URL are replaced with
     * {@code %20} and the original quote character of the attribute is preserved.
     *
     * @param decodedText text to process, not null
     * @param serverName  name of the current server, links containing it are not considered foreign
     * @return text with rewritten tags
     */
    private String addPrefixToForeignLinks(String decodedText, String serverName) {
        Matcher tagMatcher = URL_PATTERN.matcher(decodedText);
        StringBuffer processedText = new StringBuffer(decodedText.length());
        while (tagMatcher.find()) {
            String rewrittenTag;
            if (tagMatcher.group(1) != null) {
                String quote = tagMatcher.group(1);
                String url = tagMatcher.group(2);
                String attributes = isForeignUrl(url, serverName)
                        ? "<a rel=\"nofollow\" href=" + quote + getHrefPrefix()
                        : "<a href=" + quote;
                rewrittenTag = attributes + encodeSpaces(url) + quote;
            } else {
                String quote = tagMatcher.group(3);
                String url = tagMatcher.group(4);
                rewrittenTag = "<img alt=\" \" class=\"thumbnail\" src=" + quote + encodeSpaces(url) + quote;
            }
            // URLs may contain '$' or '\', which have a special meaning in a replacement string
            tagMatcher.appendReplacement(processedText, Matcher.quoteReplacement(rewrittenTag));
        }
        tagMatcher.appendTail(processedText);
        return processedText.toString();
    }

    /**
     * Tells whether the URL is an absolute one which doesn't mention the current server. Only the URL itself is
     * inspected, so other attributes of the tag can't influence the decision.
     * NOTE(review): this is a substring check, a foreign URL which merely contains the server name somewhere
     * (e.g. in its path or query) is still treated as a local one.
     */
    private boolean isForeignUrl(String url, String serverName) {
        return !url.contains(serverName) && ABSOLUTE_URL_SCHEME.matcher(url).find();
    }

    /** Replaces spaces of the URL with {@code %20}, the markup around the URL must stay untouched. */
    private String encodeSpaces(String url) {
        return url.replace(" ", "%20");
    }

    /**
     * Gets current request from the request attributes bound to the current thread.
     *
     * @return native {@link javax.servlet.http.HttpServletRequest}
     */
    @VisibleForTesting
    protected HttpServletRequest getServletRequest() {
        RequestAttributes attributes = RequestContextHolder.currentRequestAttributes();
        return ((ServletRequestAttributes) attributes).getRequest();
    }

    /**
     * Gets prefix which is put in front of the URL of a foreign link.
     *
     * @return prefix
     */
    @VisibleForTesting
    protected String getHrefPrefix() {
        return "/out?url=";
    }
}