/* This code is part of Freenet. It is distributed under the GNU General
* Public License, version 2 (or at your option any later version). See
* http://www.gnu.org/ for further details of the GPL. */
package freenet.client.filter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.regex.Pattern;
import freenet.client.filter.HTMLFilter.ParsedTag;
import freenet.clients.http.ExternalLinkToadlet;
import freenet.clients.http.HTTPRequestImpl;
import freenet.clients.http.StaticToadlet;
import freenet.keys.FreenetURI;
import freenet.l10n.NodeL10n;
import freenet.support.LogThresholdCallback;
import freenet.support.Logger;
import freenet.support.URIPreEncoder;
import freenet.support.URLDecoder;
import freenet.support.URLEncodedFormatException;
import freenet.support.Logger.LogLevel;
import freenet.support.api.HTTPRequest;
/**
 * {@link FilterCallback} / {@link URIProcessor} implementation that sanitises the URIs, forms and
 * base hrefs encountered while filtering content.
 *
 * <p>URIs that parse as Freenet keys are rewritten relative to the current base URI and reported
 * to the optional {@link FoundURICallback}. URIs using one of the {@link #allowedProtocols} are
 * routed through {@link ExternalLinkToadlet#escape(String)}. Everything else is rejected with a
 * {@link CommentException}.
 *
 * <p>The base URI is mutable: {@link #onBaseHref(String)} replaces it when an acceptable base
 * href is seen.
 */
public class GenericReadFilterCallback implements FilterCallback, URIProcessor {
    /** URI schemes that are allowed as external links (via {@link ExternalLinkToadlet}). */
    public static final HashSet<String> allowedProtocols;
    static {
        allowedProtocols = new HashSet<String>();
        allowedProtocols.add("http");
        allowedProtocols.add("https");
        allowedProtocols.add("ftp");
        allowedProtocols.add("mailto");
        allowedProtocols.add("nntp");
        allowedProtocols.add("news");
        allowedProtocols.add("snews");
        allowedProtocols.add("about");
        allowedProtocols.add("irc");
        // file:// ?
    }

    /** URI that relative links are resolved against. Replaced by {@link #onBaseHref(String)}. */
    private URI baseURI;
    /** {@link #baseURI} truncated after its last '/', used to relativize the returned links. */
    private URI strippedBaseURI;
    /** Receives every Freenet URI and text run found; may be null. */
    private final FoundURICallback cb;
    /** Optional tag rewriter consulted by {@link #processTag(ParsedTag)}; may be null. */
    private final TagReplacerCallback trc;
    /** Provider for link filter exceptions. */
    private final LinkFilterExceptionProvider linkFilterExceptionProvider;

    private static volatile boolean logMINOR;
    static {
        Logger.registerLogThresholdCallback(new LogThresholdCallback(){
            @Override
            public void shouldUpdate(){
                logMINOR = Logger.shouldLog(LogLevel.MINOR, this);
            }
        });
    }

    /**
     * Creates a callback whose base is an arbitrary {@link URI}.
     *
     * @param uri base URI of the document being filtered
     * @param cb callback notified of found URIs and text, or null
     * @param trc tag replacer, or null
     * @param linkFilterExceptionProvider provider of links exempt from filtering, or null
     */
    public GenericReadFilterCallback(URI uri, FoundURICallback cb,TagReplacerCallback trc, LinkFilterExceptionProvider linkFilterExceptionProvider) {
        this.baseURI = uri;
        this.cb = cb;
        this.trc=trc;
        this.linkFilterExceptionProvider = linkFilterExceptionProvider;
        setStrippedURI(uri.toString());
    }

    /**
     * Creates a callback whose base is a {@link FreenetURI}, converted with
     * {@link FreenetURI#toRelativeURI()}.
     *
     * @param uri Freenet URI of the document being filtered
     * @param cb callback notified of found URIs and text, or null
     * @param trc tag replacer, or null
     * @param linkFilterExceptionProvider provider of links exempt from filtering, or null
     * @throws Error if the Freenet URI cannot be expressed as a {@link URI}
     */
    public GenericReadFilterCallback(FreenetURI uri, FoundURICallback cb,TagReplacerCallback trc, LinkFilterExceptionProvider linkFilterExceptionProvider) {
        try {
            this.baseURI = uri.toRelativeURI();
            setStrippedURI(baseURI.toString());
            this.cb = cb;
            this.trc=trc;
            this.linkFilterExceptionProvider = linkFilterExceptionProvider;
        } catch (URISyntaxException e) {
            throw new Error(e);
        }
    }

    /**
     * Sets {@link #strippedBaseURI} to {@code u} cut after its last '/'. Falls back to
     * {@link #baseURI} when there is no '/' beyond index 0 or the truncated text does not parse.
     */
    private void setStrippedURI(String u) {
        int idx = u.lastIndexOf('/');
        if(idx > 0) {
            u = u.substring(0, idx+1);
            try {
                strippedBaseURI = new URI(u);
            } catch (URISyntaxException e) {
                Logger.error(this, "Can't strip base URI: "+e+" parsing "+u);
                strippedBaseURI = baseURI;
            }
        } else
            strippedBaseURI = baseURI;
    }

    /**
     * Convenience overload of {@link #processURI(String, String, boolean, boolean)} with
     * {@code forBaseHref} and {@code inline} both false.
     */
    @Override
    public String processURI(String u, String overrideType) throws CommentException {
        return processURI(u, overrideType, false, false);
    }

    // RFC3986
    // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
    protected static final String UNRESERVED = "[a-zA-Z0-9\\-\\._~]";
    // pct-encoded = "%" HEXDIG HEXDIG
    protected static final String PCT_ENCODED = "(?:%[0-9A-Fa-f][0-9A-Fa-f])";
    // sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
    // / "*" / "+" / "," / ";" / "="
    protected static final String SUB_DELIMS = "[\\!\\$&'\\(\\)\\*\\+,;=]";
    // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
    protected static final String PCHAR = "(?>" + UNRESERVED + "|" + PCT_ENCODED + "|" + SUB_DELIMS + "|[:@])";
    // fragment = *( pchar / "/" / "?" )
    protected static final String FRAGMENT = "(?>" + PCHAR + "|\\/|\\?)*";

    /** Matches a URI that consists solely of a '#' followed by an RFC 3986 fragment. */
    private static final Pattern anchorRegex;
    static {
        anchorRegex = Pattern.compile("^#" + FRAGMENT + "$");
    }

    /** Characters permitted after {@link #PLUGINS_PREFIX} in a form action; compiled once. */
    private static final Pattern pluginFormTarget = Pattern.compile("[A-Za-z0-9\\.]+");

    /**
     * Sanitises a URI found in filtered content.
     *
     * <p>In order: pure anchors are returned unchanged; bookmark links to "/", static toadlet
     * paths and link-filter exceptions are passed through; URIs that parse as Freenet keys
     * (absolute first, then relative to the base URI) are rewritten via
     * {@link #finishProcess}; URIs with an allowed external scheme are escaped through
     * {@link ExternalLinkToadlet}. Anything else is rejected.
     *
     * @param u the URI text as it appeared in the content
     * @param overrideType MIME type to force onto the link, or null
     * @param forBaseHref true when processing a base href; disables relative resolution and
     *        rejects anything that is not an absolute Freenet URI
     * @param inline passed on to {@link FoundURICallback#foundURI(FreenetURI, boolean)}
     * @return the sanitised URI
     * @throws CommentException if the URI cannot be parsed or is not allowed
     */
    @Override
    public String processURI(String u, String overrideType, boolean forBaseHref, boolean inline) throws CommentException {
        if(anchorRegex.matcher(u).matches()) {
            // Hack for anchors, see #710
            return u;
        }

        boolean noRelative = forBaseHref;
        // evil hack, see #2451 and r24565,r24566
        u = u.replace(" #", " %23");

        URI uri;
        URI resolved;
        try {
            if(logMINOR) Logger.minor(this, "Processing "+u);
            uri = URIPreEncoder.encodeURI(u).normalize();
            if(logMINOR) Logger.minor(this, "Processing "+uri);
            // Don't bother with relative URIs if it's obviously absolute.
            // Don't allow encoded /'s, they're just too confusing (here they would get decoded and then coalesced with other slashes).
            // Percent-encoding hex digits are case-insensitive, so "%2F" must be caught as well as "%2f".
            if(u.startsWith("/") || u.regionMatches(true, 0, "%2f", 0, 3))
                noRelative = true;
            if(!noRelative)
                resolved = baseURI.resolve(uri);
            else
                resolved = uri;
            if(logMINOR) Logger.minor(this, "Resolved: "+resolved);
        } catch (URISyntaxException e1) {
            if(logMINOR) Logger.minor(this, "Failed to parse URI: "+e1);
            throw new CommentException(l10n("couldNotParseURIWithError", "error", e1.getMessage()));
        }

        String path = uri.getPath();
        HTTPRequest req = new HTTPRequestImpl(uri, "GET");
        if (path != null) {
            if (path.equals("/") && req.isParameterSet("newbookmark") && !forBaseHref) {
                // allow links to the root to add bookmarks
                String bookmark_key = req.getParam("newbookmark");
                String bookmark_desc = req.getParam("desc");
                String bookmark_activelink = req.getParam("hasAnActivelink", "");
                try {
                    // Round-trip the key through FreenetURI so only a well-formed key is emitted.
                    FreenetURI furi = new FreenetURI(bookmark_key);
                    bookmark_key = furi.toString();
                    bookmark_desc = URLEncoder.encode(bookmark_desc, "UTF-8");
                } catch (UnsupportedEncodingException e) {
                    // impossible, UTF-8 is always supported
                } catch (MalformedURLException e) {
                    throw new CommentException("Invalid Freenet URI: " + e);
                }
                String url = "/?newbookmark="+bookmark_key+"&desc="+bookmark_desc;
                if (bookmark_activelink.equals("true")) {
                    url = url + "&hasAnActivelink=true";
                }
                return url;
            } else if(path.startsWith(StaticToadlet.ROOT_URL)) {
                // @see bug #2297
                return path;
            } else if (linkFilterExceptionProvider != null) {
                if (linkFilterExceptionProvider.isLinkExcepted(uri)) {
                    return path + ((uri.getQuery() != null) ? ("?" + uri.getQuery()) : "");
                }
            }
        }

        String reason = l10n("deletedURI");

        // Try as an absolute URI
        URI origURI = uri;

        // Convert localhost uri's to relative internal ones.
        String host = uri.getHost();
        if(host != null && (host.equals("localhost") || host.equals("127.0.0.1")) && uri.getPort() == 8888) {
            try {
                uri = new URI(null, null, null, -1, uri.getPath(), uri.getQuery(), uri.getFragment());
            } catch (URISyntaxException e) {
                Logger.error(this, "URI "+uri+" looked like localhost but could not parse", e);
                throw new CommentException("URI looked like localhost but could not parse: "+e);
            }
            host = null;
        }

        String rpath = uri.getPath();
        if(logMINOR) Logger.minor(this, "Path: \""+path+"\" rpath: \""+rpath+"\"");

        if(host == null) {
            if(rpath != null) {
                if(logMINOR) Logger.minor(this, "Resolved URI (rpath absolute): \""+rpath+"\"");
                // Valid FreenetURI?
                try {
                    String p = rpath;
                    while(p.startsWith("/")) {
                        p = p.substring(1);
                    }
                    FreenetURI furi = new FreenetURI(p, true);
                    if(logMINOR) Logger.minor(this, "Parsed: "+furi);
                    return processURI(furi, uri, overrideType, true, inline);
                } catch (MalformedURLException e) {
                    // Not a FreenetURI
                    if(logMINOR) Logger.minor(this, "Malformed URL (a): "+e, e);
                    if(e.getMessage() != null) {
                        reason = l10n("malformedAbsoluteURL", "error", e.getMessage());
                    } else {
                        reason = l10n("couldNotParseAbsoluteFreenetURI");
                    }
                }
            }

            // Reaching this point means the absolute attempt did not return, so try the
            // URI as resolved against the base (never for a base href).
            if(!forBaseHref) {
                // Relative URI
                rpath = resolved.getPath();
                if(rpath == null) throw new CommentException("No URI");
                if(logMINOR) Logger.minor(this, "Resolved URI (rpath relative): "+rpath);
                // Valid FreenetURI?
                try {
                    String p = rpath;
                    while(p.startsWith("/")) p = p.substring(1);
                    FreenetURI furi = new FreenetURI(p, true);
                    if(logMINOR) Logger.minor(this, "Parsed: "+furi);
                    return processURI(furi, uri, overrideType, forBaseHref, inline);
                } catch (MalformedURLException e) {
                    if(logMINOR) Logger.minor(this, "Malformed URL (b): "+e, e);
                    if(e.getMessage() != null) {
                        reason = l10n("malformedRelativeURL", "error", e.getMessage());
                    } else {
                        reason = l10n("couldNotParseRelativeFreenetURI");
                    }
                }
            }
        }

        // Not a Freenet URI: judge the URI as originally written (before localhost rewriting).
        uri = origURI;

        if(forBaseHref)
            throw new CommentException(l10n("bogusBaseHref"));
        if(GenericReadFilterCallback.allowedProtocols.contains(uri.getScheme()))
            return ExternalLinkToadlet.escape(uri.toString());
        else {
            if(uri.getScheme() == null) {
                throw new CommentException(reason);
            }
            throw new CommentException(l10n("protocolNotEscaped", "protocol", uri.getScheme()));
        }
    }

    /**
     * Resolves {@code uri} against the current base URI.
     *
     * @param uri URI text, possibly relative
     * @return the resolved URI in ASCII form
     * @throws URISyntaxException if {@code uri} cannot be parsed
     */
    @Override
    public String makeURIAbsolute(String uri) throws URISyntaxException{
        return baseURI.resolve(URIPreEncoder.encodeURI(uri).normalize()).toASCIIString();
    }

    /** Looks up a localised message of this class with a single substitution. */
    private static String l10n(String key, String pattern, String value) {
        return NodeL10n.getBase().getString("GenericReadFilterCallback."+key, pattern, value);
    }

    /** Looks up a localised message of this class. */
    private static String l10n(String key) {
        return NodeL10n.getBase().getString("GenericReadFilterCallback."+key);
    }

    /**
     * Builds the final link for an accepted Freenet URI: path, optional sanitised
     * {@code ?type=} override and the original fragment, relativized against
     * {@link #strippedBaseURI} unless {@code noRelative} is set.
     *
     * @param req request view of the original URI, used to read its {@code type} parameter
     * @param overrideType type forced by the caller; takes precedence over the URI's own, or null
     * @param path absolute path of the Freenet URI (starts with '/')
     * @param u the original URI, used for its fragment
     * @param noRelative true to return the path as is instead of relativizing it
     * @return the link to emit
     */
    private String finishProcess(HTTPRequest req, String overrideType, String path, URI u, boolean noRelative) {
        String typeOverride = req.getParam("type", null);
        if(overrideType != null)
            typeOverride = overrideType;
        if(typeOverride != null) {
            String[] split = HTMLFilter.splitType(typeOverride);
            if(split[1] != null) {
                String charset = split[1];
                try {
                    charset = URLDecoder.decode(charset, false);
                } catch (URLEncodedFormatException e) {
                    charset = null;
                }
                if(charset != null && charset.indexOf('&') != -1)
                    charset = null;
                if(charset != null) {
                    try {
                        if(!Charset.isSupported(charset))
                            charset = null;
                    } catch (IllegalArgumentException e) {
                        // Charset.isSupported throws IllegalCharsetNameException (a subclass)
                        // for syntactically illegal names, e.g. "" or names with spaces.
                        // The name is untrusted input, so just drop it.
                        charset = null;
                    }
                }
                if(charset != null)
                    typeOverride = split[0]+"; charset="+charset;
                else
                    typeOverride = split[0];
            }
        }

        // REDFLAG any other options we should support?
        // Obviously we don't want to support ?force= !!
        // At the moment, ?type= and ?force= are the only options supported by FProxy anyway.

        try {
            // URI encoding issues: FreenetURI.toString() does URLEncode'ing of critical components.
            // So if we just pass it in to the component-wise constructor, we end up encoding twice,
            // so get %2520 for a space.

            // However, we want to support encoded slashes or @'s in the path, so we don't want to
            // just decode before feeding it to the constructor. It looks like the best option is
            // to construct it ourselves and then re-parse it. This is doing unnecessary work, it
            // would be much easier if we had a component-wise constructor for URI that didn't
            // re-encode, but at least it works...

            StringBuilder sb = new StringBuilder();
            if(strippedBaseURI.getScheme() != null && !noRelative) {
                sb.append(strippedBaseURI.getScheme());
                sb.append("://");
                sb.append(strippedBaseURI.getAuthority());
                assert(path.startsWith("/"));
            }
            sb.append(path);
            if(typeOverride != null) {
                sb.append("?type=");
                sb.append(freenet.support.URLEncoder.encode(typeOverride, "", false, "="));
            }
            if(u.getFragment() != null) {
                sb.append('#');
                sb.append(u.getRawFragment());
            }

            URI uri = new URI(sb.toString());

            if(!noRelative)
                uri = strippedBaseURI.relativize(uri);
            if(logMINOR)
                Logger.minor(this, "Returning "+uri.toASCIIString()+" from "+path+" from baseURI="+baseURI+" stripped base uri="+strippedBaseURI.toString());
            return uri.toASCIIString();
        } catch (URISyntaxException e) {
            Logger.error(this, "Could not parse own URI: path="+path+", typeOverride="+typeOverride+", frag="+u.getFragment()+" : "+e, e);
            // Best-effort fallback: assemble the link by hand.
            String p = path;
            if(typeOverride != null)
                p += "?type="+typeOverride;
            if(u.getFragment() != null){
                try{
                    // FIXME encode it properly
                    // The '#' separator is required, otherwise the fragment text would be
                    // glued onto the path / type value and change the link target.
                    p += "#" + URLEncoder.encode(u.getFragment(),"UTF-8");
                }catch (UnsupportedEncodingException e1){
                    throw new Error("Impossible: JVM doesn't support UTF-8: " + e1, e1);
                }
            }
            return p;
        }
    }

    /**
     * Handles a URI that parsed as a Freenet key: reports it to the callback (if any) and
     * builds the link to emit.
     */
    private String processURI(FreenetURI furi, URI uri, String overrideType, boolean noRelative, boolean inline) {
        // Valid Freenet URI, allow it
        // Now what about the queries?
        HTTPRequest req = new HTTPRequestImpl(uri, "GET");
        if(cb != null) {
            // Both callback variants are invoked, in this order.
            cb.foundURI(furi);
            cb.foundURI(furi, inline);
        }
        return finishProcess(req, overrideType, '/' + furi.toString(false, false), uri, noRelative);
    }

    /**
     * Processes a base href. If it sanitises to an acceptable URI, that URI becomes the new
     * base for subsequent relative links.
     *
     * @param baseHref the base href as found in the content
     * @return the new base URI in ASCII form, or null if the base href was rejected
     */
    @Override
    public String onBaseHref(String baseHref) {
        String ret;
        try {
            ret = processURI(baseHref, null, true, false);
        } catch (CommentException e1) {
            Logger.error(this, "Failed to parse base href: "+baseHref+" -> "+e1.getMessage());
            ret = null;
        }
        if(ret == null) {
            Logger.error(this, "onBaseHref() failed: cannot sanitize "+baseHref);
            return null;
        } else {
            try {
                baseURI = new URI(ret);
                setStrippedURI(ret);
            } catch (URISyntaxException e) {
                throw new Error(e); // Impossible
            }
            return baseURI.toASCIIString();
        }
    }

    /** Forwards a text run, together with the current base URI, to the callback if one is set. */
    @Override
    public void onText(String s, String type) {
        if(cb != null)
            cb.onText(s, type, baseURI);
    }

    static final String PLUGINS_PREFIX = "/plugins/";

    /**
     * Process a form.
     * Current strategy:
     * - Both POST and GET forms are allowed to /
     * Anything that is hazardous should be protected through formPassword.
     * @param method the form method; null is treated as GET
     * @param action the form action; null yields null
     * @return the action to use, or null if the form is not allowed
     * @throws CommentException If the form element could not be parsed and the user should be told.
     */
    @Override
    public String processForm(String method, String action) throws CommentException {
        if(action == null) return null;
        if(method == null) method = "GET";
        method = method.toUpperCase();
        if(!(method.equals("POST") || method.equals("GET")))
            return null; // no irregular form sending methods
        // FIXME what about /downloads/ /friends/ etc?
        // Allow access to Library for searching, form passwords are used for actions such as adding bookmarks
        if(action.equals("/library/"))
            return action;
        try {
            URI uri = URIPreEncoder.encodeURI(action);
            // Only local, path-only actions are acceptable.
            if(uri.getScheme() != null || uri.getHost() != null || uri.getPort() != -1 || uri.getUserInfo() != null)
                throw new CommentException(l10n("invalidFormURI"));
            String path = uri.getPath();
            if(path.startsWith(PLUGINS_PREFIX)) {
                String after = path.substring(PLUGINS_PREFIX.length());
                if(after.indexOf("../") > -1)
                    throw new CommentException(l10n("invalidFormURIAttemptToEscape"));
                if(pluginFormTarget.matcher(after).matches())
                    return uri.toASCIIString();
            }
        } catch (URISyntaxException e) {
            throw new CommentException(l10n("couldNotParseFormURIWithError", "error", e.getLocalizedMessage()));
        }
        // Otherwise disallow.
        return null;
    }

    /** Processes a tag. It calls the TagReplacerCallback if present.
     * @param pt - The tag, that needs to be processed
     * @return The replacement for the tag, or null, if no replacement needed*/
    @Override
    public String processTag(ParsedTag pt) {
        if(trc!=null){
            return trc.processTag(pt,this);
        }else{
            return null;
        }
    }

    /** Tells the callback, if one is set, that the page has been fully processed. */
    @Override
    public void onFinished() {
        if(cb != null)
            cb.onFinishedPage();
    }
}