package org.kefirsf.bb.proc;

import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;

/**
 * The pattern element to parse URLs.
 * <p>
 * Depending on its configuration an instance recognises URLs that start with one of the
 * {@link Schema} prefixes, local URLs that start with "/", "./" or "../", or schemaless URLs.
 *
 * @author kefir
 */
public class ProcUrl extends AbstractUrl {
    /** A percent-encoded octet, e.g. {@code %2F}. */
    private static final String PCT_ENCODED = "(%\\p{XDigit}{2})";

    /**
     * A single path character or a percent-encoded octet. This is an alternation without
     * enclosing parentheses, so it must always be embedded into a group.
     */
    private static final String PCHAR = "[\\w~!$&'*+,;=:@\\(\\)\\.\\-]|" + PCT_ENCODED;

    /** A port like ":8080". Up to five digits so that the whole range up to 65535 fits. */
    private static final Pattern REGEX_PORT = Pattern.compile(
            ":\\d{1,5}"
    );

    /** A path like "/home/web" with an optional trailing slash. It can match an empty string. */
    private static final Pattern REGEX_PATH = Pattern.compile(
            "(/(" + PCHAR + ")+)*/?"
    );

    /** A fragment like "#anchor". */
    private static final Pattern REGEX_FRAGMENT = Pattern.compile(
            "#(" + PCHAR + "|[/?])*"
    );

    /** The beginning of a local URL: "/", "./" or "../". */
    private static final Pattern REGEX_LOCAL_PREFIX = Pattern.compile("\\.{0,2}/");

    /** Prefixes which are searched by {@link #findIn(Source)} when local URLs are allowed. */
    private static final String[] LOCAL_PREFIXES = {"/", "./", "../"};

    private final boolean local;
    private final boolean schemaless;

    /**
     * Create a named URL variable
     *
     * @param name       variable name
     * @param ghost      don't move the cursor after parsing
     * @param local      Parse local URLs also
     * @param schemaless Parse only schemaless URL
     */
    public ProcUrl(String name, boolean ghost, boolean local, boolean schemaless) {
        super(name, ghost);
        this.local = local;
        this.schemaless = schemaless;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Searches forward from the current source offset for the nearest known prefix from which
     * {@link #parseLength(Source, int, ProcPatternElement)} can parse a URL. A schemaless URL has
     * no prefix to search for, so in schemaless mode the result is always -1.
     */
    @Override
    public int findIn(Source source) {
        if (schemaless) {
            return -1;
        }

        int start = source.getOffset();
        int sourceLength = source.length();

        // The prefix list depends only on the configuration, so it is built once per search.
        List<String> prefixes = preparePrefixes();

        int index;
        int length = -1;
        do {
            index = sourceLength;

            // Find nearest prefix
            for (String prefix : prefixes) {
                int ni = source.findFrom(start, prefix.toCharArray(), true);
                // Index 0 is a valid position: a URL can be the very first thing in the source.
                // Assumes findFrom reports "not found" with a negative value.
                if (ni >= 0 && ni < index) {
                    index = ni;
                }
            }

            // Try to parse it
            if (index < sourceLength) {
                length = parseLength(source, index, null);
                if (length < 0) {
                    // It is not a URL, continue the search right after the false candidate.
                    start = index + 1;
                }
            }
        } while (length < 0 && index < sourceLength);

        return length >= 0 ? index : -1;
    }

    /**
     * Prepare URL's prefixes.
     *
     * @return list of schema prefixes and local prefixes if local URL are allowed.
     */
    private List<String> preparePrefixes() {
        // Prepare prefixes for all schemas
        Schema[] schemas = Schema.values();
        List<String> prefixes = new ArrayList<String>(
                schemas.length + (local ? LOCAL_PREFIXES.length : 0)
        );
        for (Schema schema : schemas) {
            prefixes.add(schema.getPrefix());
        }

        // For local URLs prefixes are "./", "../", "/"
        if (local) {
            Collections.addAll(prefixes, LOCAL_PREFIXES);
        }
        return prefixes;
    }

    /**
     * Parse URL. The offset must be on a URL element
     *
     * @param source     text source
     * @param offset     offset for parsing
     * @param terminator a terminator element which can be used to cut some URL parts. Can be null.
     * @return URL length or -1 if it is not a URL.
     */
    @Override
    int parseLength(Source source, int offset, ProcPatternElement terminator) {
        int length = 0;

        // A schema like http://, https://, mailto:
        Schema schema = parseSchema(source, offset);
        if (schema != null && !schemaless) {
            length += schema.getLength();
        } else if ((schema == null && !local && !schemaless) || (schema != null)) {
            // Either a schema is required but absent, or a schema is present in schemaless mode.
            return -1;
        }

        // An authority data like john.smith:pa55W0RD@
        if (schema != null) {
            int authorityLength = parseAuthority(source, offset + length);
            if (schema.isAuthorityMandatory() && authorityLength <= 0) {
                return -1;
            }
            length += authorityLength;
        }

        // A host like example.com
        if (schema != null || schemaless) {
            int hostLength = parseHost(source, offset + length, terminator);
            if (hostLength <= 0) {
                return -1;
            }
            length += hostLength;
        }

        // Parse port
        if (schema != null || schemaless) {
            int portLength = parsePort(source, offset + length);
            length += portLength;
        }

        // For local URLs it is possible to use "./", "../", "/"
        if (schema == null && local) {
            int prefixLength = parseRegex(source, offset, calcEnd(source, terminator), REGEX_LOCAL_PREFIX);
            if (prefixLength <= 0) {
                return -1;
            }
            // The trailing slash of the prefix is left for the path, which has to start with "/".
            length += prefixLength - 1;
        }

        // A path like /home/web
        int pathLength = parsePath(source, offset + length, terminator);
        if (local && schema == null && pathLength <= 0) {
            return -1;
        }
        length += pathLength;

        // A query like ?key1=value1&key2=value2
        length += parseQuery(source, offset + length, terminator);

        // A fragment like #anchor
        length += parseFragment(source, offset + length, terminator);

        return length;
    }

    /**
     * Parse a fragment like "#anchor".
     *
     * @param source     text source
     * @param offset     offset where the fragment is expected
     * @param terminator a terminator element which limits the parsed region. Can be null.
     * @return the value returned by {@code parseRegex} for the fragment pattern
     */
    int parseFragment(Source source, int offset, ProcPatternElement terminator) {
        return parseRegex(source, offset, calcEnd(source, terminator), REGEX_FRAGMENT);
    }

    /**
     * Parse a path like "/home/web".
     *
     * @param source     text source
     * @param offset     offset where the path is expected
     * @param terminator a terminator element which limits the parsed region. Can be null.
     * @return the value returned by {@code parseRegex} for the path pattern
     */
    int parsePath(Source source, int offset, ProcPatternElement terminator) {
        return parseRegex(source, offset, calcEnd(source, terminator), REGEX_PATH);
    }

    /**
     * Parse a port like ":8080". The terminator is not taken into account, the region is limited
     * by the end of the source only.
     *
     * @param source text source
     * @param offset offset where the port is expected
     * @return the value returned by {@code parseRegex} for the port pattern
     */
    private int parsePort(Source source, int offset) {
        return parseRegex(source, offset, source.length(), REGEX_PORT);
    }

    /**
     * Detect which schema prefix the source has at the offset. The comparison ignores case.
     *
     * @param source text source
     * @param offset offset where the schema is expected
     * @return the first matching schema in declaration order or null if there is no one
     */
    Schema parseSchema(Source source, int offset) {
        int sourceLength = source.length();
        for (Schema schema : Schema.values()) {
            // Near the end of the source the candidate is shorter than the prefix and can't match.
            int end = Math.min(offset + schema.getLength(), sourceLength);
            String candidate = source.subSequence(offset, end).toString();
            if (schema.getPrefix().equalsIgnoreCase(candidate)) {
                return schema;
            }
        }
        return null;
    }

    /**
     * Supported URL schemas with their textual prefixes.
     */
    enum Schema {
        HTTP("http://"),
        HTTPS("https://"),
        FTP("ftp://"),
        MAILTO("mailto:", true);

        private final String prefix;
        private final boolean authorityMandatory;

        /**
         * A schema with an optional authority part.
         *
         * @param prefix the text a URL of this schema starts with
         */
        Schema(String prefix) {
            this(prefix, false);
        }

        /**
         * @param prefix             the text a URL of this schema starts with
         * @param authorityMandatory if true then a URL without an authority part is rejected
         */
        Schema(String prefix, boolean authorityMandatory) {
            this.prefix = prefix;
            this.authorityMandatory = authorityMandatory;
        }

        /**
         * @return the text a URL of this schema starts with
         */
        public String getPrefix() {
            return prefix;
        }

        /**
         * @return true if a URL of this schema must have an authority part
         */
        public boolean isAuthorityMandatory() {
            return authorityMandatory;
        }

        /**
         * @return the prefix length in characters
         */
        public int getLength() {
            return prefix.length();
        }
    }

    /**
     * @return an XML-like description of the element with its name and flags
     */
    @Override
    public String toString() {
        return MessageFormat.format(
                "<url name=\"{0}\" ghost=\"{1}\" local=\"{2}\" schemaless=\"{3}\"/>",
                getName(), ghost, local, schemaless
        );
    }
}