package org.kefirsf.bb.proc;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Base class for pattern elements that recognise a URL in the source text.
 * <p>
 * Subclasses supply {@link #parseLength(Source, int, ProcPatternElement)} and
 * {@link #findIn(Source)}; this class provides the shared {@code parse} /
 * {@code isNextIn} logic and regex based helpers for individual URL parts.
 *
 * @author kefir
 */
public abstract class AbstractUrl extends ProcNamedElement implements ProcPatternElement {
    /**
     * User-info part of a URL: a run of allowed characters, optionally followed by
     * {@code :} and another run, terminated by {@code @}.
     */
    static final Pattern REGEX_AUTHORITY = Pattern.compile(
            "[\\w\\.\\-~_!\\$&'\\(\\)%;:=\\+,\\*]+(:[\\w\\.\\-~_!\\$&'\\(\\)%;:=\\+,\\*]+)?@"
    );

    /**
     * One host label: starts with a letter or digit, continues with word characters,
     * and every hyphen must be followed by at least one word character.
     * Written so that a label can be matched in only one way; the nested form
     * {@code (-?\w+)*} accepts the same strings but lets the regex engine split a run of
     * word characters in many ways, which can lead to very heavy backtracking.
     */
    private static final String HOST_LABEL = "[\\da-zA-Z]\\w*(?:\\-\\w+)*";

    /**
     * Host name: dot separated labels with an optional trailing dot.
     */
    private static final Pattern REGEX_HOST = Pattern.compile(
            "(?:" + HOST_LABEL + "\\.)*" + HOST_LABEL + "\\.?"
    );

    /**
     * A query key or value. The character class already contains {@code %} and all
     * hexadecimal digits, so percent-encoded triplets need no separate alternative
     * (an extra {@code %XX} branch would only make the match ambiguous).
     */
    private static final String QUERY_TOKEN = "[\\w%\\-\\+]+";

    /**
     * A query parameter: a key optionally followed by {@code =value}.
     */
    private static final String QUERY_PARAM = QUERY_TOKEN + "(?:=" + QUERY_TOKEN + ")?";

    /**
     * Query string: {@code ?} followed by parameters separated by {@code &} or {@code ;}.
     * Everything after the question mark is optional.
     */
    private static final Pattern REGEX_QUERY = Pattern.compile(
            "\\?(?:" + QUERY_PARAM + "[&;])*(?:" + QUERY_PARAM + ")?"
    );

    /**
     * Don't move the cursor offset.
     * When {@code true}, {@link #parse(Context, ProcPatternElement)} stores the attribute
     * but leaves the source offset where it was.
     */
    protected final boolean ghost;

    /**
     * @param name  element name, passed to the superclass; also used as the attribute name in
     *              {@link #parse(Context, ProcPatternElement)}
     * @param ghost if {@code true} the source offset is not advanced after a successful parse
     */
    public AbstractUrl(String name, boolean ghost) {
        super(name);
        this.ghost = ghost;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Measures the URL at the current source offset with
     * {@link #parseLength(Source, int, ProcPatternElement)}. On success the value of
     * {@code source.sub(offset + length)} is stored in the context under this element's name
     * and, unless this element is a ghost, the source offset is advanced by the length.
     *
     * @return {@code true} if the computed length is not negative, {@code false} otherwise
     */
    public boolean parse(Context context, ProcPatternElement terminator) throws NestingException {
        Source source = context.getSource();
        int length = parseLength(source, source.getOffset(), terminator);
        if (length < 0) {
            return false;
        }

        context.setAttribute(getName(), source.sub(source.getOffset() + length));
        if (!ghost) {
            source.incOffset(length);
        }
        return true;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Reports whether {@link #parseLength(Source, int, ProcPatternElement)} yields a
     * non-negative length at the current source offset, using the context's terminator.
     * Neither the context nor the source offset is modified by this method itself.
     */
    public boolean isNextIn(Context context) {
        Source source = context.getSource();
        return parseLength(source, source.getOffset(), context.getTerminator()) >= 0;
    }

    /**
     * Search for this element in the source.
     *
     * @param source text source
     * @return implementation defined position; see the implementing subclass
     */
    public abstract int findIn(Source source);

    /**
     * Parse URL. The offset must be on a URL element
     *
     * @param source     text source
     * @param offset     offset for parsing
     * @param terminator a terminator element which can be used to cut some URL parts. Can be null.
     * @return URL length or -1 if it is not a URL.
     */
    abstract int parseLength(Source source, int offset, ProcPatternElement terminator);

    /**
     * Measure a host name starting at the given offset.
     *
     * @param source     text source
     * @param offset     position where the host is expected to start
     * @param terminator optional terminator limiting the examined text. Can be null.
     * @return length of the host, or 0 if there is no host at the offset
     */
    int parseHost(Source source, int offset, ProcPatternElement terminator) {
        return parseRegex(source, offset, calcEnd(source, terminator), REGEX_HOST);
    }

    /**
     * Calculate where the examined text ends.
     *
     * @param source     text source
     * @param terminator optional terminator element. Can be null.
     * @return the index reported by {@code terminator.findIn(source)} when a terminator is
     * given and that index is greater than zero, otherwise the source length
     */
    protected int calcEnd(Source source, ProcPatternElement terminator) {
        if (terminator != null) {
            int ind = terminator.findIn(source);
            // Only strictly positive indexes are accepted as an end position.
            if (ind > 0) {
                return ind;
            }
        }
        return source.length();
    }

    /**
     * Match a pattern against the beginning of the source range {@code [offset, end)}.
     *
     * @param source  text source
     * @param offset  start of the examined range
     * @param end     end of the examined range (exclusive)
     * @param pattern pattern which must match at the start of the range
     * @return number of matched characters, or 0 if the pattern does not match or the range
     * is empty
     */
    protected int parseRegex(Source source, int offset, int end, Pattern pattern) {
        // The end may come from a terminator lookup that does not know about this offset,
        // so it can lie before the offset. An empty or inverted range cannot contain a
        // match of positive length; answer 0 instead of asking for an invalid subsequence.
        if (end <= offset) {
            return 0;
        }

        Matcher matcher = pattern.matcher(source.subSequence(offset, end));
        // lookingAt() anchors the match at index 0, so end() is the matched length.
        return matcher.lookingAt() ? matcher.end() : 0;
    }

    /**
     * Measure the user-info part ({@code user[:password]@}) starting at the given offset.
     * The whole remainder of the source is examined; no terminator is applied.
     *
     * @param source text source
     * @param offset position where the authority is expected to start
     * @return length of the authority including the trailing {@code @}, or 0 if absent
     */
    int parseAuthority(Source source, int offset) {
        return parseRegex(source, offset, source.length(), REGEX_AUTHORITY);
    }

    /**
     * Measure a query string ({@code ?key=value&...}) starting at the given offset.
     *
     * @param source     text source
     * @param offset     position where the query is expected to start
     * @param terminator optional terminator limiting the examined text. Can be null.
     * @return length of the query including the leading {@code ?}, or 0 if absent
     */
    int parseQuery(Source source, int offset, ProcPatternElement terminator) {
        return parseRegex(source, offset, calcEnd(source, terminator), REGEX_QUERY);
    }
}