package com.athena.asm.tool.notifier.selector;
import java.util.ArrayList;
import java.util.List;
import com.athena.asm.data.Post;
import com.athena.asm.tool.notifier.PostField;
import com.athena.asm.tool.notifier.markup.Markup;
/**
* 删选规则
* @author aleck
*
* WholeWordOnly参数只对英文的Pattern有效。
* 不用于正则表达式中的\bword\b,这里实际上只要求匹配时的前后分别来自于以下的1,2类,或者均来自于2类。
* 在处理中文时有必要,比如 ABC单词,如果使用 \bABC\b去匹配,是找不到的。
* 1. [a-zA-Z0-9]或者'-'
* 2. 其余字母
*
*/
public class KeywordCriteria extends Criteria {
public static enum Type {
REQUIRE,
AVOID,
}
private static final String WORD_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
public final Type type;
public final String keyword;
public final boolean caseSensitive;
public final boolean wholeWordOnly;
public KeywordCriteria(PostField field, Type type, String keyword,
boolean caseSensitive, boolean wholeWordOnly) {
super(field);
this.type = type;
this.keyword = (caseSensitive ? keyword : keyword.toUpperCase());
this.caseSensitive = caseSensitive;
this.wholeWordOnly = wholeWordOnly;
}
@Override
public boolean qualify(Post post) {
String target;
if (field == PostField.TITLE) {
target = post.getTitle();
} else if (field == PostField.CONTENT) {
target = post.getContent().toString();
} else {
target = null;
}
// 如果是null则直接不匹配
if (target == null) {
return false;
} else {
if (!caseSensitive) {
target = target.toUpperCase();
}
int idx, start = 0;
boolean found = false;
while ((idx = target.indexOf(keyword, start)) != -1) {
found = !wholeWordOnly ||
checkBoundary(target, idx - 1) &&
checkBoundary(target, idx + keyword.length());
if (found)
break;
start = idx + keyword.length();
}
return (type == Type.REQUIRE && found || type == Type.AVOID && !found);
}
}
/**
* 检查 idx 和 idx+1 这两个字符是否构成一个 Boundary
* @param text
* @param idx
* @return
*/
private boolean checkBoundary(CharSequence text, int idx) {
if (idx < 0 || idx + 1 >= text.length()) {
// 两头
return true;
} else {
char c1 = text.charAt(idx);
char c2 = text.charAt(idx + 1);
boolean wc1 = WORD_CHARS.indexOf(c1) != -1;
boolean wc2 = WORD_CHARS.indexOf(c2) != -1;
return (wc1 != wc2 || !wc1 && !wc2);
}
}
@Override
public boolean applicable(PostField field) {
return (field == PostField.TITLE || field == PostField.CONTENT);
}
@Override
public boolean requirePostContent() {
return field == PostField.CONTENT;
}
@Override
public List<Markup> mark(Post post) {
if (type == Type.REQUIRE) {
String target;
if (field == PostField.TITLE) {
target = post.getTitle();
} else if (field == PostField.CONTENT) {
target = post.getContent().toString();
} else {
target = null;
}
// 如果是null则直接不匹配
if (target == null) {
return new ArrayList<Markup>();
} else {
if (!caseSensitive) {
target = target.toUpperCase();
}
List<Markup> markups = new ArrayList<Markup>();
int idx, start = 0;
while ((idx = target.indexOf(keyword, start)) != -1) {
boolean found = !wholeWordOnly ||
checkBoundary(target, idx - 1) &&
checkBoundary(target, idx + keyword.length());
if (found) {
markups.add(new Markup(post, field, idx, idx + keyword.length()));
}
start = idx + keyword.length();
}
return markups;
}
} else {
return new ArrayList<Markup>();
}
}
}