/**
*
*/
package org.korsakow.domain.k3.code;
import java.util.ArrayList;
import java.util.List;
import org.korsakow.domain.Keyword;
import org.korsakow.domain.Rule;
import org.korsakow.domain.RuleFactory;
import org.korsakow.domain.interf.IRule;
import org.korsakow.ide.rules.RuleType;
/**
 * K3 source code parser, with helper methods to generate K5 Rules.
 * <p>
 * Parsing happens in two steps: {@link #tokenize(String)} turns a K3 rule string into
 * {@link K3Lexeme}s, and {@link #createRules(List)} turns those lexemes into K5 rules.
 * {@link #parse(String, Long)} chains both steps.
 *
 * @author d
 */
public class K3RuleParser
{
    /**
     * Lexemes produced by the most recent call to {@link #parse(String, Long)};
     * {@code null} until parse has been called.
     */
    List<K3Lexeme> k3Tokens;

    /**
     * Tells whether a keyword is acceptable.
     * <p>
     * Currently every keyword is accepted: see K5RuleParser about why keywords are not
     * validated. K3 also seems to allow just about anything, so any stricter check
     * (for instance {@code keyword.matches("[^-<>+]+")}) would have to be introduced with care.
     *
     * @param keyword the keyword to check
     * @return always {@code true}
     */
    public boolean isValidKeyword(String keyword)
    {
        return true;
    }

    /**
     * Throws if {@link #isValidKeyword(String)} rejects the keyword.
     *
     * @param keyword the keyword to check
     * @throws RuleParserException if the keyword is not valid
     */
    public void validateKeyword(String keyword) throws RuleParserException
    {
        if (!isValidKeyword(keyword))
            throw new RuleParserException("invalid keyword: \"" + keyword + "\"");
    }

    /**
     * Splits K3 rule code into lexemes.
     * <p>
     * Tokens are separated by runs of whitespace (spaces, tabs, line breaks). A token that is
     * exactly the clear-previous-links or keep-previous-links symbol becomes the matching lexeme.
     * Otherwise the last character of the token is looked at as an operator and the rest of the
     * token is the keyword. If the last character is not a known operator, the whole token is
     * taken as a keyword that is both inbound and lookup.
     *
     * @param code the K3 rule code; {@code null} is treated like an empty string
     * @return the lexemes in the order they appear in {@code code}; never {@code null}
     * @throws RuleParserException if the random keyword is used with the exclusion or required
     *         operator, or if a keyword fails validation
     */
    public List<K3Lexeme> tokenize(String code) throws RuleParserException
    {
        List<K3Lexeme> tokens = new ArrayList<K3Lexeme>();
        if (code == null)
            return tokens;

        // Split on any whitespace run, not just a single space: the per-token trim() below
        // already discards tabs and line breaks at the edges, so they must separate tokens
        // in the middle as well instead of ending up inside a keyword.
        for (String part : code.split("\\s+")) {
            part = part.trim();
            if (part.length() == 0)
                continue;

            K3Lexeme lexeme;
            if (part.equals(K3Symbol.CLEAR_PREVIOUS_LINKS)) {
                lexeme = new K3Lexeme(K3OpType.CLEAR_PREVIOUS_LINKS, null, K3Symbol.CLEAR_PREVIOUS_LINKS);
            } else if (part.equals(K3Symbol.KEEP_PREVIOUS_LINKS)) {
                lexeme = new K3Lexeme(K3OpType.KEEP_PREVIOUS_LINKS, null, K3Symbol.KEEP_PREVIOUS_LINKS);
            } else {
                // the operator, if any, is the trailing character; everything before it is the keyword
                char op = part.charAt(part.length() - 1);
                String keyword = part.substring(0, part.length() - 1);
                switch (op)
                {
                case K3Symbol.INBOUND_KEYWORD:
                    validateKeyword(keyword);
                    lexeme = new K3Lexeme(K3OpType.INBOUND_KEYWORD, op, keyword);
                    break;
                case K3Symbol.EXCLUSION_KEYWORD:
                    rejectRandomKeyword(keyword);
                    validateKeyword(keyword);
                    lexeme = new K3Lexeme(K3OpType.KEYWORD_EXCLUSION, op, keyword);
                    break;
                case K3Symbol.LOOKUP_KEYWORD:
                    validateKeyword(keyword);
                    lexeme = new K3Lexeme(K3OpType.KEYWORD_LOOKUP, op, keyword);
                    break;
                case K3Symbol.REQUIRED_KEYWORD:
                    rejectRandomKeyword(keyword);
                    validateKeyword(keyword);
                    lexeme = new K3Lexeme(K3OpType.KEYWORD_REQUIRED, op, keyword);
                    break;
                default:
                    // no operator suffix: the whole token (keyword + op) is the keyword
                    if (isValidKeyword(keyword + op)) {
                        lexeme = new K3Lexeme(K3OpType.INBOUND_AND_LOOKUP_KEYWORD, keyword + op);
                    } else
                        throw new RuleParserException("invalid op: " + op);
                }
            }
            tokens.add(lexeme);
        }
        return tokens;
    }

    /**
     * Rejects the random keyword in contexts (exclusion, required) where it cannot be used.
     *
     * @param keyword the keyword about to be used
     * @throws RuleParserException if {@code keyword} is the random keyword
     */
    private void rejectRandomKeyword(String keyword) throws RuleParserException
    {
        if (K3Symbol.RANDOM_KEYWORD.equals(keyword))
            throw new RuleParserException("cannot use '"+keyword+"' in this context");
    }

    /**
     * Generates K5 rules from K3 lexemes.
     * <p>
     * The rules are emitted in this order: clear scores, set endfilm, random lookups,
     * endfilm lookups, keyword lookup, require endfilm, require keywords, exclude endfilm,
     * exclude keywords. One rule is created per occurrence of a random/endfilm keyword, while
     * all regular keywords of a kind are gathered into a single rule.
     *
     * @param lexemes the lexemes to translate, typically the result of {@link #tokenize(String)}
     * @return the generated rules; empty if the lexemes produce none
     * @throws RuleParserException if a lexeme has an op type this parser does not handle
     */
    public List<IRule> createRules(List<K3Lexeme> lexemes) throws RuleParserException
    {
        List<String> outboundKeywords = new ArrayList<String>();
        List<String> requireKeywords = new ArrayList<String>();
        List<String> excludeKeywords = new ArrayList<String>();
        boolean clearScores = false;
        boolean setendfilm = false;

        for (K3Lexeme lexeme : lexemes)
        {
            switch(lexeme.getOpType())
            {
            case INBOUND_AND_LOOKUP_KEYWORD:
                // this special case combines what INBOUND_KEYWORD and KEYWORD_LOOKUP do
                if (K3Symbol.ENDFILM_KEYWORD.equals(lexeme.getToken()))
                    setendfilm = true;
                outboundKeywords.add(lexeme.getToken());
                break;
            case KEYWORD_LOOKUP:
                outboundKeywords.add(lexeme.getToken());
                break;
            case INBOUND_KEYWORD:
                // the endfilm special case aside, inbound keywords are lost in translation:
                // K5 doesn't really handle inbound keywords as rules, so they are
                // handled in a completely separate way
                if (K3Symbol.ENDFILM_KEYWORD.equals(lexeme.getToken()))
                    setendfilm = true;
                break;
            case KEYWORD_REQUIRED:
                requireKeywords.add(lexeme.getToken());
                break;
            case KEYWORD_EXCLUSION:
                excludeKeywords.add(lexeme.getToken());
                break;
            case CLEAR_PREVIOUS_LINKS:
                clearScores = true;
                break;
            case KEEP_PREVIOUS_LINKS:
                // a later "keep" overrides an earlier "clear"
                clearScores = false;
                break;
            default:
                throw new RuleParserException("k3 rule not yet parsable: " + lexeme.getSymbol());
            }
        }

        List<IRule> rules = new ArrayList<IRule>();

        // the order in which we generate most of the rules is important!
        // generally: Lookup, Require, Exclude
        if (clearScores) {
            Rule rule = RuleFactory.createNew(RuleType.ClearScores.getId());
            rules.add(rule);
        }
        if (setendfilm) { // this rule is order-independent
            Rule rule = RuleFactory.createNew(RuleType.SetEndfilm.getId());
            rules.add(rule);
        }

        // --- lookups; the order of random/endfilm vs regular is not important
        int randomLookups = 0;
        int endfilmLookups = 0;
        List<String> regularKeywords = new ArrayList<String>();
        for (String keyword : outboundKeywords) {
            if (K3Symbol.RANDOM_KEYWORD.equals(keyword)) {
                ++randomLookups;
            } else if (K3Symbol.ENDFILM_KEYWORD.equals(keyword)) {
                ++endfilmLookups;
            } else {
                regularKeywords.add(keyword);
            }
        }
        for (int i = 0; i < randomLookups; ++i) {
            Rule rule = RuleFactory.createNew(RuleType.RandomLookup.getId());
            rules.add(rule);
        }
        for (int i = 0; i < endfilmLookups; ++i) {
            Rule rule = RuleFactory.createNew(RuleType.EndfilmLookup.getId());
            rules.add(rule);
        }
        if (!regularKeywords.isEmpty()) {
            Rule rule = RuleFactory.createNew(RuleType.KeywordLookup.getId());
            rule.setKeywords(Keyword.fromStrings(regularKeywords));
            rules.add(rule);
        }

        // --- requires: must be after all other keyword lookup rules except exclusion;
        // the order of endfilm vs regular is not important
        int endfilmRequires = 0;
        List<String> regularRequires = new ArrayList<String>();
        for (String keyword : requireKeywords) {
            if (K3Symbol.ENDFILM_KEYWORD.equals(keyword)) {
                ++endfilmRequires;
            } else {
                regularRequires.add(keyword);
            }
        }
        for (int i = 0; i < endfilmRequires; ++i) {
            Rule rule = RuleFactory.createNew(RuleType.RequireEndfilm.getId());
            rules.add(rule);
        }
        if (!regularRequires.isEmpty()) {
            Rule rule = RuleFactory.createNew(RuleType.RequireKeywords.getId());
            rule.setKeywords(Keyword.fromStrings(regularRequires));
            rules.add(rule);
        }

        // --- excludes: must be after all other keyword lookup rules;
        // the order of endfilm vs regular is not important
        int endfilmExcludes = 0;
        List<String> regularExcludes = new ArrayList<String>();
        for (String keyword : excludeKeywords) {
            if (K3Symbol.ENDFILM_KEYWORD.equals(keyword)) {
                ++endfilmExcludes;
            } else {
                regularExcludes.add(keyword);
            }
        }
        for (int i = 0; i < endfilmExcludes; ++i) {
            Rule rule = RuleFactory.createNew(RuleType.ExcludeEndfilm.getId());
            rules.add(rule);
        }
        if (!regularExcludes.isEmpty()) {
            Rule rule = RuleFactory.createNew(RuleType.ExcludeKeywords.getId());
            rule.setKeywords(Keyword.fromStrings(regularExcludes));
            rules.add(rule);
        }

        return rules;
    }

    /**
     * Parses K3 rule code into K5 rules.
     * <p>
     * K3 and K5 rules are many-to-many. That is, any one K3 rule might correspond to many
     * K5 rules and vice versa.
     * <p>
     * The lexemes of the parsed code are kept in {@link #k3Tokens}.
     *
     * @param code the K3 rule code
     * @param triggerTime trigger time to set on every generated rule; if null no time is set
     * @return the generated K5 rules
     * @throws RuleParserException if the code cannot be tokenized or translated
     */
    public List<IRule> parse(String code, Long triggerTime) throws RuleParserException
    {
        k3Tokens = tokenize(code);
        List<IRule> rules = createRules(k3Tokens);
        if (triggerTime != null) {
            for (IRule rule : rules)
                rule.setTriggerTime(triggerTime);
        }
        return rules;
    }

    /**
     * Parses K3 rule code into K5 rules without setting a trigger time.
     *
     * @param code the K3 rule code
     * @return the generated K5 rules
     * @throws RuleParserException if the code cannot be tokenized or translated
     * @see #parse(String, Long)
     */
    public List<IRule> parse(String code) throws RuleParserException
    {
        return parse(code, null);
    }
}