package semanticMarkup.ling.learn.knowledge;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
public class Constant {
// the following two patterns are used in mySQL rlike
public static final String PREFIX = "ab|ad|bi|deca|de|dis|di|dodeca|endo|end|e|hemi|hetero|hexa|homo|infra|inter|ir|macro|mega|meso|micro|mid|mono|multi|ob|octo|over|penta|poly|postero|post|ptero|pseudo|quadri|quinque|semi|sub|sur|syn|tetra|tri|uni|un|xero|[a-z0-9]+_";
// 3_nerved, )_nerved, dealt with in subroutine
public static final String SUFFIX = "er|est|fid|form|ish|less|like|ly|merous|most|shaped";
public static final String TAGS = "";
public static final String PLENDINGS = "[^aeiou]ies|i|ia|(x|ch|sh)es|ves|ices|ae|s";
public static final String SUBSTRUCTURESTRING = "part|parts|area|areas|portion|portions";
public static final String PROPERNOUN = "propernouns"; // EOL
public static final String IGNORE_PATTERN = "(IGNOREPTN)"; // disabled
public static final String NENDINGS = "\\w\\w(?:ist|sure)\\b";
public static final String VENDINGS = "(ing)\\b";
public static final String SENDINGS = "(on|is|ex|ix|um|us)\\b";
public static final String PENDINGS = "(a|ia|es|ices|i|ae)\\b";
// abbreviations may appear in original sentence
//public static final String PEOPLE_ABBR = "jr|mr|mrs|ms|dr|prof|sr|sens?|reps?|gov|attys?|supt|det|rev";
public static final String PEOPLE_ABBR = "jr|mr|mrs|ms|dr|prof|sr|sens|reps|gov|attys|supt|det|rev";
public static final String ARMY_ABBR= "col|gen|lt|cmdr|adm|capt|sgt|cpl|maj";
public static final String INSTITUTES_ABBR = "dept|univ|assn|bros";
public static final String COMPANIES_ABBR = "inc|ltd|co|corp";
// the question mark mean the prior character can be exist or not
public static final String PLACES_ABBR = "arc|al|ave|blv?d|cl|ct|cres|dr|expy?|dist|mt|ft|fw?y|hwa?y|la|pde?|pl|plz|rd|st|tce|Ala|Ariz|Ark|Cal|Calif|Col|Colo|Conn|Del|Fed|Fla|Ga|Ida|Id|Ill|Ind|Ia|Kan|Kans|Ken|Ky|La|Me|Md|Is|Mass|Mich|Minn|Miss|Mo|Mont|Neb|Nebr|Nev|Mex|Okla|Ok|Ore|Penna|Penn|Pa|Dak|Tenn|Tex|Ut|Vt|Va|Wash|Wis|Wisc|Wy|Wyo|USAFA|Alta|Man|Ont|Qué|Sask|Yuk";
//public static final String PLACES_ABBR = "arc|al|ave|blvd|cl|ct|cres|dr|expy|dist|mt|ft|fwy|hway|la|pde|pl|plz|rd|st|tce|Ala|Ariz|Ark|Cal|Calif|Col|Colo|Conn|Del|Fed|Fla|Ga|Ida|Id|Ill|Ind|Ia|Kan|Kans|Ken|Ky|La|Me|Md|Is|Mass|Mich|Minn|Miss|Mo|Mont|Neb|Nebr|Nev|Mex|Okla|Ok|Ore|Penna|Penn|Pa|Dak|Tenn|Tex|Ut|Vt|Va|Wash|Wis|Wisc|Wy|Wyo|USAFA|Alta|Man|Ont|Qu��|Sask|Yuk";
public static final String MONTHS_ABBR = "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|sept";
public static final String MISC_ABBR = "vs|etc|no|esp";
public static final String BOT1_ABBR = "diam|sq|Rottb";
public static final String BOT2_ABBR = "ca|fl|Fl|Fr|fr|var";
public static final String LATIN_ABBR = "et al";
public static final String mptn = "((?:[mbq][,&]*)*(?:m|b|q(?=[pon])))";// grouped #may contain q but not the last m, unless it is followed by a p
public static final String nptn = "((?:[nop][,&]*)*[nop])"; // grouped #must present, no q allowed
public static final String bptn = "([,;:\\.]*$|,*[bm]|(?<=[pon]),*q)"; // grouped #when following a p, a b could be a q
public static final String SEGANDORPTN = "(?:"+mptn+"?"+nptn+")"; // ((?:[mq],?)*&?(?:m|q(?=p))?)((?:[np],?)*&?[np])
public static final String ANDORPTN = "^(?:"+SEGANDORPTN+"[,&]+)*"+SEGANDORPTN+bptn;
public String CHARACTER;
public String CLUSTERSTRING;
public String FORBIDDEN; // Words in this list can not be treated as boundaries "to|a|b" etc.
public String NUMBER;
public String PREPOSITION;
public String PRONOUN;
public String STOP;
public Set<String> singularExceptions;
public Set<String> forbiddenWords;
public Set<String> prepositionWords;
public Set<String> pronounWords;
public Set<String> characterWords;
public Set<String> numberWords;
public Set<String> clusterStringWords;
public Set<String> stopWords;
private String singularExceptionList;
public Constant() {
this.CHARACTER = "lengths|length|lengthed|width|widths|widthed|heights|height|character|characters|distribution|distributions|outline|outlines|profile|profiles|feature|features|form|forms|mechanism|mechanisms|nature|natures|shape|shapes|shaped|size|sizes|sized";
this.CLUSTERSTRING = "group|groups|clusters|cluster|arrays|array|series|fascicles|fascicle|pairs|pair|rows|number|numbers|\\d+";
this.FORBIDDEN = "to|and|or|nor";
this.NUMBER = "zero|one|ones|first|two|second|three|third|thirds|four|fourth|fourths|quarter|five|fifth|fifths|six|sixth|sixths|seven|seventh|sevenths|eight|eighths|eighth|nine|ninths|ninth|tenths|tenth";
this.PREPOSITION = "above|across|after|along|around|as|at|before|below|beneath|between|beyond|by|during|for|from|in|into|near|of|off|on|onto|out|outside|over|than|through|throughout|toward|towards|up|upward|with|without";
this.PRONOUN = "all|each|every|some|few|individual|both|other";
this.STOP = "state|page|fig|"
+ "a|about|above|across|after|along|also|although|amp|an|and|are|as|at|be|because|become|becomes|becoming|been|before|behind|being|beneath|between|beyond|but|by|ca|can|could|did|do|does|doing|done|during|for|from|had|has|have|hence|here|how|if|in|into|inside|inward|is|it|its|least|may|might|more|most|near|no|not|of|off|on|onto|or|out|outside|outward|over|should|so|than|that|the|then|there|these|this|those|throughout|to|toward|towards|under|up|upward|via|was|were|what|when|where|whereas|which|why|with|within|without|would";
this.singularExceptionList = "medium";
this.characterWords = new HashSet<String>();
this.characterWords.addAll(Arrays.asList(this.CHARACTER.split("\\|")));
this.clusterStringWords = new HashSet<String>();
this.clusterStringWords.addAll(Arrays.asList(this.CLUSTERSTRING.split("\\|")));
this.forbiddenWords = new HashSet<String>();
this.forbiddenWords.addAll(Arrays.asList(this.FORBIDDEN.split("\\|")));
this.numberWords = new HashSet<String>();
this.numberWords.addAll(Arrays.asList(this.NUMBER.split("\\|")));
this.prepositionWords = new HashSet<String>();
this.prepositionWords.addAll(Arrays.asList(this.PREPOSITION.split("\\|")));
this.pronounWords = new HashSet<String>();
this.pronounWords.addAll(Arrays.asList(this.PRONOUN.split("\\|")));
this.singularExceptions = new HashSet<String>();
this.singularExceptions.addAll(Arrays.asList(this.singularExceptionList.split("\\|")));
this.stopWords = new HashSet<String>();
this.stopWords.addAll(Arrays.asList(this.STOP.split("\\|")));
}
public void updateCharacter() {
this.CHARACTER = StringUtils.join(this.characterWords, '|');
}
public void updateClusterString() {
this.CLUSTERSTRING = StringUtils.join(this.clusterStringWords, '|');
}
public void updateForbidden() {
this.FORBIDDEN = StringUtils.join(this.forbiddenWords, '|');
}
public void updateNumber() {
this.NUMBER = StringUtils.join(this.numberWords, '|');
}
public void updatePreposition() {
this.PREPOSITION = StringUtils.join(this.prepositionWords, '|');
}
public void updatePronoun() {
this.PRONOUN = StringUtils.join(this.pronounWords, '|');
}
public void updateStop() {
this.STOP = StringUtils.join(this.stopWords, '|');
}
}