Learner.java example

Explorer
charaparser-unsupervised-master
- src
  - main
    - java
      - semanticMarkup
        core
        Treatment.java
        io
        input
        lib
        db
        ParentTagProvider.java
        know
        IGlossary.java
        IPOSKnowledgeBase.java
        Stemmer.java
        lib
        InMemoryGlossary.java
        WordNetPOSKnowledgeBase.java
        knowledge
        KnowledgeBase.java
        ling
        Token.java
        learn
        Configuration.java
        ITerminologyLearner.java
        Learner.java
        UnsupervisedClauseMarkup.java
        auxiliary
        AjectiveReplacementForNoun.java
        FileLoader.java
        GetNounsAfterPtnReturnValue.java
        KnownTagCollection.java
        POS.java
        POSInfo.java
        SentenceLeadLengthComparator.java
        StringAndInt.java
        StringPair.java
        dataholder
        DataHolder.java
        DiscountedKey.java
        IsAValue.java
        ModifierTableValue.java
        SentenceStructure.java
        SingularPluralPair.java
        WordPOSKey.java
        WordPOSValue.java
        knowledge
        AdditionalBootstrapping.java
        AdditionalBootstrappingLearner.java
        AdjectiveSubjectBootstrappingLearner.java
        AdjectiveVerifier.java
        AndOrTagSetter.java
        AnnotationNormalizer.java
        CommaAsAndAnnotator.java
        CommonSubstructureAnnotator.java
        Constant.java
        CoreBootstrappingLearner.java
        DittoAnnotator.java
        FiniteSetsLoader.java
        HeuristicNounLearnerUseMorphology.java
        HeuristicNounLearnerUseSuffix.java
        IModule.java
        IgnorePatternAnnotator.java
        IgnoredFinalizer.java
        Initializer.java
        MarkupByPatternLearner.java
        ModifierTagSeparator.java
        NMBResolver.java
        NullSentenceTagger.java
        POSBasedAnnotator.java
        PatternBasedAnnotator.java
        PhraseClauseAnnotator.java
        PronounCharactersAnnotator.java
        UnknownWordBootstrappingLearner.java
        utility
        LearnerUtility.java
        StringUtility.java
        WordFormUtility.java
        pos
        POS.java
        transform
        ITokenizer.java
        lib
        OpenNLPSentencesTokenizer.java
        OpenNLPTokenizer.java
  - test
    - java
      - semanticMarkup
        ling
        learn
        DataHolderTest.java
        LearnerTest.java
        LearnerUtilityTest.java
        SentenceLeadLengthComparatorTest.java
        StringUtilityTest.java
        UnsupervisedClauseMarkupTest.java
        WordFormUtilityTest.java
        knowledge
        AndOrTagSetterTest.java
        CommonSubstructureAnnotatorTest.java
        CoreBootstrappingLearnerTest.java
        InitializerTest.java
        POSBasedAnnotatorTest.java
        UnknownWordBootstrappingTest.java
package semanticMarkup.ling.learn;

import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;

import semanticMarkup.core.Treatment;
import semanticMarkup.know.IGlossary;
import semanticMarkup.know.lib.WordNetPOSKnowledgeBase;
import semanticMarkup.knowledge.KnowledgeBase;
import semanticMarkup.ling.learn.auxiliary.GetNounsAfterPtnReturnValue;
import semanticMarkup.ling.learn.auxiliary.KnownTagCollection;
import semanticMarkup.ling.learn.auxiliary.POSInfo;
import semanticMarkup.ling.learn.auxiliary.SentenceLeadLengthComparator;
import semanticMarkup.ling.learn.auxiliary.StringAndInt;
import semanticMarkup.ling.learn.dataholder.DataHolder;
import semanticMarkup.ling.learn.dataholder.ModifierTableValue;
import semanticMarkup.ling.learn.dataholder.SentenceStructure;
import semanticMarkup.ling.learn.dataholder.WordPOSKey;
import semanticMarkup.ling.learn.dataholder.WordPOSValue;
import semanticMarkup.ling.learn.knowledge.AdditionalBootstrappingLearner;
import semanticMarkup.ling.learn.knowledge.AdjectiveSubjectBootstrappingLearner;
import semanticMarkup.ling.learn.knowledge.AdjectiveVerifier;
import semanticMarkup.ling.learn.knowledge.AndOrTagSetter;
import semanticMarkup.ling.learn.knowledge.AnnotationNormalizer;
import semanticMarkup.ling.learn.knowledge.CommaAsAndAnnotator;
import semanticMarkup.ling.learn.knowledge.CommonSubstructureAnnotator;
import semanticMarkup.ling.learn.knowledge.Constant;
import semanticMarkup.ling.learn.knowledge.CoreBootstrappingLearner;
import semanticMarkup.ling.learn.knowledge.DittoAnnotator;
import semanticMarkup.ling.learn.knowledge.FiniteSetsLoader;
import semanticMarkup.ling.learn.knowledge.HeuristicNounLearnerUseMorphology;
import semanticMarkup.ling.learn.knowledge.IgnorePatternAnnotator;
import semanticMarkup.ling.learn.knowledge.IgnoredFinalizer;
import semanticMarkup.ling.learn.knowledge.Initializer;
import semanticMarkup.ling.learn.knowledge.ModifierTagSeparator;
import semanticMarkup.ling.learn.knowledge.NMBResolver;
import semanticMarkup.ling.learn.knowledge.NullSentenceTagger;
import semanticMarkup.ling.learn.knowledge.POSBasedAnnotator;
import semanticMarkup.ling.learn.knowledge.PatternBasedAnnotator;
import semanticMarkup.ling.learn.knowledge.PhraseClauseAnnotator;
import semanticMarkup.ling.learn.knowledge.PronounCharactersAnnotator;
import semanticMarkup.ling.learn.knowledge.HeuristicNounLearnerUseSuffix;
import semanticMarkup.ling.learn.knowledge.UnknownWordBootstrappingLearner;
import semanticMarkup.ling.learn.utility.LearnerUtility;
import semanticMarkup.ling.learn.utility.StringUtility;
import semanticMarkup.ling.transform.ITokenizer;

public class Learner {
	// Always null and not referenced in the visible part of this class.
	// NOTE(review): looks like a leftover placeholder — confirm before removing.
	private static final Set<String> NONS = null; // ??
	// Configuration supplied to the constructor (learning mode, max tag length, ...)
	private Configuration myConfiguration;
	// Tokenizer supplied to the constructor
	private ITokenizer myTokenizer;

	// Data holder: created in the constructor, filled by learn() and returned by it
	private DataHolder myDataHolder;

	// Learner utility: shared helper handed to the data holder and to most modules
	private LearnerUtility myLearnerUtility;

	// Class variables

	
	// Created empty in the constructor and handed to the AnnotationNormalizer
	Map<String, Boolean> checkedModifiers;

	// Modules: each one is instantiated in the constructor and run by learn()
	KnowledgeBase knowledgeBase;
	
	Initializer initializer;
	
	HeuristicNounLearnerUseMorphology heuristicNounLearnerUseMorphology;

	FiniteSetsLoader finiteSetsLoader;
	
	HeuristicNounLearnerUseSuffix heuristicNounLearnerUseSuffix;
	
	PatternBasedAnnotator patternBasedAnnotator; 
	
	IgnorePatternAnnotator ignorePatternAnnotator;
	
	// Run twice by learn(): once with status "start", once with status "normal"
	CoreBootstrappingLearner coreBootstrappingLearner;
	
	AdditionalBootstrappingLearner additionalBootstrappingLearner;
	
	UnknownWordBootstrappingLearner unknownWordBootstrappingLearner;
	
	AdjectiveVerifier adjectiveVerifier;
	
	ModifierTagSeparator modifierTagSeparator;
	
	NMBResolver nMBResolver;
	
	AndOrTagSetter andOrTagSetter;
	
	AdjectiveSubjectBootstrappingLearner adjectiveSubjectBootstrappingLearner;

	// Run twice by learn(): before and after the ditto/pronoun/ignored passes
	POSBasedAnnotator posBasedAnnotator;
	
	PhraseClauseAnnotator phraseClauseAnnotator;
	
	DittoAnnotator dittoAnnotator;
	
	PronounCharactersAnnotator pronounCharactersAnnotator;
	
	IgnoredFinalizer ignoredFinalizer; 
	
	// Only run by learn() when the learning mode is "adj"
	CommonSubstructureAnnotator commonSubstructureAnnotator;
	
	CommaAsAndAnnotator commaAsAndAnnotator;
	
	NullSentenceTagger nullSentenceTagger;
	
	AnnotationNormalizer annotationNormalizer; 
	


	/**
	 * Creates a learner: configures log4j from {@code conf/log4j.properties},
	 * stores the given collaborators, builds the data holder and instantiates
	 * every module that {@code learn} later runs.
	 * 
	 * @param configuration
	 *            learner settings; learning mode, maximum tag length, number
	 *            of lead words and default general tag are read here
	 * @param tokenizer
	 *            tokenizer kept for later use
	 * @param learnerUtility
	 *            shared utility handed to the data holder and to most modules
	 */
	public Learner(Configuration configuration, ITokenizer tokenizer,
			LearnerUtility learnerUtility) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("Learner");

		this.myConfiguration = configuration;
		this.myTokenizer = tokenizer;

		// Utilities
		this.myLearnerUtility = learnerUtility;

		// Data holder
		this.myDataHolder = new DataHolder(myConfiguration,
				myLearnerUtility.getConstant(), myLearnerUtility.getWordFormUtility());

		// Shared with the AnnotationNormalizer created at the end
		this.checkedModifiers = new HashMap<String, Boolean>();

		myLogger.info("Created Learner");
		myLogger.info("\tLearning Mode: " + myConfiguration.getLearningMode());
		// Label was previously misspelled as "Max Tag Lengthr"
		myLogger.info("\tMax Tag Length: " + myConfiguration.getMaxTagLength());
		myLogger.info("\n");

		// Modules, instantiated in the original order
		this.knowledgeBase = new KnowledgeBase();

		this.initializer = new Initializer(this.myLearnerUtility,
				this.myConfiguration.getNumLeadWords());
		this.heuristicNounLearnerUseMorphology = new HeuristicNounLearnerUseMorphology(
				this.myLearnerUtility);

		this.finiteSetsLoader = new FiniteSetsLoader(this.myLearnerUtility);

		this.heuristicNounLearnerUseSuffix = new HeuristicNounLearnerUseSuffix(
				this.myLearnerUtility);

		this.patternBasedAnnotator = new PatternBasedAnnotator();

		this.ignorePatternAnnotator = new IgnorePatternAnnotator();

		this.coreBootstrappingLearner = new CoreBootstrappingLearner(
				this.myLearnerUtility, this.myConfiguration);

		this.additionalBootstrappingLearner = new AdditionalBootstrappingLearner(
				this.myLearnerUtility, this.myConfiguration);

		this.unknownWordBootstrappingLearner = new UnknownWordBootstrappingLearner(
				this.myLearnerUtility);

		this.adjectiveVerifier = new AdjectiveVerifier(this.myLearnerUtility);

		this.modifierTagSeparator = new ModifierTagSeparator(this.myLearnerUtility);

		this.nMBResolver = new NMBResolver();

		this.andOrTagSetter = new AndOrTagSetter(this.myLearnerUtility);

		this.adjectiveSubjectBootstrappingLearner = new AdjectiveSubjectBootstrappingLearner(
				this.myLearnerUtility,
				this.myConfiguration.getLearningMode(),
				this.myConfiguration.getMaxTagLength());

		this.posBasedAnnotator = new POSBasedAnnotator(this.myLearnerUtility);

		this.phraseClauseAnnotator = new PhraseClauseAnnotator(this.myLearnerUtility);

		this.dittoAnnotator = new DittoAnnotator(this.myLearnerUtility);

		this.pronounCharactersAnnotator = new PronounCharactersAnnotator(
				this.myLearnerUtility);

		this.ignoredFinalizer = new IgnoredFinalizer();

		this.nullSentenceTagger = new NullSentenceTagger(this.myLearnerUtility,
				this.myConfiguration.getDefaultGeneralTag());

		this.commonSubstructureAnnotator = new CommonSubstructureAnnotator();

		this.commaAsAndAnnotator = new CommaAsAndAnnotator(this.myLearnerUtility);

		this.annotationNormalizer 
			= new AnnotationNormalizer(this.getConfiguration().getLearningMode(), 
					this.checkedModifiers, this.getLearnerUtility());
	}

	/**
	 * Runs the whole learning pipeline over the given treatments and returns
	 * the populated data holder. The modules are executed strictly in the
	 * order below; the data holder is also written out under the name
	 * "dataholder" before returning.
	 * 
	 * @param treatments
	 *            treatments loaded into the initializer
	 * @param glossary
	 *            not read by this method
	 * @param markupMode
	 *            not read by this method
	 * @return the data holder owned by this learner
	 */
	public DataHolder learn(List<Treatment> treatments, IGlossary glossary,
			String markupMode) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger log = Logger.getLogger("Learn");
		log.trace("Enter Learn");
		log.trace(String.format("Learning Mode: %s",
				myConfiguration.getLearningMode()));

		// --- knowledge import and initialization ---
		knowledgeBase.importKnowledgeBase(myDataHolder, "kb",
				myLearnerUtility.getConstant());
		initializer.loadTreatments(treatments);
		initializer.run(myDataHolder);

		// --- heuristic seeds ---
		heuristicNounLearnerUseMorphology.run(myDataHolder);
		finiteSetsLoader.run(myDataHolder);
		heuristicNounLearnerUseSuffix.run(myDataHolder);

		// Zero certaintyU / certaintyL for every WordPOS entry
		resetCounts(myDataHolder);

		// --- pattern based annotation ---
		patternBasedAnnotator.run(myDataHolder);
		ignorePatternAnnotator.run(myDataHolder);

		// --- bootstrapping: core learner runs once per status ---
		coreBootstrappingLearner.setStatus("start");
		coreBootstrappingLearner.run(myDataHolder);
		coreBootstrappingLearner.setStatus("normal");
		coreBootstrappingLearner.run(myDataHolder);
		additionalBootstrappingLearner.run(myDataHolder);

		log.info("Unknownword bootstrappings:");
		unknownWordBootstrappingLearner.run(myDataHolder);

		log.info("Adjectives Verification:");
		adjectiveVerifier.run(myDataHolder);

		// Sentences whose tag contains a space: split modifier from tag
		modifierTagSeparator.run(myDataHolder);
		// Words playing both N and B roles
		nMBResolver.run(myDataHolder);
		// and/or tags
		andOrTagSetter.run(myDataHolder);
		adjectiveSubjectBootstrappingLearner.run(myDataHolder);
		// Sentences tagged "andor" get their tag set back to null
		resetAndOrTags(myDataHolder);

		getLearnerUtility().tagAllSentences(myDataHolder, "singletag",
				"sentence");

		// --- annotators; the POS based annotator is run a second time ---
		posBasedAnnotator.run(myDataHolder);
		phraseClauseAnnotator.run(myDataHolder);
		dittoAnnotator.run(myDataHolder);
		pronounCharactersAnnotator.run(myDataHolder);
		ignoredFinalizer.run(myDataHolder);
		posBasedAnnotator.run(myDataHolder);

		// Remaining sentences with null tags
		nullSentenceTagger.run(myDataHolder);

		// Common substructure handling applies to "adj" mode only
		boolean adjectiveMode = StringUtils.equals(
				myConfiguration.getLearningMode(), "adj");
		if (adjectiveMode) {
			commonSubstructureAnnotator.run(myDataHolder);
		}

		commaAsAndAnnotator.run(myDataHolder);
		annotationNormalizer.run(myDataHolder);

		// --- output ---
		prepareTables4Parser(myDataHolder);
		myDataHolder.writeToFile("dataholder", "");

		log.info("Learning done!");
		return myDataHolder;
	}

	/**
	 * In "adj" learning mode runs adjective subject bootstrapping; in every
	 * other mode runs the and/or handling exactly once.
	 * 
	 * The previous version wrapped the and/or call in a do/while loop whose
	 * counter was reset to 0 and never changed, so the loop always executed a
	 * single time; that dead loop has been removed.
	 * 
	 * @param dataholderHandler
	 *            currently ignored: both branches operate on this learner's own
	 *            data holder. NOTE(review): confirm whether the parameter was
	 *            meant to be used instead.
	 * @param learningMode
	 *            learning mode; only the value "adj" is treated specially
	 */
	private void adjectiveSubjectBootstrappingLearner(DataHolder dataholderHandler,
			String learningMode) {
		if (StringUtils.equals(learningMode, "adj")) {
			adjectiveSubjectBootstrapping(myDataHolder);
		} else {
			this.handleAndOr(myDataHolder);
		}
	}

	/**
	 * Seeds the word-POS holder from a glossary: words of the structure
	 * category are added with POS "p", words outside that category with POS
	 * "b". Does nothing when no glossary is given.
	 * 
	 * @param glossary
	 *            glossary to read from, may be null
	 */
	public void addGlossary(IGlossary glossary) {
		if (glossary == null) {
			return;
		}

		// Was spelled "struture", which would not match a "structure" category.
		// NOTE(review): confirm the category name used by the glossary data.
		String category = "structure";
		Set<String> pWords = glossary.getWords(category);

		Set<String> categories = new HashSet<String>();
		categories.add(category);
		Set<String> bWords = glossary.getWordsNotInCategories(categories);

		this.getDataHolder().addWords2WordPOSHolder(pWords, "p");
		this.getDataHolder().addWords2WordPOSHolder(bWords, "b");
	}

//	private void addPredefinedWords() {
//		this.addStopWords();
//		this.addCharacters();
//		this.addNumbers();
//		this.addClusterStrings();
//		this.addProperNouns();
//	}

	/**
	 * Gives access to the data holder this learner works on.
	 * 
	 * @return the data holder created in the constructor
	 */
	public DataHolder getDataHolder() {
		return myDataHolder;
	}

	/**
	 * Learns nouns heuristically and records them in the data holder.
	 * 
	 * First the nouns from {@code learnHeuristicsNouns()} are collected, then
	 * the nouns and descriptors from {@code characterHeuristics()} are added
	 * through {@code addNouns} / {@code addDescriptors}. Finally every learned
	 * entry of the form {@code word[s|p|n]} (several may be joined by "|") is
	 * written to the "wordpos" table, and a singular/plural pair is recorded
	 * for entries tagged "p" or "s".
	 */
	public void addHeuristicsNouns() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.addHeuristicsNouns");

		myLogger.trace("Enter addHeuristicsNouns");

		Set<String> nouns = this.learnHeuristicsNouns();
		myLogger.debug("Nouns learned from heuristics:");
		myLogger.debug("\t" + nouns.toString());
		myLogger.debug("Total: " + nouns.size());

		List<Set<String>> results = this.characterHeuristics();
		Set<String> rnouns = results.get(0);
		Set<String> descriptors = results.get(1);
		addDescriptors(descriptors);
		addNouns(rnouns);

		// this.myDataHolder.printHolder(DataHolder.SINGULAR_PLURAL);

		myLogger.debug("Total: " + nouns.size());
		myLogger.info("Learn singular-plural pair");

		// Loop-invariant: compiled / assembled once instead of per noun
		final Pattern taggedNoun = Pattern.compile("(\\w+)\\[([spn])\\]");
		final String excludedWords = "NUM|"
				+ this.myLearnerUtility.getConstant().NUMBER + "|"
				+ this.myLearnerUtility.getConstant().CLUSTERSTRING + "|"
				+ this.myLearnerUtility.getConstant().CHARACTER + "|"
				+ this.myLearnerUtility.getConstant().PROPERNOUN;

		for (String e : nouns) {
			myLogger.trace("Check Word: " + e);

			// Needs at least one word character and must not be an excluded word
			if (!e.matches("^.*\\w.*$")
					|| StringUtility.isMatchedWords(e, excludedWords)) {
				continue;
			}
			myLogger.trace("Pass");

			// same word may have two different pos tags
			for (String nounAndPOS : e.split("\\|")) {
				Matcher m = taggedNoun.matcher(nounAndPOS);
				if (!m.lookingAt()) {
					continue;
				}

				String word = m.group(1);
				String pos = m.group(2);
				this.myDataHolder.updateDataHolder(word, pos, "*", "wordpos", 0);

				if (pos.equals("p")) {
					// word is the plural; look up its singular
					String singular = this.myLearnerUtility
							.getWordFormUtility().getSingular(word);
					if (singular != null && !singular.equals("")) {
						this.myDataHolder.addSingularPluralPair(singular, word);
					}
				}

				if (pos.equals("s")) {
					// word is the singular; record every plural form returned
					List<String> pluralList = this.myLearnerUtility
							.getWordFormUtility().getPlural(word);
					for (String plural : pluralList) {
						if (plural != null && !plural.equals("")) {
							this.myDataHolder.addSingularPluralPair(word, plural);
						}
					}
				}
			}
		}

		myLogger.trace("Quit addHeuristicsNouns");
	}

	/**
	 * Records each given descriptor in the "wordpos" table with POS "b",
	 * skipping those that match the FORBIDDEN word list.
	 * 
	 * @param descriptors
	 *            descriptor words to record
	 */
	public void addDescriptors(Set<String> descriptors) {
		for (String candidate : descriptors) {
			boolean forbidden = StringUtility.isMatchedWords(candidate,
					this.myLearnerUtility.getConstant().FORBIDDEN);
			if (forbidden) {
				continue;
			}
			this.myDataHolder.updateDataHolder(candidate, "b", "", "wordpos", 1);
		}
	}

	/**
	 * Records each given noun in the "wordpos" table with POS "n", skipping
	 * those that match the FORBIDDEN word list.
	 * 
	 * @param rnouns
	 *            nouns to record
	 */
	public void addNouns(Set<String> rnouns) {
		for (String candidate : rnouns) {
			boolean forbidden = StringUtility.isMatchedWords(candidate,
					this.myLearnerUtility.getConstant().FORBIDDEN);
			if (forbidden) {
				continue;
			}
			this.myDataHolder.updateDataHolder(candidate, "n", "", "wordpos", 1);
		}
	}

	/**
	 * Learns nouns from the original sentences held in the data holder.
	 * 
	 * Steps: (1) collect the word preceding "present"/"absent" in each
	 * lower-cased sentence and all word tokens; (2) re-tag "...a[s]" entries
	 * via {@code getHeuristicsNounsHelper}; (3) group the words by root;
	 * (4) mark words with a noun ending as singular, plus their "s"/"es"
	 * forms found in the same group as plural; (5) in groups without any verb
	 * ending, ask the word form utility for singular/plural pairs. Singular
	 * and plural pairs found in (4) and (5) are also stored in the data
	 * holder.
	 * 
	 * @return nouns learned by heuristics, each suffixed with "[s]" or "[p]"
	 */
	public Set<String> learnHeuristicsNouns() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger
				.getLogger("learn.addHeuristicsNouns.learnHeuristicsNouns");

		// Set of words
		Set<String> words = new HashSet<String>();

		// Set of nouns
		Set<String> nouns = new HashSet<String>();

		List<String> sentences = new LinkedList<String>();
		for (int i = 0; i < this.myDataHolder.getSentenceHolder().size(); i++) {
			String originalSentence = this.myDataHolder.getSentenceHolder()
					.get(i).getOriginalSentence();
			myLogger.trace("Original Sentence: " + originalSentence);
			sentences.add(StringUtility.strip(originalSentence));
		}

		// Now we have original sentences in sentences
		for (String rawSentence : sentences) {
			String sentence = rawSentence.toLowerCase();
			String noun = this.getPresentAbsentNouns(sentence);
			if (!noun.equals("")) {
				nouns.add(noun);
			}

			// add words
			List<String> tokens = this.myLearnerUtility.tokenizeText(sentence,
					"all");
			for (String token : tokens) {
				if (StringUtility.isWord(token)) {
					words.add(token);
					myLogger.trace("Add a word into words: " + token);
				}
			}
		}

		// solve the problem: septa and septum are both s
		// The set must not be modified while it is being iterated (that
		// raises ConcurrentModificationException), so the replacements are
		// collected first and applied afterwards.
		Set<String> replacedNouns = new HashSet<String>();
		Set<String> replacementNouns = new HashSet<String>();
		for (String oldNoun : nouns) {
			String newNoun = this.getHeuristicsNounsHelper(oldNoun, nouns);
			if (!newNoun.equals(oldNoun)) {
				replacedNouns.add(oldNoun);
				replacementNouns.add(newNoun);
			}
		}
		nouns.removeAll(replacedNouns);
		nouns.addAll(replacementNouns);

		// group all words by their root
		Map<String, Set<String>> wordMap = new HashMap<String, Set<String>>();
		for (String word : words) {
			String root = myLearnerUtility.getWordFormUtility().getRoot(word);
			Set<String> sameRoot = wordMap.get(root);
			if (sameRoot == null) {
				sameRoot = new HashSet<String>();
				wordMap.put(root, sameRoot);
			}
			sameRoot.add(word);
		}

		// print out the wordMap
		myLogger.trace("WordMap:");
		for (Map.Entry<String, Set<String>> e : wordMap.entrySet()) {
			myLogger.trace(e.toString());
		}

		// Loop-invariant patterns, compiled once
		final Pattern nounEnding = Pattern.compile("^.*" + Constant.NENDINGS);
		final Pattern verbEnding = Pattern.compile("^.*" + Constant.VENDINGS);

		// find nouns
		myLogger.info("Learn singular-plural pair");
		for (Map.Entry<String, Set<String>> e : wordMap.entrySet()) {
			Set<String> wordSet = e.getValue();
			for (String word : wordSet) {
				if (!nounEnding.matcher(word).matches()) {
					continue;
				}
				nouns.add(word + "[s]");
				if (wordSet.contains(word + "s")) {
					nouns.add(word + "s" + "[p]");
					this.myDataHolder.addSingularPluralPair(word, word + "s");
				}
				if (wordSet.contains(word + "es")) {
					nouns.add(word + "es" + "[p]");
					this.myDataHolder.addSingularPluralPair(word, word + "es");
				}
			}
		}

		for (Map.Entry<String, Set<String>> wordMapEntry : wordMap.entrySet()) {
			Set<String> wordSet = wordMapEntry.getValue();

			// check if there is a word with Vending
			boolean hasVending = false;
			for (String tempWord : wordSet) {
				if (verbEnding.matcher(tempWord).matches()) {
					hasVending = true;
					break;
				}
			}

			// at least two words without verb endings
			if (hasVending || wordSet.size() <= 1) {
				continue;
			}

			// ArrayList gives constant-time get(i) for the pairwise scan
			List<String> wordList = new ArrayList<String>(wordSet);
			for (int i = 0; i < wordList.size(); i++) {
				for (int j = i + 1; j < wordList.size(); j++) {
					List<String> pair = myLearnerUtility.getWordFormUtility()
							.getSingularPluralPair(wordList.get(i),
									wordList.get(j));
					if (pair.size() == 2) {
						String singular = pair.get(0);
						String plural = pair.get(1);
						nouns.add(singular + "[s]");
						nouns.add(plural + "[p]");
						this.myDataHolder.addSingularPluralPair(singular,
								plural);
					}
				}
			}
		}

		// print out nouns
		myLogger.debug("Nouns: " + nouns);

		return nouns;
	}

	/**
	 * Helper of addHeuristicsNouns for the "septa and septum are both s"
	 * problem (septum is the singular, septa the plural): an entry ending in
	 * "a[s]" is re-tagged as plural, e.g. septa[s] becomes septa[p], when the
	 * word without its "[s]" tag is contained in the given set.
	 * 
	 * @param oldNoun
	 *            tagged noun to check
	 * @param words
	 *            set the untagged word is looked up in
	 * @return the re-tagged noun, or {@code oldNoun} unchanged
	 */
	public String getHeuristicsNounsHelper(String oldNoun, Set<String> words) {
		// Anything not of the form "...a[s]" is returned as is
		if (!oldNoun.matches("^.*a\\[s\\]$")) {
			return oldNoun;
		}

		String untagged = oldNoun.replaceAll("\\[s\\]", "");
		return words.contains(untagged) ? untagged + "[p]" : oldNoun;
	}

	/**
	 * Treats the word directly preceding "present" or "absent" as a noun.
	 * 
	 * The word is rejected when it is one of and/or/to, a stop word, or an
	 * adverb (always, often, seldom, sometimes, or anything ending in "ly").
	 * Otherwise it is tagged plural when it has a plural ending, ends in a
	 * single "s", or is "teeth", and does not match the singular endings;
	 * every other word is tagged singular.
	 * 
	 * @param text
	 *            the content to learn from
	 * @return the word followed by "[p]" or "[s]", or an empty string when
	 *         nothing was learned
	 */
	public String getPresentAbsentNouns(String text) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger
				.getLogger("learn.addHeuristicsNouns.learnHeuristicsNouns.getPresentAbsentNouns");

		String connectives = "and|or|to";

		Matcher found = Pattern.compile(
				"^.*?(\\w+?)\\s+(present|absent).*$").matcher(text);
		if (!found.lookingAt()) {
			return "";
		}

		String word = found.group(1);
		boolean rejected = word.matches("\\b(" + connectives + ")\\b")
				|| word.matches("\\b("
						+ this.myLearnerUtility.getConstant().STOP + ")\\b")
				|| word.matches("\\b(always|often|seldom|sometimes|[a-z]+ly)\\b");
		if (rejected) {
			return "";
		}

		myLogger.trace("present/absent " + word);

		boolean looksPlural = word.matches("^.*" + Constant.PENDINGS)
				|| word.matches("^.*[^s]s$")
				|| word.matches("teeth");
		boolean plural = looksPlural && !word.matches(Constant.SENDINGS);

		return word + (plural ? "[p]" : "[s]");
	}

	/**
	 * Discover nouns and descriptors according to a set of rules.
	 * 
	 * Every sentence in the data holder is run through the noun rules (taxon
	 * names, "#s" possessives, end-of-sentence nouns, proper nouns and
	 * acronyms, single-word statements, word followed by a number) and the
	 * two descriptor rules. Italic tags and "#" are stripped from the stored
	 * sentence along the way. Nouns that turn out to be descriptors, or that
	 * start with a stop word or preposition, are filtered out, and the
	 * resulting groups are written to the heuristic noun table as "organ",
	 * "acronyms", "propernouns" and "taxonnames".
	 * 
	 * Taxon names are accumulated over all sentences; previously the set was
	 * overwritten for each sentence, so only the names of the last sentence
	 * were kept.
	 * 
	 * @return a linked list, whose first element is a set of nouns, and second
	 *         element is a set of descriptors
	 */
	public List<Set<String>> characterHeuristics() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger
				.getLogger("learn.addHeuristicsNouns.characterHeuristics");

		Set<String> taxonNames = new HashSet<String>();
		Set<String> nouns = new HashSet<String>();
		Set<String> anouns = new HashSet<String>();
		Set<String> pnouns = new HashSet<String>();
		Set<String> descriptors = new HashSet<String>();
		Map<String, Boolean> descriptorMap = new HashMap<String, Boolean>();

		int sent_num = this.myDataHolder.getSentenceHolder().size();
		for (int i = 0; i < sent_num; i++) {

			SentenceStructure sent = this.myDataHolder.getSentenceHolder().get(
					i);
			String source = sent.getSource();
			String sentence = sent.getSentence();
			String originalSentence = sent.getOriginalSentence();

			myLogger.trace("Source: " + source);
			myLogger.trace("Sentence: " + sentence);
			myLogger.trace("Original Sentence: " + originalSentence);

			originalSentence = StringUtility.trimString(originalSentence);

			// noun rule 0: taxon names (accumulated across sentences)
			taxonNames.addAll(this.getTaxonNameNouns(originalSentence));

			// strip <i> / </i> tags
			sentence = sentence.replaceAll("<\\s*/?\\s*i\\s*>", "");
			originalSentence = originalSentence.replaceAll("<\\s*/?\\s*i\\s*>",
					"");
			// Update getSentenceHolder()
			this.myDataHolder.getSentenceHolder().get(i).setSentence(sentence);

			// noun rule 0.5: Meckle#s cartilage
			Set<String> nouns0 = this
					.getNounsMecklesCartilage(originalSentence);
			nouns.addAll(nouns0);
			sentence = sentence.replaceAll("#", "");
			// Update getSentenceHolder()
			this.myDataHolder.getSentenceHolder().get(i).setSentence(sentence);

			// noun rule 2: end of sentence nouns
			// (a|an|the|some|any|this|that|those|these) noun$
			Set<String> nouns2 = this.getNounsRule2(originalSentence);
			nouns.addAll(nouns2);

			// noun rule 3: proper nouns and acronyms
			Set<String> nouns_temp = this.getNounsRule3Helper(originalSentence);
			for (String candidate : nouns_temp) {
				if (candidate.matches("^.*[A-Z].+$")
						&& (!candidate.matches("^.*-\\w+ed$"))) {
					String token = candidate.toLowerCase();
					// all capitals/digits: acronym; otherwise proper noun
					if (candidate.matches("^[A-Z0-9]+$")) {
						anouns.add(token);
					} else {
						pnouns.add(token);
					}
					nouns.add(token);
				}
			}

			// noun rule 1: sources with 1 _ are character statements, 2 _ are
			// descriptions
			Set<String> nouns1 = getNounsRule1(source, originalSentence,
					descriptorMap);
			nouns.addAll(nouns1);

			// noun rule 4: non-stop/prep followed by a number: epibranchial 4
			// descriptor heuristics
			Set<String> nouns4 = this.getNounsRule4(originalSentence);
			nouns.addAll(nouns4);

			// remove puncts for descriptor rules
			originalSentence = StringUtility.removePunctuation(
					originalSentence, "-");

			// Descriptor rule 1: single term descriptions are descriptors
			descriptors.addAll(this.getDescriptorsRule1(source,
					originalSentence, nouns));

			// Descriptor rule 2: (is|are) red: isDescriptor
			descriptors.addAll(this.getDescriptorsRule2(originalSentence,
					descriptorMap));
		}

		nouns = this.filterOutDescriptors(nouns, descriptors);
		anouns = this.filterOutDescriptors(anouns, descriptors);
		pnouns = this.filterOutDescriptors(pnouns, descriptors);

		this.getDataHolder().add2HeuristicNounTable(nouns, "organ");
		this.getDataHolder().add2HeuristicNounTable(anouns, "acronyms");
		this.getDataHolder().add2HeuristicNounTable(pnouns, "propernouns");
		this.getDataHolder().add2HeuristicNounTable(taxonNames, "taxonnames");

		nouns.addAll(anouns);
		nouns.addAll(pnouns);
		nouns.addAll(taxonNames);

		List<Set<String>> results = new LinkedList<Set<String>>();
		results.add(nouns);
		results.add(descriptors);

		return results;
	}

	/**
	 * Filters out descriptors from nouns, and returns the remaining nouns.
	 * 
	 * Each noun is lower-cased; it is dropped when it begins with a
	 * preposition or stop word (matched case-insensitively as a whole word at
	 * the start) or when its lower-cased form is contained in the descriptor
	 * set.
	 * 
	 * @param rNouns
	 *            set of nouns
	 * @param rDescriptors
	 *            set of descriptors
	 * @return a new set with the lower-cased nouns that are not descriptors
	 */
	public Set<String> filterOutDescriptors(Set<String> rNouns,
			Set<String> rDescriptors) {
		Set<String> filtedNouns = new HashSet<String>();
		if (rNouns.isEmpty()) {
			return filtedNouns;
		}

		// Loop-invariant: compile once instead of once per noun
		Pattern leadingStopWord = Pattern.compile("\\b("
				+ this.myLearnerUtility.getConstant().PREPOSITION + "|"
				+ this.myLearnerUtility.getConstant().STOP + ")\\b",
				Pattern.CASE_INSENSITIVE);

		for (String rawNoun : rNouns) {
			String noun = rawNoun.toLowerCase();
			if (!leadingStopWord.matcher(noun).lookingAt()
					&& !rDescriptors.contains(noun)) {
				filtedNouns.add(noun);
			}
		}
		return filtedNouns;
	}

	/**
	 * Nouns rule 0: collects taxon names enclosed in {@code <i>...</i>}.
	 * 
	 * For each italic span, scanned left to right, the enclosed text is added
	 * as a whole and additionally split on whitespace into its parts.
	 * Scanning stops at the first span whose content is empty, or when no
	 * further span is found.
	 * 
	 * @param oSent
	 *            original sentence, possibly containing italic tags
	 * @return set of taxon names and their whitespace-separated parts
	 */
	public Set<String> getTaxonNameNouns(String oSent) {
		Set<String> found = new HashSet<String>();
		Pattern italicSpan = Pattern
				.compile("(.*?)<\\s*i\\s*>\\s*([^<]*)\\s*<\\s*\\/\\s*i\\s*>(.*)");

		String rest = oSent;
		Matcher span = italicSpan.matcher(rest);
		while (span.lookingAt()) {
			String name = span.group(2);
			if (name.length() == 0) {
				break;
			}

			found.add(name);
			for (String part : name.split("\\s+")) {
				found.add(part);
			}

			// continue behind the closing tag
			rest = span.group(3);
			span = italicSpan.matcher(rest);
		}

		return found;
	}

	/**
	 * Nouns rule 0.5: Meckle#s cartilage.
	 * 
	 * When the sentence contains a word of the form {@code xxx#s}, three
	 * lower-cased variants are returned: the word itself, the word without
	 * "#", and that form without its trailing "s".
	 * 
	 * @param oSent
	 *            original sentence
	 * @return the variants found, or an empty set
	 */
	public Set<String> getNounsMecklesCartilage(String oSent) {
		Set<String> variants = new HashSet<String>();

		Matcher possessive = Pattern.compile("^.*\\b(\\w+#s)\\b.*$").matcher(
				oSent);
		if (!possessive.lookingAt()) {
			return variants;
		}

		String withHash = possessive.group(1).toLowerCase();
		String withoutHash = withHash.replaceAll("#", "");
		String stem = withoutHash.replaceAll("s$", "");

		variants.add(withHash);
		variants.add(withoutHash);
		variants.add(stem);
		return variants;
	}

	/**
	 * Noun rule 1: a sentence consisting of a single term is taken as a noun
	 * (lower-cased) when its source does not match the pattern
	 * {@code .xml_<non-space>_} and the term is not a descriptor.
	 * 
	 * @param source
	 *            source identifier of the sentence
	 * @param originalSentence
	 *            original sentence text
	 * @param descriptorMap
	 *            map passed on to the descriptor check
	 * @return a set holding the lower-cased sentence, or an empty set
	 */
	public Set<String> getNounsRule1(String source, String originalSentence,
			Map<String, Boolean> descriptorMap) {
		Set<String> nouns = new HashSet<String>();

		if (source.matches("^.*\\.xml_\\S+_.*$")) {
			return nouns;
		}
		// anything containing whitespace is not a single term
		if (originalSentence.matches("^.*\\s.*$")) {
			return nouns;
		}
		if (this.isDescriptor(originalSentence, descriptorMap)) {
			return nouns;
		}

		nouns.add(originalSentence.toLowerCase());
		return nouns;
	}

	/**
	 * Noun rule 2: the word following a determiner or ordinal (a, an, the,
	 * some, any, this, second ... tenth) is a noun when it is followed by the
	 * end of the text, an opening bracket, or a preposition.
	 * 
	 * A word followed by a preposition is skipped when it is "length",
	 * "width", "presence" or ends in "tion". Results are lower-cased.
	 * 
	 * The preposition alternation is wrapped in a non-capturing group so that
	 * both word boundaries apply to every preposition; without the group they
	 * bound only the first and the last alternative. NOTE(review): this
	 * assumes PREPOSITION is a bare "a|b|c" alternation, as its use elsewhere
	 * in this class suggests.
	 * 
	 * @param oSent
	 *            original sentence; null yields an empty set
	 * @return set of lower-cased nouns
	 */
	public Set<String> getNounsRule2(String oSent) {
		Set<String> nouns = new HashSet<String>();

		// Compiled once; group numbers are the same as before (1..5)
		Pattern determinerNoun = Pattern
				.compile("(.*?)\\b(a|an|the|some|any|this|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth) +(\\w+)\\s*($|\\(|\\[|\\{|\\b(?:"
						+ this.myLearnerUtility.getConstant().PREPOSITION
						+ ")\\b)(.*)");

		String copy = oSent;
		while (copy != null) {
			Matcher m = determinerNoun.matcher(copy);
			if (!m.lookingAt()) {
				break;
			}

			String t = m.group(3);
			String prep = m.group(4);
			// continue scanning behind the current match
			copy = m.group(5);

			if (prep.matches("^.*\\w.*$")
					&& t.matches("^.*\\b(length|width|presence|\\w+tion)\\b.*$")) {
				continue;
			}
			nouns.add(t.toLowerCase());
		}

		return nouns;
	}

	/**
	 * Helper for nouns rule 3: collects tokens that contain a capital letter
	 * somewhere other than in the first token of a segment.
	 * 
	 * The sentence is split on brackets ((), [], {}); punctuation is removed
	 * from each segment via {@code StringUtility.removePunctuation(seg, "-")};
	 * the segment is then split on whitespace. The first token of every
	 * segment is skipped because it is normally capitalized anyway. A token is
	 * kept when it has an upper-case letter followed by at least one more
	 * character and does not end in {@code -<word>ed}.
	 * 
	 * @param sentence
	 *            the sentence to scan
	 * @return the matching tokens, in their original case
	 */
	public Set<String> getNounsRule3Helper(String sentence) {
		Set<String> capitalized = new HashSet<String>();

		for (String segment : sentence.split("[()\\[\\]\\{\\}]")) {
			String cleaned = StringUtility.removePunctuation(segment, "-");
			String[] words = cleaned.split("\\s+");

			// index 0 is deliberately skipped: the first word of a character
			// statement is normally capitalized
			for (int w = 1; w < words.length; w++) {
				String word = words[w];
				boolean hasCapital = word.matches("^.*[A-Z].+$");
				if (hasCapital && !word.matches("^.*-\\w+ed$")) {
					capitalized.add(word);
				}
			}
		}

		return capitalized;
	}

	/**
	 * Nouns rule 4: a word that is neither a stop word nor a preposition and
	 * is followed by a number is taken as a noun, e.g. "epibranchial 4".
	 * 
	 * The sentence is scanned left to right; after every hit the scan
	 * continues on the text after the number. The candidate must be preceded
	 * by a whitespace character, so the very first word of the sentence is
	 * never a candidate.
	 * 
	 * @param oSent
	 *            the original sentence; {@code null} yields an empty set
	 * @return a set of lower-cased nouns
	 */
	public Set<String> getNounsRule4(String oSent) {
		Set<String> nouns = new HashSet<String>();
		if (oSent == null) {
			return nouns;
		}

		// Both patterns are loop-invariant; compile each at most once.
		Pattern wordBeforeNumber = Pattern.compile("(.*?)\\s(\\w+)\\s+\\d+(.*)");
		// Built on first use so the constants are only read when a candidate
		// actually exists.
		Pattern stopOrPreposition = null;

		String remaining = oSent;
		while (true) {
			Matcher m = wordBeforeNumber.matcher(remaining);
			if (!m.lookingAt()) {
				break;
			}

			String candidate = m.group(2);
			remaining = m.group(3);

			if (stopOrPreposition == null) {
				stopOrPreposition = Pattern.compile("\\b("
						+ this.myLearnerUtility.getConstant().PREPOSITION + "|"
						+ this.myLearnerUtility.getConstant().STOP + ")\\b");
			}
			if (!stopOrPreposition.matcher(candidate).matches()) {
				nouns.add(candidate.toLowerCase());
			}
		}

		return nouns;
	}

	/**
	 * Descriptors rule 1: a one-word sentence from a source matching
	 * {@code ^.*\.xml_\S+_.*$} is taken as a descriptor, unless that exact
	 * word (case-sensitive comparison) is already among the given nouns.
	 * 
	 * @param source
	 *            source identifier of the sentence
	 * @param sentence
	 *            the sentence text
	 * @param nouns
	 *            nouns learned so far
	 * @return a set holding the lower-cased sentence, or an empty set when the
	 *         rule does not apply
	 */
	public Set<String> getDescriptorsRule1(String source, String sentence,
			Set<String> nouns) {
		Set<String> result = new HashSet<String>();

		if (!source.matches("^.*\\.xml_\\S+_.*$")) {
			return result;
		}
		// only single-word sentences qualify
		if (sentence.matches("^.*\\s.*$")) {
			return result;
		}

		for (String noun : nouns) {
			if (noun.equals(sentence)) {
				// already known as a noun
				return result;
			}
		}

		result.add(sentence.toLowerCase());
		return result;
	}

	/**
	 * Descriptors rule 2: every whitespace-separated token of the sentence
	 * that {@link #isDescriptor(String, Map)} accepts (i.e. that occurs after
	 * "is", "are", ... somewhere in the corpus, as in "(is|are) red") is taken
	 * as a descriptor.
	 * 
	 * @param sentence
	 *            the sentence whose tokens are checked
	 * @param descriptorMap
	 *            cache of descriptor decisions, passed on to
	 *            {@code isDescriptor}
	 * @return the lower-cased tokens that are descriptors
	 */
	public Set<String> getDescriptorsRule2(String sentence,
			Map<String, Boolean> descriptorMap) {
		Set<String> descriptors = new HashSet<String>();

		for (String rawToken : sentence.split("\\s+")) {
			// tokens are checked and stored in lower case
			String candidate = rawToken.toLowerCase();
			if (isDescriptor(candidate, descriptorMap)) {
				descriptors.add(candidate);
			}
		}

		return descriptors;
	}

	/**
	 * Check if the term is a descriptor.
	 * 
	 * A cached decision in {@code descriptorMap} is returned when present.
	 * Otherwise every sentence in the sentence holder is tested with
	 * {@link #isMatched(String, String, Map)}; the first hit makes the term a
	 * descriptor (and {@code isMatched} caches the positive result). When no
	 * sentence matches, a negative result is cached.
	 * 
	 * Note that the cache is read with the term exactly as given but written
	 * with the lower-cased term, so a term containing upper-case letters never
	 * hits the cache.
	 * 
	 * @param term
	 *            the term to check
	 * @param descriptorMap
	 *            descriptor decisions already learned; updated by this call
	 * @return a boolean value indicating whether the term is a descriptor
	 */
	public boolean isDescriptor(String term, Map<String, Boolean> descriptorMap) {
		if (descriptorMap.containsKey(term)) {
			return descriptorMap.get(term).booleanValue();
		}

		for (SentenceStructure candidate : this.myDataHolder
				.getSentenceHolder()) {
			if (isMatched(candidate.getOriginalSentence(), term, descriptorMap)) {
				return true;
			}
		}

		// no sentence uses the term after a copula: remember the negative
		descriptorMap.put(term.toLowerCase(), false);
		return false;
	}

	/**
	 * Check if the term occurs in the sentence directly after a form of "to
	 * be" (" is ", " are ", " was ", " were ", " be ", " being ").
	 * 
	 * On a match the positive decision is stored in {@code descriptorMap}
	 * under the lower-cased term. Nothing is stored when there is no match.
	 * 
	 * @param sentence
	 *            the sentence to search
	 * @param term
	 *            the term to look for; it is matched literally
	 * @param descriptorMap
	 *            descriptor cache, updated on a match
	 * @return a boolean value indicating whether the term matches the sentence
	 */
	public boolean isMatched(String sentence, String term,
			Map<String, Boolean> descriptorMap) {
		// The term comes from corpus tokens and may contain regex
		// metacharacters such as '(' or '.'; quote it so it is matched
		// literally instead of breaking (or silently widening) the pattern.
		String regex = "^.*" + " (is|are|was|were|be|being) "
				+ Pattern.quote(term) + ".*$";

		if (!sentence.matches(regex)) {
			return false;
		}

		descriptorMap.put(term.toLowerCase(), true);
		return true;
	}

	/**
	public void addStopWords() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.addStopWords");
		myLogger.trace("Add stop words");

		List<String> stops = new ArrayList<String>();
		stops.addAll(Arrays.asList(this.myLearnerUtility.getConstant().STOP.split("\\|")));
		stops.addAll(Arrays.asList(new String[] { "NUM", "(", "[", "{", ")",
				"]", "}", "d+" }));

		myLogger.trace("Stop Words: " + stops);
		for (int i = 0; i < stops.size(); i++) {
			String word = stops.get(i);
			if (word.matches("\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
				continue;
			}
			this.myDataHolder.updateDataHolder(word, "b", "*", "wordpos", 0);
			myLogger.trace(String.format(
					"(\"%s\", \"b\", \"*\", \"wordpos\", 0) added\n", word));
			// this.getWordPOSHolder().put(new WordPOSKey(word, "b"), new
			// WordPOSValue("*", 0, 0, null, null));
			// System.out.println("Add Stop Word: " + word+"\n");
		}
		myLogger.trace("Quite\n");
	}

	public void addCharacters() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.addCharacters");
		myLogger.trace("Add characters");

		List<String> chars = new ArrayList<String>();
		chars.addAll(Arrays.asList(this.myLearnerUtility.getConstant().CHARACTER.split("\\|")));
		//
		// System.out.println(chars);
		// System.out.println(this.myLearnerUtility.getConstant().CHARACTER);

		for (int i = 0; i < chars.size(); i++) {
			String word = chars.get(i);
			// String reg="\\b("+this.myLearnerUtility.getConstant().FORBIDDEN+")\\b";
			// boolean f = word.matches(reg);
			if (word.matches("\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
				continue;
			}
			this.myDataHolder.updateDataHolder(word, "b", "*", "wordpos", 0);
			// this.getWordPOSHolder().put(new WordPOSKey(word, "b"), new
			// WordPOSValue("", 0, 0, null, null));
			// System.out.println("addCharacter word: " + word);
		}
	}

	public void addNumbers() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.addNumbers");
		myLogger.trace("Add numbers");

		List<String> nums = new ArrayList<String>();
		nums.addAll(Arrays.asList(this.myLearnerUtility.getConstant().NUMBER.split("\\|")));

		// System.out.println(nums);
		// System.out.println(this.myLearnerUtility.getConstant().NUMBER);

		for (int i = 0; i < nums.size(); i++) {
			String word = nums.get(i);
			// String reg="\\b("+this.myLearnerUtility.getConstant().FORBIDDEN+")\\b";
			// boolean f = word.matches(reg);
			if (word.matches("\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
				continue;
			}
			this.myDataHolder.updateDataHolder(word, "b", "*", "wordpos", 0);
			// this.getWordPOSHolder().put(new WordPOSKey(word, "b"), new
			// WordPOSValue("*", 0, 0, null, null));
			// System.out.println("add Number: " + word);
		}
		this.myDataHolder.updateDataHolder("NUM", "b", "*", "wordpos", 0);
		// this.getWordPOSHolder().put(new WordPOSKey("NUM", "b"), new
		// WordPOSValue("*",0, 0, null, null));
	}

	public void addClusterStrings() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.addClusterstrings");
		myLogger.trace("Add clusterstrings");

		List<String> cltstrs = new ArrayList<String>();
		cltstrs.addAll(Arrays.asList(this.myLearnerUtility.getConstant().CLUSTERSTRING.split("\\|")));

		// System.out.println(cltstrs);
		// System.out.println(this.myLearnerUtility.getConstant().CLUSTERSTRING);

		for (int i = 0; i < cltstrs.size(); i++) {
			String word = cltstrs.get(i);
			if (word.matches("\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
				continue;
			}
			this.myDataHolder.updateDataHolder(word, "b", "*", "wordpos", 0);
			// this.getWordPOSHolder().put(new WordPOSKey(word, "b"), new
			// WordPOSValue("*", 1, 1, null, null));
			// System.out.println("addClusterString: " + word);
		}
	}

	public void addProperNouns() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.addProperNouns");
		myLogger.trace("Add proper nouns");

		List<String> ppnouns = new ArrayList<String>();
		ppnouns.addAll(Arrays.asList(Constant.PROPERNOUN.split("\\|")));

		for (int i = 0; i < ppnouns.size(); i++) {
			String word = ppnouns.get(i);
			if (word.matches("\\b(" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
				continue;
			}
			this.myDataHolder.updateDataHolder(word, "b", "*", "wordpos", 0);
			// this.getWordPOSHolder().put(new WordPOSKey(word, "z"), new
			// WordPOSValue("*", 0, 0, null, null));
			// System.out.println("Add ProperNoun: " + word);
		}
	}
	**/

	// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// suffix: -fid (adj), -form (adj), -ish (adj), -less (adj), -like (adj),
	// -merous (adj), -most (adj), -shaped (adj), -ous (adj),
	// -ly (adv), -er (advj), -est (advj)
	// for each unknown word in the unknownwords table:
	// separate root and suffix;
	// if the root is a word in WN or in the unknownwords table,
	// make the unknown word a "b" (boundary) word

	/**
	 * For each word in the unknown-word holder whose tag is "unknown", try to
	 * mark it as a "b" (boundary) word based on its suffix.
	 * 
	 * Both suffix helpers are run on every such word:
	 * {@link #posBySuffixCase1Helper(String)} separates root and suffix and
	 * checks the root against WordNet / the unknown-word table, and
	 * {@link #posBySuffixCase2Helper(String)} handles words that start with
	 * '_' or '.'.
	 * 
	 * suffix: -fid (adj), -form (adj), -ish (adj), -less (adj), -like (adj),
	 * -merous (adj), -most (adj), -shaped (adj), -ous (adj)
	 */
	public void posBySuffix() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.posBySuffix");
		myLogger.trace("Enter posBySuffix");

		for (Map.Entry<String, String> entry : this.myDataHolder
				.getUnknownWordHolder().entrySet()) {
			if (!entry.getValue().equals("unknown")) {
				continue;
			}
			String word = entry.getKey();
			// return values are not needed here
			posBySuffixCase1Helper(word);
			posBySuffixCase2Helper(word);
		}

		myLogger.trace("Quite posBySuffix");
	}

	/**
	 * Set the certaintyU and certaintyL value of every entry in the WordPOS
	 * collection to 0.
	 * 
	 * @param dh
	 *            the DataHolder whose WordPOS entries are reset in place
	 * @return the number of entries visited (every visited entry is reset)
	 */
	public int resetCounts(DataHolder dh) {
		int visited = 0;

		for (Iterator<Entry<WordPOSKey, WordPOSValue>> it = dh
				.getWordPOSHolderIterator(); it.hasNext();) {
			WordPOSValue value = it.next().getValue();
			value.setCertiantyU(0);
			value.setCertiantyL(0);
			visited++;
		}

		return visited;
	}

	/**
	 * Case 1 of posBySuffix: the word consists of lower-case letters and
	 * underscores and ends in one of the suffixes in {@code Constant.SUFFIX}.
	 * 
	 * The word is split into base and suffix; when
	 * {@link #containSuffix(String, String, String)} accepts the split, the
	 * word is recorded as a "b" word via {@code updateDataHolder}.
	 * 
	 * @param unknownWord
	 *            the word to check
	 * @return true if the word was recorded as a boundary word, false
	 *         otherwise
	 */
	public boolean posBySuffixCase1Helper(String unknownWord) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.posBySuffix");

		String pattern1 = "^[a-z_]+(" + Constant.SUFFIX + ")$";
		myLogger.debug("Pattern1: " + pattern1);

		if (!unknownWord.matches(pattern1)) {
			return false;
		}

		Matcher baseAndSuffix = Pattern.compile(
				"(.*?)(" + Constant.SUFFIX + ")$").matcher(unknownWord);
		if (!unknownWord.matches("^[a-zA-Z0-9_-]+$")
				|| !baseAndSuffix.matches()) {
			return false;
		}

		myLogger.debug("posBySuffix - check word: " + unknownWord);
		String base = baseAndSuffix.group(1);
		String suffix = baseAndSuffix.group(2);

		if (!this.containSuffix(unknownWord, base, suffix)) {
			myLogger.debug("Not Pass\n");
			return false;
		}

		myLogger.debug("Pass\n");
		this.myDataHolder.updateDataHolder(unknownWord, "b", "*", "wordpos", 0);
		myLogger.debug("posBySuffix - set word: " + unknownWord);
		return true;
	}

	/**
	 * Case 2 of posBySuffix: a word that starts with '_' or '.' followed only
	 * by lower-case letters (e.g. "_nerved") is put directly into the WordPOS
	 * holder as a "b" word.
	 * 
	 * @param unknownWord
	 *            the word to check
	 * @return true if the word was added as a boundary word, false otherwise
	 */
	public boolean posBySuffixCase2Helper(String unknownWord) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.posBySuffix");

		String pattern2 = "^[._.][a-z]+"; // , _nerved
		myLogger.debug("Pattern2: " + pattern2);

		boolean isPrefixed = unknownWord.matches(pattern2);
		if (!isPrefixed) {
			return false;
		}

		this.myDataHolder.getWordPOSHolder().put(
				new WordPOSKey(unknownWord, "b"),
				new WordPOSValue("*", 0, 0, null, null));
		myLogger.debug("posbysuffix set " + unknownWord + " a boundary word\n");
		return true;
	}

	/**
	 * Decide whether {@code suffix} really acts as a suffix in {@code word},
	 * i.e. whether the split of the word into {@code base} + {@code suffix}
	 * is plausible.
	 * 
	 * Rules, in order:
	 * <ul>
	 * <li>an empty base is always accepted;</li>
	 * <li>underscores are removed from the base (e.g. "cup_shaped");</li>
	 * <li>suffix "ly": accepted if WordNet knows the word as an adverb, or if
	 * the base is in the unknown-word holder;</li>
	 * <li>suffix "er"/"est": if WordNet knows the word, accepted only when the
	 * word is not an adjective but the base is; rejected if WordNet does not
	 * know the word;</li>
	 * <li>any other suffix: accepted if the word is solely an adjective in
	 * WordNet, or the base is in WordNet, or the base is in the unknown-word
	 * holder.</li>
	 * </ul>
	 * 
	 * @param word
	 *            the complete word
	 * @param base
	 *            the part of the word before the suffix
	 * @param suffix
	 *            the candidate suffix
	 * @return true if the split is accepted, false otherwise
	 */
	public boolean containSuffix(String word, String base, String suffix) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.posBySuffix.containSuffix");
		myLogger.trace("Enter containSuffix");

		WordNetPOSKnowledgeBase myWN = this.myLearnerUtility
				.getWordNetPOSKnowledgeBase();

		// check base
		if (base.length() == 0) {
			myLogger.trace("case 0");
			return true;
		}

		// Strings are immutable, so the result of replaceAll must be assigned
		// back; otherwise the underscore is never removed (cup_shaped).
		base = base.replaceAll("_", "");

		boolean wordInWN = myWN.contains(word); // if this word is in WordNet
		myLogger.trace(wordInWN ? "case 1.1" : "case 1.2");

		boolean baseInWN = myWN.contains(base);
		myLogger.trace(baseInWN ? "case 2.1" : "case 2.2");

		// if WN pos is adv, return true: e.g. ly, or if base is in the
		// unknownwords table
		if (suffix.equals("ly")) {
			myLogger.trace("case 3.1");
			if (wordInWN && myWN.isAdverb(word)) {
				return true;
			}
			// if the base is in the unknown word set, return true
			if (this.myDataHolder.getUnknownWordHolder().containsKey(base)) {
				return true;
			}
		}

		// if WN recognizes superlative, comparative adjs: e.g. er, est
		else if (suffix.equals("er") || suffix.equals("est")) {
			myLogger.trace("case 3.2");
			if (wordInWN) {
				boolean wordIsNotAdjective = !myWN.isAdjective(word);
				boolean baseIsAdjective = myWN.isAdjective(base);
				return wordIsNotAdjective && baseIsAdjective;
			}
		}

		// if base is in WN or the unknownwords table, or if the word has sole
		// pos adj in WN, return true: e.g. scalelike
		else {
			myLogger.trace("case 3.3");
			if (myWN.isSoleAdjective(word)) {
				return true;
			}
			if (baseInWN) {
				return true;
			}
			if (this.myDataHolder.getUnknownWordHolder().containsKey(base)) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Run {@link #markupByPatternHelper(SentenceStructure)} on every sentence
	 * in the sentence holder and log the index of each sentence it updated.
	 * The number of sentences is read once, before the loop starts.
	 */
	public void markupByPattern() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.markupByPattern");
		myLogger.trace("Enter markupByPattern");

		int total = this.myDataHolder.getSentenceHolder().size();
		int index = 0;
		while (index < total) {
			SentenceStructure current = this.myDataHolder.getSentenceHolder()
					.get(index);
			if (markupByPatternHelper(current)) {
				myLogger.debug("Updated Sentence #" + index);
			}
			index++;
		}

		myLogger.trace("Quite markupByPattern");
	}

	/**
	 * Tag a sentence by the way its original text starts.
	 * 
	 * The patterns are tried in order; the first one that matches decides the
	 * tag, and the modifier is set to the empty string:
	 * <ul>
	 * <li>cases 1-5: "x=", "2n=", "x ", "2n ", "2 n" give tag "chromosome";</li>
	 * <li>case 6: "fl" gives tag "flowerTime";</li>
	 * <li>case 7: "fr" gives tag "fruitTime".</li>
	 * </ul>
	 * 
	 * @param sentence
	 *            the sentence to inspect and, on a match, update
	 * @return true if the sentence was tagged, false if no pattern matched
	 */
	public boolean markupByPatternHelper(SentenceStructure sentence) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("markupByPattern");

		// Parallel tables: entry k holds the pattern and the tag of case k+1.
		final String[] startPatterns = { "^x=.*", "^2n=.*", "^x .*", "^2n .*",
				"^2 n.*", "^fl.*", "^fr.*" };
		final String[] tagsByCase = { "chromosome", "chromosome", "chromosome",
				"chromosome", "chromosome", "flowerTime", "fruitTime" };

		for (int caseIndex = 0; caseIndex < startPatterns.length; caseIndex++) {
			if (sentence.getOriginalSentence().matches(
					startPatterns[caseIndex])) {
				myLogger.trace("Case " + (caseIndex + 1));
				sentence.setTag(tagsByCase[caseIndex]);
				sentence.setModifier("");
				return true;
			}
		}

		return false;
	}

	// private String IGNOREPTN ="(IGNOREPTN)"; //disabled
	/**
	 * Run {@link #markupIgnoreHelper(SentenceStructure)} on every sentence in
	 * the sentence holder and log the index of each sentence it updated.
	 */
	public void markupIgnore() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.markupIgnore");
		myLogger.trace("Enter markupIgnore");

		int index = 0;
		while (index < this.myDataHolder.getSentenceHolder().size()) {
			SentenceStructure current = this.myDataHolder.getSentenceHolder()
					.get(index);
			if (markupIgnoreHelper(current)) {
				myLogger.debug("Updated Sentence #" + index);
			}
			index++;
		}

		myLogger.trace("Quite markupIgnore");
	}

	/**
	 * Tag a sentence as "ignore" (with an empty modifier) when its original
	 * text starts, optionally after a single leading space, with
	 * {@code Constant.IGNORE_PATTERN}.
	 * 
	 * @param sentence
	 *            the sentence to inspect and, on a match, update
	 * @return true if the sentence was tagged as "ignore", false otherwise
	 */
	public boolean markupIgnoreHelper(SentenceStructure sentence) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("markupIgnore");

		String originalText = sentence.getOriginalSentence();
		String ignoreRegex = "(^|^ )" + Constant.IGNORE_PATTERN + ".*$";

		if (!originalText.matches(ignoreRegex)) {
			return false;
		}

		sentence.setTag("ignore");
		sentence.setModifier("");
		myLogger.trace("Set Tag to \"ignore\", Modifier to \"\"");
		return true;
	}



	/**
	 * A helper of method discover(). Tells whether a sentence already carries
	 * a tag.
	 * 
	 * @param sentence
	 *            the sentence to check
	 * @return true if the tag of the sentence is NOT null; false otherwise
	 */
	public boolean isMarked(SentenceStructure sentence) {
		return sentence.getTag() != null;
	}



	/**
	 * Find the sentences that match the pattern.
	 * 
	 * A sentence is considered only when its status equals {@code status}
	 * (null-safe comparison) and its tagged state agrees with {@code hasTag}
	 * (a sentence counts as tagged when its tag is not null). The pattern is
	 * applied case-insensitively and must match at the beginning of the
	 * sentence ({@code lookingAt}).
	 * 
	 * @param pattern
	 *            regular expression to apply to each sentence
	 * @param status
	 *            required status of the sentence
	 * @param hasTag
	 *            true to search tagged sentences, false to search untagged
	 *            ones
	 * @return the positions (indexes in the sentence holder) of the sentences
	 *         that match the pattern
	 */
	public Set<Integer> matchPattern(String pattern, String status,
			boolean hasTag) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.discover.matchPattern");

		myLogger.trace("Enter matchPattern");
		myLogger.trace("Pattern: " + pattern);
		myLogger.trace("Status: " + status);
		myLogger.trace("HasTag: " + hasTag);

		Set<Integer> matchedIDs = new HashSet<Integer>();

		// The pattern is the same for every sentence: compile it at most
		// once, on first use, rather than once per candidate sentence.
		Pattern compiled = null;

		for (int i = 0; i < this.myDataHolder.getSentenceHolder().size(); i++) {
			SentenceStructure sent = this.myDataHolder.getSentenceHolder().get(
					i);
			String thisSentence = sent.getSentence();
			String thisStatus = sent.getStatus();
			String thisTag = sent.getTag();

			boolean isTagged = (thisTag != null);
			if (hasTag != isTagged || !StringUtils.equals(status, thisStatus)) {
				continue;
			}

			if (compiled == null) {
				compiled = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE);
			}
			if (compiled.matcher(thisSentence).lookingAt()) {
				myLogger.debug("Push Sentence #" + i);
				myLogger.debug("Sentence: " + thisSentence);
				myLogger.debug("Status: " + thisStatus);
				myLogger.debug("Tag: " + thisTag);
				myLogger.debug("\n");

				matchedIDs.add(i);
			}
		}

		myLogger.trace("Return IDs: " + matchedIDs);
		myLogger.trace("Quite matchPattern");
		myLogger.trace("\n");
		return matchedIDs;
	}

	/**
	 * Learn terms from the given sentences and tag them.
	 * 
	 * For every listed sentence that is not yet marked (see
	 * {@link #isMarked(SentenceStructure)}), {@code learnTerms} is asked for a
	 * tag and the number of new things learned; the sentence is then tagged
	 * with that tag. Rules and clues are applied to grow %NOUNS and %BDRY and
	 * to confirm tags.
	 * 
	 * @param matched
	 *            positions of the sentences to learn from
	 * @return the total number of new items reported by {@code learnTerms};
	 *         positive if anything new was learnt
	 */
	public int ruleBasedLearn(Set<Integer> matched) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.discover.ruleBasedLearn");

		myLogger.trace("Enter ruleBasedLearn");
		myLogger.trace("Matched IDs: " + matched);

		int learned = 0;

		for (Integer boxedID : matched) {
			int sentID = boxedID.intValue();
			SentenceStructure sentence = this.myDataHolder.getSentenceHolder()
					.get(sentID);
			if (isMarked(sentence)) {
				continue;
			}

			StringAndInt outcome = this.myLearnerUtility.learnTerms(
					this.myDataHolder, sentID);
			String tag = outcome.getString();
			int numNew = outcome.getInt();

			this.myLearnerUtility.tagSentence(this.myDataHolder,
					this.myConfiguration.getMaxTagLength(), sentID, tag);
			learned += numNew;
		}

		myLogger.trace("Return: " + learned);
		myLogger.trace("Quit ruleBaseLearn");
		myLogger.trace("\n");

		return learned;
	}










	/**
	 * Additional bootstrapping: repeat three markup passes until a full round
	 * reports no update.
	 * 
	 * Each round runs, in this order, {@link #wrapupMarkup()},
	 * {@link #oneLeadWordMarkup(Set)} (with the tags currently in the data
	 * holder) and {@code doItMarkup}. The values they return are summed; the
	 * loop ends when the sum of a round is not positive.
	 */
	public void additionalBootstrapping() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.additionalBootStrapping");
		myLogger.trace("[additionalBootStrapping]Start");

		int updatesThisRound;
		do {
			myLogger.trace(String.format("Enter one do-while loop iteration"));

			// wrap-up markup
			int wrapupUpdates = wrapupMarkup();
			myLogger.trace(String.format("wrapupMarkup() returned %d",
					wrapupUpdates));

			// one lead word markup
			Set<String> currentTags = myDataHolder.getCurrentTags();
			myLogger.trace(currentTags.toString());
			int leadWordUpdates = oneLeadWordMarkup(currentTags);
			myLogger.trace(String.format("oneLeadWordMarkup() returned %d",
					leadWordUpdates));

			// doit markup
			int doItUpdates = this.myLearnerUtility.doItMarkup(
					this.myDataHolder, this.myConfiguration.getMaxTagLength());
			myLogger.trace(String.format("doItMarkup() returned %d",
					doItUpdates));

			updatesThisRound = wrapupUpdates + leadWordUpdates + doItUpdates;
			myLogger.trace(String.format("Quite this iteration with flag = %d",
					updatesThisRound));
		} while (updatesThisRound > 0);

		myLogger.trace("[additionalBootStrapping]End");
	}

	/**
	 * In the sentence collection, search for untagged sentences whose lead is
	 * a single word that is among the tags passed in; tag each such sentence
	 * with its lead and add the lead to the word POS collection as a noun.
	 * 
	 * @param tags
	 *            a set of all tags in the tagged sentences in the sentence
	 *            collection
	 * @return the number of updates made, i.e. the sum of the values returned
	 *         by {@code updateDataHolder}
	 */
	public int oneLeadWordMarkup(Set<String> tags) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger
				.getLogger("learn.additionalBootStrapping.oneLeadWordMarkup");
		int sign = 0;
		myLogger.trace(String.format("Enter (%s)", tags));

		Iterator<SentenceStructure> iter = this.myDataHolder
				.getSentenceHolder().iterator();

		while (iter.hasNext()) {
			SentenceStructure sentence = iter.next();
			int ID = sentence.getID();
			String tag = sentence.getTag();
			String lead = sentence.getLead();

			// only untagged sentences with a one-word lead are of interest
			boolean isOneWordLead = !StringUtility.createMatcher(lead, ".* .*")
					.find();
			if ((tag == null) && isOneWordLead && tags.contains(lead)) {
				this.myLearnerUtility.tagSentence(this.myDataHolder,
						this.myConfiguration.getMaxTagLength(), ID, lead);
				myLogger.trace(String.format(
						"updateDataHolder(%s, n, -, wordpos, 1)", lead));
				sign += myDataHolder.updateDataHolder(lead, "n", "-",
						"wordpos", 1);
			}
		}

		myLogger.trace("Return: " + sign);
		// Report the accumulated count (previously a constant 0 was returned,
		// so the caller could never see the updates made here).
		return sign;
	}

	/**
	 * For the remaining sentences that do not have a tag yet, look for lead
	 * word co-occurrence and use the most frequently co-occurring phrases as
	 * tags, e.g. plication induplicate (n times) and plication reduplicate (m
	 * times) => plication is the tag and a noun; e.g. stigmatic scar basal (n
	 * times) and stigmatic scar apical (m times) => stigmatic scar is the tag
	 * and scar is a noun. What about externally like A; externally like B,
	 * functionally staminate florets, functionally staminate xyz?
	 * 
	 * Outline of the implementation: untagged sentences with a multi-word lead
	 * are collected and sorted; for each of them the lead minus its last word
	 * (the "shared head") is compared with the leads of all other untagged
	 * sentences. When more than one other sentence has a lead made of the
	 * shared head plus exactly one more word, and the shared head looks like a
	 * noun phrase, the sentences are tagged and the word POS collection is
	 * updated.
	 * 
	 * @return the sum of the values returned by the {@code updateDataHolder}
	 *         calls made
	 */
	public int wrapupMarkup() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger
				.getLogger("learn.additionalBootStrapping.wrapupMarkup");
		myLogger.trace("Enter");

		int sign = 0;
		Set<Integer> checkedIDs = new HashSet<Integer>();
		List<SentenceStructure> sentenceList = new LinkedList<SentenceStructure>();

		// Step 1: collect the untagged sentences whose lead contains a space,
		// i.e. consists of more than one word.
		for (int id1 = 0; id1 < this.myDataHolder.getSentenceHolder().size(); id1++) {
			SentenceStructure sentence = this.myDataHolder.getSentenceHolder()
					.get(id1);
			String tag = sentence.getTag();
			String lead = sentence.getLead();

			if ((tag == null)
					&& (StringUtility.createMatcher(lead, ".* .*").find())) {
				sentenceList.add(sentence);
			}
		}

		// Step 2: order the candidates by lead length.
		// NOTE(review): the direction selected by the 'false' argument is
		// defined in SentenceLeadLengthComparator - confirm there.
		SentenceLeadLengthComparator myComparator = new SentenceLeadLengthComparator(
				false);
		Collections.sort(sentenceList, myComparator);

		// Step 3: process each candidate in sorted order.
		Iterator<SentenceStructure> iter1 = sentenceList.iterator();
		while (iter1.hasNext()) {
			SentenceStructure sentence = iter1.next();
			int ID1 = sentence.getID();
			String lead = sentence.getLead();
			// if this sentence has been checked, pass
			if (checkedIDs.contains(ID1)) {
				continue;
			}

			List<String> words = new ArrayList<String>();
			words.addAll(Arrays.asList(lead.split("\\s+")));

			// shared head = the lead without its last word; 'match' is the
			// shared head as one space-separated string
			List<String> sharedHead = new ArrayList<String>();
			sharedHead.addAll(words.subList(0, words.size() - 1));
			String match = StringUtility.joinList(" ", sharedHead);

			// Find the other untagged sentences whose lead is the shared head
			// followed by exactly one more token.
			Set<SentenceStructure> sentenceSet = new HashSet<SentenceStructure>();
			for (int index = 0; index < this.myDataHolder.getSentenceHolder()
					.size(); index++) {
				SentenceStructure thisSentence = this.myDataHolder
						.getSentenceHolder().get(index);
				String thisLead = thisSentence.getLead();
				String tag = thisSentence.getTag();
				// NOTE(review): 'match' is inserted into the regex unquoted,
				// so regex metacharacters in a lead are interpreted.
				String pTemp = "^" + match + " [\\S]+$";
				myLogger.trace(thisLead);
				myLogger.trace(pTemp);

				// if ((tag==null) && StringUtility.isMatchedNullSafe(pTemp,
				// thisLead)) {
				if ((tag == null)
						&& StringUtility.isMatchedNullSafe(thisLead, pTemp)) {
					// leads identical to the current one are not counted
					if (!StringUtils.equals(thisLead, lead)) {
						sentenceSet.add(thisSentence);
					}
				}
			}

			// Only act when the shared head co-occurs with more than one
			// other distinct lead.
			if (sentenceSet.size() > 1) {
				String ptn = this.myLearnerUtility.getPOSptn(this.myDataHolder, sharedHead);
				String wnPOS = this.myLearnerUtility.getWordFormUtility()
						.checkWN(sharedHead.get(sharedHead.size() - 1), "pos");

				myLogger.trace("ptn: " + ptn);
				myLogger.trace("wnPOS: " + wnPOS);

				// The shared head is accepted when its POS pattern ends in n,
				// s or p, or ends in '?' while the WordNet POS string of its
				// last word contains "n".
				if ((StringUtility.createMatcher(ptn, "[nsp]$").find())
						|| ((StringUtility.createMatcher(ptn, "\\?$").find()) && (StringUtility
								.createMatcher(wnPOS, "n").find()))) {

					Iterator<SentenceStructure> iter2 = sentenceSet.iterator();
					while (iter2.hasNext()) {
						SentenceStructure thisSentence = iter2.next();
						int ID = thisSentence.getID();
						String thisLead = thisSentence.getLead();

						List<String> words2 = new ArrayList<String>();
						words2.addAll(Arrays.asList(thisLead.split("\\s+")));

						// case 1
						// case1: this lead has a word right after the shared
						// head; case2: the POS pattern of that word contains
						// p, s or n
						boolean case1 = false;
						boolean case2 = false;
						case1 = words2.size() > sharedHead.size();
						if (case1) {
							List<String> checkWord = new ArrayList<String>();
							checkWord.add(words2.get(sharedHead.size()));
							case2 = StringUtility.createMatcher(
									this.myLearnerUtility.getPOSptn(this.myDataHolder, checkWord), "[psn]").find();
						}

						if (case1 && case2) {
							myLogger.trace("Case 1");
							// nb: the word two positions after the shared
							// head, or "" if there is none
							String nb = words2.size() >= sharedHead.size() + 2 ? words2
									.get(sharedHead.size() + 1) : "";
							// NOTE(review): presumably keeps the shared head
							// plus the following word - confirm against
							// StringUtility.stringArraySplice
							words2 = StringUtility.stringArraySplice(words2, 0,
									sharedHead.size() + 1);
							String nmatch = StringUtility.joinList(" ", words2);

							// this sentence gets the extended tag, the
							// candidate sentence gets the shared head
							this.myLearnerUtility.tagSentence(this.myDataHolder, this.myConfiguration.getMaxTagLength(),ID, nmatch);
							myLogger.trace(String.format("tag (%d, %s)", ID,
									nmatch));
							this.myLearnerUtility.tagSentence(this.myDataHolder, this.myConfiguration.getMaxTagLength(),ID1, match);
							myLogger.trace(String.format("tag (%d, %s)", ID1,
									match));

							// last word of the extended tag is recorded as "n"
							String updatedWord = words2.get(words2.size() - 1);
							int update1 = this.myDataHolder.updateDataHolder(
									updatedWord, "n", "-", "wordpos", 1);
							sign += update1;
							myLogger.trace(String.format("update (%s)",
									updatedWord));

							// the word after the extended tag, if any, is
							// recorded as "b"
							if (!StringUtils.equals(nb, "")) {
								int update2 = this.myDataHolder
										.updateDataHolder(nb, "b", "",
												"wordpos", 1);
								sign += update2;
								myLogger.trace(String.format("update (%s)", nb));
							}

							// last word of the candidate's own lead is
							// recorded as "b"
							updatedWord = words.get(words.size() - 1);
							int update3 = this.myDataHolder.updateDataHolder(
									words.get(words.size() - 1), "b", "",
									"wordpos", 1);
							sign += update3;
							myLogger.trace(String.format("update (%s)",
									updatedWord));
						}
						// case 2
						else {
							myLogger.trace("Case 2");
							// b: the word right after the shared head in this
							// lead, or "" if there is none
							String b = words2.size() >= sharedHead.size() + 1 ? words2
									.get(sharedHead.size()) : "";

							// both sentences get the shared head as tag
							this.myLearnerUtility.tagSentence(this.myDataHolder, this.myConfiguration.getMaxTagLength(),ID, match);
							this.myLearnerUtility.tagSentence(this.myDataHolder, this.myConfiguration.getMaxTagLength(),ID1, match);

							// if (sharedHead.get(sharedHead.size() -
							// 1).equals("tissue")) {
							// System.out.println();
							// }

							// last word of the shared head is recorded as "n"
							int update1 = this.myDataHolder.updateDataHolder(
									sharedHead.get(sharedHead.size() - 1), "n",
									"-", "wordpos", 1);
							sign += update1;
							if (!StringUtils.equals(b, "")) {
								int update2 = this.myDataHolder
										.updateDataHolder(b, "b", "",
												"wordpos", 1);
								sign += update2;
							}
							// last word of the candidate's own lead is
							// recorded as "b"
							int update3 = this.myDataHolder.updateDataHolder(
									words.get(words.size() - 1), "b", "",
									"wordpos", 1);
							sign += update3;

						}
						checkedIDs.add(ID);
					}
				} else {
					// shared head rejected: mark the co-occurring sentences
					// as checked without tagging them
					Iterator<SentenceStructure> iter2 = sentenceSet.iterator();
					while (iter2.hasNext()) {
						SentenceStructure thisSentence = iter2.next();
						int ID = thisSentence.getID();
						checkedIDs.add(ID);
					}
				}
			} else {
				// not enough co-occurring leads: only the candidate itself is
				// marked as checked
				checkedIDs.add(ID1);
			}
		}

		myLogger.trace("Return " + sign);
		return sign;
	}

	/**
	 * Check if the lead starts with the head, comparing word by word with a
	 * null-safe string comparison.
	 * 
	 * @param head
	 *            the words expected at the beginning of the lead
	 * @param lead
	 *            the words of the lead
	 * @return true if every word of the head equals the word at the same
	 *         position of the lead; false otherwise, or if either list is
	 *         null, or if the head is longer than the lead
	 */
	public boolean hasHead(List<String> head, List<String> lead) {
		// null case
		if (head == null || lead == null) {
			return false;
		}

		// a head longer than the lead cannot be its prefix
		if (head.size() > lead.size()) {
			return false;
		}

		int position = 0;
		for (String headWord : head) {
			if (!StringUtils.equals(headWord, lead.get(position))) {
				return false;
			}
			position++;
		}

		return true;
	}



	/**
	 * Unknown-word bootstrapping: runs the preprocessing, main and
	 * postprocessing steps in that order, logging the start and the end.
	 */
	public void unknownWordBootstrapping() {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.unknownWordBootstrapping");

		myLogger.trace("[unknownWordBootstrapping]Start");
		this.unknownWordBootstrappingPreprocessing();
		this.unknownWordBootstrappingMain();
		this.unknownWordBootstrappingPostprocessing();
		myLogger.trace("[unknownWordBootstrapping]End");
	}

	/**
	 * Preprocessing step of unknown-word bootstrapping: asks the learner
	 * utility to tag all sentences, passing the mode "singletag" and the type
	 * "sentence".
	 */
	public void unknownWordBootstrappingPreprocessing() {
		final String mode = "singletag";
		final String type = "sentence";
		this.myLearnerUtility.tagAllSentences(this.myDataHolder, mode, type);
	}

	/**
	 * Main step of unknown-word bootstrapping. Currently a no-op: the only
	 * call this step used to make is disabled, so nothing is learned here.
	 */
	public void unknownWordBootstrappingMain() {
		// Disabled. The intended loop repeated the call below for as long as
		// it reported new words, using the plural-middle pattern "(ee)":
		// this.unknownWordBootstrappingGetUnknownWord(plMiddle);
	}

	/**
	 * Postprocessing step of unknown-word bootstrapping.
	 * 
	 * Unknown words that contain an underscore (e.g. pistillate_zone), are
	 * made of letters, digits, '_' and '-' only, and do NOT end in
	 * {@code _<noun>} for a known singular/plural noun are recorded as "b"
	 * (boundary) words. Those of them that are not forbidden words are then
	 * used to re-annotate every sentence that is not tagged "ignore" and that
	 * contains one of them followed by a space.
	 */
	public void unknownWordBootstrappingPostprocessing() {
		// pistillate_zone
		// get all nouns from wordPOS holder
		Set<String> POSTags = new HashSet<String>();
		POSTags.add("p");
		POSTags.add("s");
		Set<String> nouns = this.getDataHolder().getWordsFromWordPOSByPOSs(
				POSTags);

		// get boundaries
		Set<String> boundaries = new HashSet<String>();
		Set<String> words = this.getDataHolder().getWordsFromUnknownWord(
				"^.*_.*$", true, "^unknown$", true);

		if (!words.isEmpty()) {
			// Both patterns are the same for every word: compile them once.
			Pattern plainWord = Pattern.compile("^[a-zA-Z0-9_-]+$");
			Pattern nounSuffix = Pattern.compile(
					"_(" + StringUtils.join(nouns, "|") + ")$",
					Pattern.CASE_INSENSITIVE);

			for (String word : words) {
				// The noun-suffix pattern is only anchored at the end, so it
				// must be searched for with find(); matches() would require
				// the whole word to be "_<noun>" and would let words such as
				// pistillate_zone through.
				boolean endsWithNoun = nounSuffix.matcher(word).find();
				if (plainWord.matcher(word).matches() && !endsWithNoun) {
					if (!StringUtility.createMatcher(
							word,
							"\\b("
									+ this.myLearnerUtility.getConstant().FORBIDDEN
									+ ")\\b").find()) {
						boundaries.add(word);
					}
					this.getDataHolder().updateDataHolder(word, "b", "",
							"wordpos", 1);
				}
			}
		}

		// if the boundaries is not empty
		if (boundaries.size() > 0) {
			// the set of boundaries no longer changes: build the regex once
			String boundaryRegex = "(^| )(" + StringUtils.join(boundaries, "|")
					+ ") ";

			Iterator<SentenceStructure> iter = this.getDataHolder()
					.getSentenceHolderIterator();
			while (iter.hasNext()) {
				SentenceStructure sentenceItem = iter.next();
				String tag = sentenceItem.getTag();
				String sentence = sentenceItem.getSentence();
				int sentenceID = sentenceItem.getID();

				// StringUtils.equals is null-safe, so a null tag also counts
				// as "not ignore"
				if (!StringUtils.equals(tag, "ignore")
						&& StringUtility.createMatcher(sentence, boundaryRegex)
								.find()) {
					KnownTagCollection tags = new KnownTagCollection(null,
							null, null, boundaries, null, null);
					sentence = this.myLearnerUtility.annotateSentence(sentence,
							tags, this.myDataHolder.getBMSWords());
					SentenceStructure updatedSentence = this.getDataHolder()
							.getSentence(sentenceID);
					updatedSentence.setSentence(sentence);
				}
			}
		}
	}


	/**
	 * Repeats a round of adjective-subject markup, modifier discovery and
	 * and/or handling until a round reports no update, then clears the
	 * remaining "andor" tags and runs one final adjective-subject pass.
	 * 
	 * @param dataholderHandler
	 *            the data holder whose sentences are tagged and updated
	 */
	public void adjectiveSubjectBootstrapping(DataHolder dataholderHandler) {
		int flag;

		do {
			// count only this round's updates; accumulating across rounds
			// would keep the loop running forever after the first discovery
			flag = 0;

			// tag all sentences
			this.myLearnerUtility.tagAllSentences(dataholderHandler, "singletag", "sentence");

			// adjective subject markup: may discover new modifier, new boundary, and new nouns
			flag += this.adjectiveSubjects(dataholderHandler);

			// work on tag='andor' clauses, move to the main bootstrapping
			flag += discoverNewModifiers(dataholderHandler);

			flag += this.handleAndOr(dataholderHandler);
			dataholderHandler.untagSentences();

			// the result of this markup step is not counted as an update
			// NOTE(review): this passes this.myDataHolder rather than
			// dataholderHandler - confirm both always refer to the same holder
			this.myLearnerUtility.doItMarkup(this.myDataHolder, this.myConfiguration.getMaxTagLength());

		} while (flag > 0);

		// reset unsolvable andor to NULL
		for (SentenceStructure sentenceItem : dataholderHandler.getSentenceHolder()) {
			String tag = sentenceItem.getTag();
			if (StringUtils.equals(tag, "andor")) {
				sentenceItem.setTag(null);
			}
		}

		// cases released from andor[m&mn] may be marked by adjectiveSubjects
		this.myLearnerUtility.tagAllSentences(dataholderHandler, "singletag", "sentence");
		this.adjectiveSubjects(dataholderHandler);
	}
	
	/**
	 * Adjective-subject markup in two parts.
	 * <p>
	 * Part 1 ({@code adjectiveSubjectsPart1}) collects the modifiers that
	 * appear directly before a boundary word in annotated sentences
	 * ("&lt;M&gt;..&lt;/M&gt; &lt;B&gt;..&lt;/B&gt;"). Every collected
	 * modifier that is already present in the modifier holder is flagged as a
	 * type modifier. Part 2 ({@code adjectiveSubjectsPart2}) then processes
	 * the "type modifier + word" patterns.
	 * 
	 * @param dataholderHandler
	 *            the data holder to read sentences from and to update
	 * @return the value returned by {@code adjectiveSubjectsPart2}
	 */
	public int adjectiveSubjects(DataHolder dataholderHandler) {
		// Part 1: collect evidence for the usage of "modifier boundary"
		Set<String> typeModifiers = adjectiveSubjectsPart1(dataholderHandler,
				new HashSet<String>());

		for (String typeModifier : typeModifiers) {
			if (!dataholderHandler.getModifierHolder().containsKey(typeModifier)) {
				continue;
			}
			dataholderHandler.getModifierHolder().get(typeModifier)
					.setIsTypeModifier(true);
		}

		// Part 2: process "typemodifier unknown" patterns
		return adjectiveSubjectsPart2(dataholderHandler, typeModifiers);
	}
	
	/**
	 * Collects, from every sentence whose tag is not "ignore", the modifiers
	 * that are directly followed by a boundary word, i.e. every occurrence of
	 * "&lt;M&gt;modifier&lt;/M&gt; &lt;B&gt;...&lt;/B&gt; " in the annotated
	 * sentence text. Inner tags are stripped from the modifier before it is
	 * stored.
	 * 
	 * @param dataholderHandler
	 *            the data holder providing the sentences
	 * @param typeModifiers
	 *            the set the modifiers are added to (modified in place)
	 * @return the same {@code typeModifiers} set
	 */
	public Set<String> adjectiveSubjectsPart1(DataHolder dataholderHandler, Set<String> typeModifiers) {
		// loop-invariant, so compiled once
		Pattern modifierBoundaryPattern = Pattern.compile(".*?<M>(\\S+)</M> <B>[^,.]+</B> (.*)");

		for (SentenceStructure sentenceItem : dataholderHandler.getSentenceHolder()) {
			String remainder = "" + sentenceItem.getSentence();
			String tag = sentenceItem.getTag();

			// StringUtils.equals is null-safe: a null tag is "not ignored"
			if (StringUtils.equals(tag, "ignore")) {
				continue;
			}

			Matcher m = modifierBoundaryPattern.matcher(remainder);
			while (m.find()) {
				String modifier = m.group(1).replaceAll("<\\S+?>", "");
				typeModifiers.add(modifier);

				// The pattern's trailing (.*) consumes the rest of the line,
				// so a further find() on the same input can never succeed.
				// Continue on the text after the match instead; it is always
				// strictly shorter, so the loop terminates.
				remainder = m.group(2);
				m.reset(remainder);
			}
		}

		return typeModifiers;
	}
	
	/**
	 * Part 2 of {@link #adjectiveSubjects(DataHolder)}: processes sentences
	 * whose tag is null, empty or "unknown" and that contain one of the given
	 * type modifiers inside an &lt;M&gt; annotation. For the first
	 * "[start] [modifiers] [word] [rest]" match in such a sentence it may tag
	 * the sentence (as "ditto" or with a modifier/tag derived from the parent
	 * sentence tag) and may record new POS or modifier information.
	 * 
	 * @param dataholderHandler
	 *            the data holder to read sentences from and to update
	 * @param typeModifiers
	 *            the type modifiers collected by
	 *            {@code adjectiveSubjectsPart1}
	 * @return the sum of the values returned by the {@code updateDataHolder}
	 *         calls whose result is counted
	 */
	public int adjectiveSubjectsPart2(DataHolder dataholderHandler,
			Set<String> typeModifiers) {
		String pos = null;
		int flag = 0;

		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {
			int sentenceID = sentenceItem.getID();
			String sentence = sentenceItem.getSentence();
			String tag = sentenceItem.getTag();
			int count = 0;

			if (((tag == null) || StringUtils.equals(tag, "") || StringUtils
					.equals(tag, "unknown"))
					&& adjectiveSubjectsPart2Helper1(sentence, typeModifiers)) {

				if (sentence != null) {
					String sentenceCopy = sentence + "";
					// groups: 1 = start, 2 = modifier phrase, 3 = last word
					// before a conjunction (optional), 4 = word, 5 = rest
					String regex = "(.*?)((?:(\\S+)\\s*(?:and|or|nor|and / or|or / and)\\s*)*(?:<M>\\S+</M>\\s*)+) (\\S+)\\s*(.*)";
					Pattern p = Pattern.compile(regex);
					Matcher m = p.matcher(sentenceCopy);
					// NOTE(review): sentenceCopy is reassigned to the rest of
					// the sentence below, but the matcher keeps scanning the
					// original text, whose trailing (.*) was already consumed;
					// in practice only the first match is processed. Confirm
					// whether the matcher should be reset on sentenceCopy.
					while (m.find()) {
						int knownPOS = 0;
						String start = m.group(1);
						String modifier = m.group(2);
						// null when the optional conjunction part is absent
						String newModifier = m.group(3);
						String word = m.group(4);
						sentenceCopy = m.group(5);

						// case 1
						// NOTE(review): as written this skips every word that
						// is NOT in forbiddenWords - confirm the negation is
						// intended.
						if (!this.myLearnerUtility.getConstant().forbiddenWords
								.contains(word)) {
							count++;
							continue;
						}

						// case 2
						// group 3 may be null; upper-case it only when present
						// (previously a NullPointerException)
						String newModifierUpper = newModifier == null ? null
								: newModifier.toUpperCase();
						if (StringUtility.isMatchedNullSafe(
								newModifierUpper, "<N>")
								|| StringUtility.isMatchedNullSafe(
										start.toUpperCase(), "<N>")) {
							count++;
							continue;
						}

						// case 3
						boolean c3 = this.myLearnerUtility.getConstant().prepositionWords.contains(word);
						if (count == 0 
								&& ((StringUtility.isMatchedNullSafe(word, "[;,]") || c3) 
										|| (StringUtility.isMatchedNullSafe(word, "[.;,]") 
												&& !StringUtility.isMatchedNullSafe(sentence, "\\w")))) {
							// case 3.1
							// start with a <[BM]>, followed by a <[BM]>
							if ((StringUtility.isMatchedNullSafe(word,
									"\\b(with|without|of)\\b"))
									&& ((StringUtility.isMatchedNullSafe(modifier,
													"^(<M>)?<B>(<M>)?\\w+(</M)?</B>(</M>)? (?:and|or|nor|and / or|or / and)?\\s*(<[BM]>)+\\w+(</[BM]>)+\\s*$")) 
									|| (StringUtility.isMatchedNullSafe(modifier, "^(<[BM]>)+\\w+(</[BM]>)+$")))) { 
								dataholderHandler.tagSentenceWithMT(sentenceID,
										sentenceCopy, "", "ditto",
										"adjectivesubject[ditto]");
								count++;
								continue;
							} 
							// case 3.2
							// modifier={<M>outer</M> <M><B>pistillate</B></M>} word= <B>,</B> sentence= <N>corollas</N>....
							// make the last modifier b
							else {
								if (modifier != null) {
									Pattern p2 = Pattern
											.compile("^(.*) (\\S+)$");
									Matcher m2 = p2.matcher(modifier);
									if (m2.find()) {
										modifier = m2.group(1);
										String b = m2.group(2);
										b = b.replaceAll("<\\S+?>", "");
										dataholderHandler.updateDataHolder(b,"b", "", "wordpos", 1);
										tag = dataholderHandler.getParentSentenceTag(sentenceID);
										List<String> modifierAndTag = 
												dataholderHandler.getMTFromParentTag(tag);
										String modifier2 = modifierAndTag.get(0);
										tag = modifierAndTag.get(1);
										modifier = modifier.replaceAll(
												"<\\S+?>", "");
										if (StringUtility.isMatchedNullSafe(modifier2, "\\w")) {
											modifier = modifier + " " + modifier2;
										}
										dataholderHandler.tagSentenceWithMT(
												sentenceID, sentence, modifier,
												tag, "adjectivesubject[M-B,]");
										count++;
										continue;
									}
								}
							}
						}

						// case 4
						// get new modifier from modifiers like
						// "mid and/or <m>distal</m>"
						if (!StringUtility.isMatchedNullSafe(newModifier,"<")
								&& StringUtility.isMatchedNullSafe(newModifier, "\\w")
								&& StringUtility.isMatchedNullSafe(start,",(?:</B>)?\\s*$")) {

							flag += dataholderHandler.updateDataHolder(newModifier, "m", "", "modifiers", 1);
//							print "find a modifier [E0]: $newm\n" if $debug;
						}

						// case 5
						// pos = "N"/"B"
						if (word != null) {
							Pattern p5 = Pattern.compile("([A-Z])>(<([A-Z])>)?(.*?)<");
							Matcher m5 = p5.matcher(word);
							if (m5.find()) {
								String firstTag = m5.group(1);
								// null when the word carries a single tag
								String secondTag = m5.group(3);

								word = m5.group(4);
								// Concatenating a null second tag would yield
								// e.g. "Nnull" and make every single-tag word
								// look double-tagged below.
								pos = secondTag == null ? firstTag : firstTag + secondTag;

								// if <N><B>, decide on one tag
								if (pos.length() > 1) {
									if (StringUtility.isMatchedNullSafe(sentence, "^\\s*<B>[,;:]<\\/B>\\s*<N>")
											||StringUtility.isMatchedNullSafe(sentence, "^\\s*<B>\\.<\\/B>\\s*$")){
										pos = "B";
									}
									else {
										pos = "N";
									}
								}
								knownPOS = 1;
							}
							else {
								List<POSInfo> POSs = dataholderHandler.checkPOSInfo(word);
								pos = POSs.get(0).getPOS();
							}
						}

						pos = StringUtils.equals(pos, "?") ? this.myLearnerUtility.getWordFormUtility().getNumber(word) : pos;

						// part 6
						// markup sentid, update pos for word, new modifier
						if (StringUtils.equals(pos, "p") || StringUtils.equals(pos, "N")) {
							if (knownPOS != 0) {
								flag += dataholderHandler.updateDataHolder(word, "p", "-", "wordpos", 1);
//								/print "update [$word] pos: p\n" if (!$knownpos) && $debug;
							}

							if (count == 0 
									&& (StringUtility.isMatchedNullSafe(start, "^\\S+\\s?(?:and |or |and \\/ or |or \\/ and )?$")
											||start.length() == 0)) {
								modifier = start + modifier;
								modifier = modifier.replaceAll("<\\S+?>", "");
								word = word.replaceAll("<\\S+?>", "");
								dataholderHandler.tagSentenceWithMT(sentenceID, sentence, modifier, tag, "adjectivesubject[M-N]");
								// new modifier
								start = start.replaceAll("\\s*(and |or |and \\/ or |or \\/ and )\\s*", "");
								start = start.replaceAll("<\\S+?>", "");

								while (StringUtility.isMatchedNullSafe(start, "^("+this.myLearnerUtility.getConstant().STOP+")\\b")) {
									start = start.replaceAll("^("+this.myLearnerUtility.getConstant().STOP+")\\b\\s*", "");
								}

								if (start.length() > 0) {
									flag += dataholderHandler.updateDataHolder(start, "m", "", "modifiers", 1);
									//print "find a modifier [E]: $start\n" if $debug;

								}
							}
						}
						// not p
						else {
							if (knownPOS != 0) {
								// update pos for word, markup sentid (get tag
								// from context), new modifier
								flag += dataholderHandler.updateDataHolder(word, "b", "", "wordpos", 1);
								// print "update [$word] pos: b\n" if $debug;
							}

							if (count == 0 
									&& (StringUtility.isMatchedNullSafe(start, "^\\S+\\s?(?:and |or |and \\/ or |or \\/ and )?$")
											||start.length() == 0)) {
								while (StringUtility.isMatchedNullSafe(start, "^("+this.myLearnerUtility.getConstant().STOP+"|"+this.myLearnerUtility.getConstant().FORBIDDEN+"|\\w+ly)\\b")) {
									start = start.replaceAll("^("+this.myLearnerUtility.getConstant().STOP+"|"+this.myLearnerUtility.getConstant().FORBIDDEN+"|\\w+ly)\\b\\s*", "");									
								}

								modifier = start + modifier;
								modifier = modifier.replaceAll("<\\S+?>", "");
								tag = dataholderHandler.getParentSentenceTag(sentenceID);
								List<String> modifierAndTag = dataholderHandler.getMTFromParentTag(tag);
								String newM = modifierAndTag.get(0);
								tag = modifierAndTag.get(1);
								if (StringUtility.isMatchedNullSafe(newM, "\\w")) {
									modifier = modifier + " " + newM;
								}
								dataholderHandler.tagSentenceWithMT(sentenceID, sentence, modifier, tag, "adjectivesubject[M-B]");
								// new modifier
								start = start.replaceAll("\\s*(and |or |and \\/ or |or \\/ and )\\s*", "");
								start = start.replaceAll("<\\S+?>", "");
								if (start.length() > 0) {
									if (!StringUtility.isMatchedNullSafe(start, "ly\\s*$") 
											&& !StringUtility.isMatchedNullSafe(start, "\\b(" + this.myLearnerUtility.getConstant().STOP + "|" + this.myLearnerUtility.getConstant().FORBIDDEN + ")\\b")) {
										// NOTE(review): the checks above are on
										// start but word is what gets stored -
										// confirm which one is meant.
										flag += dataholderHandler.updateDataHolder(word, "m", "", "modifiers", 1);
										// print "find a modifier [F]: $start\n" if $debug;
									}
								}	
							}
						}

						count++;
					}
				}
			}
		}

		return flag;
	}
	
	/**
	 * Checks the sentence against the pattern
	 * "&lt;M&gt;\S*(tm1|tm2|...)\S*&lt;/M&gt; .*", where the alternation is
	 * built from the given type modifiers.
	 * 
	 * @param sentence
	 *            the annotated sentence text
	 * @param typeModifiers
	 *            the type modifiers joined with "|" into the pattern
	 * @return the result of {@code StringUtility.isMatchedNullSafe} for the
	 *         sentence and that pattern
	 */
	public boolean adjectiveSubjectsPart2Helper1(String sentence,
			Set<String> typeModifiers) {
		StringBuilder regex = new StringBuilder("<M>\\S*(");
		regex.append(StringUtils.join(typeModifiers, "|"));
		regex.append(")\\S*</M> .*");

		return StringUtility.isMatchedNullSafe(sentence, regex.toString());
	}
	
	/**
	 * Discover new modifiers using and/or pattern. 
	 * For "modifier and/or unknown boundary" pattern or
	 * "unknown and/or modifier boundary" pattern, make "unknown" a modifier.
	 * When the pattern is at the beginning of the sentence (optionally after
	 * one leading word) the sentence is also tagged, using the modifier and
	 * tag derived from its parent sentence tag.
	 * 
	 * @param dataholderHandler
	 *            the data holder to read sentences from and to update
	 * @return the sum of the values returned by the {@code changePOS} and
	 *         {@code updateDataHolder} calls made
	 */
	public int discoverNewModifiers(DataHolder dataholderHandler) {
		int sign = 0;

		// all patterns are loop-invariant, so they are compiled once
		Pattern leadingModifierThenWord = Pattern.compile("^(?:\\w+\\s)?<M>(\\S+)<\\/M> (and|or|nor|and \\/ or|or \\/ and) ((?:<[^M]>)*[^<]+(?:<\\/[^M]>)*) <B>[^,;:\\.]");
		Pattern middleModifierThenWord = Pattern.compile("<M>(\\S+)<\\/M> (and|or|nor|and \\/ or|or \\/ and) (\\w+) <B>[^,;:\\.]");
		Pattern leadingWordThenModifier = Pattern.compile("^(?:\\w+\\s)?((?:<[^M]>)*[^<]+(?:<\\/[^M]>)*) (and|or|nor|and \\/ or|or \\/ and) <M>(\\S+)<\\/M> <B>[^:;,\\.]");
		Pattern middleWordThenModifier = Pattern.compile("(\\w+) (and|or|nor|and \\/ or|or \\/ and) <M>(\\S+)<\\/M> <B>[^,:;\\.]");
		// splits "<X>word</" into the opening tag(s) and the bare word
		Pattern taggedWord = Pattern.compile("(.*?>)(\\w+)<\\/");

		// "modifier and/or unknown boundary" pattern
		for (SentenceStructure sentenceItem : dataholderHandler.getSentenceHolder()) {
			String sentenceTag = sentenceItem.getTag();
			String sentence = sentenceItem.getSentence();	
			int sentenceID = sentenceItem.getID();
			if ((!StringUtility.isMatchedNullSafe(sentenceTag, "ignore") || sentenceTag == null) 
				&& StringUtility.isMatchedNullSafe(sentence, "<M>[^\\s]+</M> (or|and|and / or|or / and) .*")){
				String POS = "";
				// if "<m>xxx</m> (and|or) yyy (<b>|\d)" pattern appears at the
				// beginning or is right after the 1st word of the sentence,
				// mark up the sentence, add yyy as a modifier
				if (sentence != null) {
					Matcher m1 = leadingModifierThenWord.matcher(sentence);
					if (m1.find()) {
						String g1 = m1.group(1);
						String g2 = m1.group(2);
						String g3 = m1.group(3);
						String modifier = g1 +" "+ g2+" "+ g3;
						String newM = g3;

						if (!StringUtility.isMatchedNullSafe(newM, "\\b("+this.myLearnerUtility.getConstant().STOP+")\\b")) {
							modifier = modifier.replaceAll("<\\S+?>", "");
							if (newM != null) {
								Matcher m11 = taggedWord.matcher(newM);
								if (m11.find()) {
									newM = m11.group(2);
									POS = m11.group(1);
								}
							}

							// update N to M: retag sentences tagged as $newm, remove [s] record from wordpos
							if (StringUtility.isMatchedNullSafe(POS, "<N>")) {
								sign += dataholderHandler.changePOS(newM, "s", "m", "", 1);
							}
							// B
							else {
								sign += dataholderHandler.updateDataHolder(newM, "m", "", "modifiers", 1);
							}
							// print "find a modifier [A]: $newm\n" if $debug;
							String tag = dataholderHandler.getParentSentenceTag(sentenceID);
							List<String> modifierAndTag = dataholderHandler.getMTFromParentTag(tag);
							String m = modifierAndTag.get(0);
							tag = modifierAndTag.get(1);
							if (StringUtility.isMatchedNullSafe(m, "\\w")) {
								modifier = modifier + " "+m;
							}
							dataholderHandler.tagSentenceWithMT(sentenceID, sentence, modifier, tag, "discovernewmodifiers");							
						}
					}
					// if the pattern appear in the middle of the sentence, add yyy as modifier
					else {
						Matcher m2 = middleModifierThenWord.matcher(sentence); 
						if (m2.find()) {
							String newM = m2.group(3);
							sign += dataholderHandler.updateDataHolder(newM, "m", "", "modifiers", 1);
							// print "find a modifier[B]: $newm\n" if $debug;
						}

					}
				}
			}
		}

		// "unknown and/or modifier boundary"
		for (SentenceStructure sentenceItem : dataholderHandler.getSentenceHolder()) {
			String sentence = sentenceItem.getSentence();
			String sentenceTag = sentenceItem.getTag();
			// The pre-filter previously used [^\w]+ for both the unknown word
			// and the modifier; "<M>[^\w]+</M>" only matches a modifier made
			// of non-word characters, so this half never ran on real words.
			// [^\s]+ mirrors the pre-filter of the first loop.
			if ((!StringUtility.isMatchedNullSafe(sentenceTag, "ignore") || sentenceTag == null) 
					&& StringUtility.isMatchedNullSafe(sentence, "[^\\s]+ (and|or|nor|and / or|or / and) <M>[^\\s]+</M> .*")) {
				int sentenceID = sentenceItem.getID();

				String POS = "";
				// if "xxx (and|or|nor) <m>yyy</m> (<b>|\d)" pattern appear at the beginning or is right after the 1st word of the sentence, mark up the sentence, add xxx as a modifier
				if (sentence != null) {
					Matcher m3 = leadingWordThenModifier.matcher(sentence);
					if (m3.find()) {
						String g1 = m3.group(1);
						String g2 = m3.group(2);
						String g3 = m3.group(3);

						String modifier = g1 + " " + g2 + " " + g3; 
						String newM = g1;
						modifier = modifier.replaceAll("<\\S+?>", "");
						if (newM != null) {
							Matcher m31 = taggedWord.matcher(newM);
							if (m31.find()) { // N or B
								newM = m31.group(2);
								POS = m31.group(1);
							}
						}

						if (StringUtility.isMatchedNullSafe(POS, "<N>")) { // update N to M
							sign += dataholderHandler.changePOS(newM, "s", "m", "", 1); // update $newm to m
						}
						else { // B
							sign += dataholderHandler.updateDataHolder(newM, "m", "", "modifiers", 1);
						}
						// print "find a modifier [C]: $newm\n" if $debug;
						String tag = dataholderHandler.getParentSentenceTag(sentenceID);
						List<String> modifierAndTag = dataholderHandler.getMTFromParentTag(tag);
						String m = modifierAndTag.get(0);
						tag = modifierAndTag.get(1);

						if (StringUtility.isMatchedNullSafe(m, "\\w")) {
							modifier = modifier +" "+m;
						}

						dataholderHandler.tagSentenceWithMT(sentenceID, sentence, modifier, tag, "discovernewmodifiers");
					}
					else {
						Matcher m32 = middleWordThenModifier.matcher(sentence);
						// if the pattern appear in the middle of the sentence, add xxx as modifier
						if (m32.find()) {
							String newM = m32.group(1);
							sign += dataholderHandler.updateDataHolder(newM, "m", "", "modifiers", 1);
						}
						//print "find a modifier [D]: $newm\n" if $debug;
					}
				}
			}
		}

		return sign;
	}

	/**
	 * Runs {@code andOrTag} on every sentence returned by
	 * {@code getSentencesByTagPattern("^andor$")}, using the segment and
	 * whole-sentence patterns from {@code Constant}.
	 * 
	 * @param dataholderHandler
	 *            the data holder to read sentences from and to update
	 * @return the sum of the values returned by {@code andOrTag}
	 */
	public int handleAndOr(DataHolder dataholderHandler) {
		PropertyConfigurator.configure("conf/log4j.properties");
		final Logger andOrLogger = Logger.getLogger("learn.handleAndOr");
		andOrLogger.info("to match pattern " + Constant.ANDORPTN);

		int sign = 0;
		for (SentenceStructure andOrSentence : dataholderHandler
				.getSentencesByTagPattern("^andor$")) {
			sign += this.andOrTag(dataholderHandler, andOrSentence.getID(),
					andOrSentence.getSentence(), Constant.SEGANDORPTN,
					Constant.ANDORPTN);
		}

		return sign;
	}

	/**
	 * Tries to tag one "andor" sentence.
	 * <p>
	 * The sentence is split on spaces and turned into a pattern string by the
	 * learner utility (with every 't' replaced by 'm'). If
	 * {@code andOrTagCase1Helper} produces a result for that pattern, the
	 * sentence is tagged from it and the reported boundary word / "p" words
	 * are recorded in the "wordpos" data. Otherwise, if the pattern starts
	 * with "b+&amp;b+" followed by one of ",:;.", the sentence is tagged
	 * "ditto". Otherwise nothing is changed.
	 * 
	 * @param dataholderHandler
	 *            the data holder to update
	 * @param sentenceID
	 *            ID of the sentence
	 * @param sentence
	 *            annotated sentence text
	 * @param sPattern
	 *            segment pattern (not used by this method)
	 * @param wPattern
	 *            pattern handed to {@code andOrTagCase1Helper}
	 * @return the sum of the values returned by the {@code updateDataHolder}
	 *         calls made, or -1 when no sentence pattern could be derived
	 */
	public int andOrTag(DataHolder dataholderHandler, int sentenceID,
			String sentence, String sPattern, String wPattern) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.andOrTag");
		myLogger.trace("Enter");

		int sign = 0;

		Set<String> token = new HashSet<String>();
		token.addAll(Arrays.asList("and or nor".split(" ")));
		token.add("\\");
		token.add("and / or");

		int limit = 80;
		List<String> words = new ArrayList<String>();
		words.addAll(Arrays.asList(sentence.split(" ")));
		String pattern = this.getLearnerUtility().getSentencePtn(
				dataholderHandler, token, limit, words);

		// check for null before using the pattern (the check used to come
		// after replaceAll, where it could never be reached with a null)
		if (pattern == null) {
			return -1;
		}
		pattern = pattern.replaceAll("t", "m");

		myLogger.info(String.format("Andor pattern %s for %s", pattern,
				words.toString()));

		List<List<String>> res = this.andOrTagCase1Helper(pattern, wPattern, words, token);
		if (res != null) {
			// res layout: 0-3 pattern/segment lists (unused here),
			// 4-5 [modifier, tag] pairs, 6 boundary word, 7 "p" words
			List<String> tagAndModifier1 = res.get(4);
			List<String> tagAndModifier2 = res.get(5);
			List<String> update1 = res.get(6);
			List<String> update2 = res.get(7);

			if (tagAndModifier1.size() > 0) {
				String tag = tagAndModifier1.get(1);
				dataholderHandler.tagSentenceWithMT(sentenceID, sentence, "",
						tag, "andor[n&n]");
				myLogger.trace("tagSentenceWithMT(" + sentenceID + ", "
						+ sentence + ", , " + tag + ", andor[n&n]");
			} else {
				myLogger.debug(String.format(
						"Andor can not determine a tag or modifier for %d: %s",
						sentenceID, sentence));
			}

			if (tagAndModifier2.size() > 0) {
				String modifier = tagAndModifier2.get(0);
				String tag = tagAndModifier2.get(1);
				dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
						modifier, tag, "andor[m&mn]");
				myLogger.trace("tagSentenceWithMT(" + sentenceID + ", "
						+ sentence + ", " + modifier + ", " + tag
						+ ", andor[m&mn]");
			} else {
				myLogger.debug(String.format(
						"Andor can not determine a tag or modifier for %d: %s",
						sentenceID, sentence));
			}

			if (update1.size() > 0) {
				String newBoundaryWord = update1.get(0);
				sign = sign
						+ dataholderHandler.updateDataHolder(newBoundaryWord,
								"b", "", "wordpos", 1);
			}

			for (String tempWord : update2) {
				sign = sign
						+ dataholderHandler.updateDataHolder(tempWord, "p",
								"-", "wordpos", 1);
			}
		}

		else if (StringUtility.createMatcher(pattern, "^b+&b+[,:;.]").find()) {
			myLogger.trace("Case 2");
			dataholderHandler.tagSentenceWithMT(sentenceID, sentence, "",
					"ditto", "andor");
		} else {
			myLogger.trace("Case 3");
			myLogger.trace("[andortag]Andor can not determine a tag or modifier for "
					+ sentenceID + ": " + sentence);
		}
		myLogger.trace("Return " + sign + "\n");
		return sign;
	}

	/**
	 * Case 1 of and/or tagging: matches the sentence pattern string against
	 * {@code wPattern} and, on a match, derives modifier/structure segments,
	 * a tag/modifier proposal and word-POS updates. This method only computes
	 * the proposals; it does not touch any data holder.
	 * <p>
	 * Match offsets of the five capturing groups of {@code wPattern} are used
	 * as indices into {@code words}, i.e. the code treats character i of
	 * {@code pattern} as describing {@code words.get(i)}.
	 * 
	 * @param pattern
	 *            pattern string of the sentence
	 * @param wPattern
	 *            regular expression with (at least) five capturing groups:
	 *            second-last modifier, second-last structure, last modifier,
	 *            last structure, end segment
	 * @param words
	 *            the words of the sentence
	 * @param token
	 *            the conjunction tokens
	 * @return null when {@code wPattern} is not found in {@code pattern}, when
	 *         an early segment cannot be matched, or when an early modifier
	 *         segment contains although/but/when/if/where; otherwise a list
	 *         of eight lists in this order: modifier patterns, modifier
	 *         segments, structure patterns, structure segments,
	 *         [modifier, tag] for "andor[n&amp;n]", [modifier, tag] for
	 *         "andor[m&amp;mn]", new boundary word, words to mark as "p"
	 */
	public List<List<String>> andOrTagCase1Helper(String pattern,
			String wPattern, List<String> words, Set<String> token) {
		PropertyConfigurator.configure("conf/log4j.properties");
		Logger myLogger = Logger.getLogger("learn.andOrTag");

		List<String> mPatterns = new ArrayList<String>();
		List<String> sPatterns = new ArrayList<String>();
		List<String> mSegments = new ArrayList<String>();
		List<String> sSegments = new ArrayList<String>();

		List<String> update1 = new ArrayList<String>();
		List<String> update2 = new ArrayList<String>();

		List<String> tagAndModifier1 = new ArrayList<String>();
		List<String> tagAndModifier2 = new ArrayList<String>();

		// NOTE(review): the tokens are joined with a space here but with "|"
		// in case 1.1 below; as a regex this only matches the tokens written
		// one after another - confirm "|" was not intended.
		String strToken = "(" + StringUtils.join(token, " ") + ")";

		Matcher m1 = StringUtility.createMatcher(pattern, wPattern);

		if (m1.find()) {
			myLogger.trace("Case 1");
			// leftover debugging hook; the body is empty
			if (pattern.equals("n&qqnbq")) {
				// System.out.println();
			}

			// offsets of the five groups; -1 when a group did not participate
			int start1 = m1.start(1);
			int end1 = m1.end(1);

			int start2 = m1.start(2);
			int end2 = m1.end(2);

			int start3 = m1.start(3);
			int end3 = m1.end(3);

			int start4 = m1.start(4);
			int end4 = m1.end(4);

			int start5 = m1.start(5);
			int end5 = m1.end(5);


			// System.out.println(pattern);
			// System.out.println(start1);
			// System.out.println();
			// everything before group 1: the "early" groups
			String earlyGroupsPattern = start1 == -1 ? "" : pattern.substring(
					0, start1);
			// NOTE(review): "s*" at the start of this regex is a literal 's',
			// not "\\s*"; it also splits the pattern string on the tagged
			// comma text "<B>,</B>" - confirm what the pattern string
			// contains at comma positions.
			String[] patterns = earlyGroupsPattern.split("s*<B>,<\\/B>\\s*");
			String earlyGroupsWords = start1 == -1 ? "" : StringUtils.join(
					words.subList(0, start1), " ");
			// NOTE(review): the trailing "s*" is a literal 's', not "\\s*",
			// so whitespace after the comma stays at the start of the next
			// segment.
			String[] segments = earlyGroupsWords.split("\\s*<B>,<\\/B>s*");

			String secondLastModifierPattern = m1.group(1);
			String secondLastModifierWords = secondLastModifierPattern == null ? ""
					: StringUtils.join(words.subList(start1, end1), " ");

			String sencondLastStructurePattern = m1.group(2);
			String secondLastStructureWords = sencondLastStructurePattern == null ? ""
					: StringUtils.join(words.subList(start2, end2), " ");

			String lastModifierPattern = m1.group(3);
			String lastModifierWords = lastModifierPattern == null ? ""
					: StringUtils.join(words.subList(start3, end3), " ");

			String lastStructurePattern = m1.group(4);
			String lastStructureWords = lastStructurePattern == null ? ""
					: StringUtils.join(words.subList(start4, end4), " ");

			String endSegmentPattern = m1.group(5);
			String endSegmentWords = endSegmentPattern == null ? ""
					: StringUtils.join(words.subList(start5, end5), " ");

			// index of the first word of the end segment
			int bIndex = start5;

			// matching pattern with original text
			if (!(patterns.length == 1 && StringUtils.equals(patterns[0], ""))) {
				for (int i = 0; i < patterns.length; i++) {
					// NOTE(review): this compiles the literal text "sPattern",
					// not a segment pattern (this method has no such
					// parameter). The literal has no capturing groups, so
					// group(1) below would throw if it ever matched; in
					// practice find() fails and the method returns null via
					// the "wrong segment" branch whenever there are early
					// groups.
					Pattern p = Pattern.compile("sPattern");
					Matcher m10 = p.matcher(patterns[i]);
					if (m10.find()) {
						String g1 = m10.group(1);
						mPatterns.add(g1);
						String g2 = m10.group(2);
						sPatterns.add(g2);

						List<String> w = new ArrayList<String>(
								Arrays.asList(segments[i].split(" ")));
						String m = StringUtils.join(w.subList(0, m10.end(1)),
								" ");

						// give up on segments containing these conjunctions
						if (StringUtility.isMatchedNullSafe(m,
								"\\b(although|but|when|if|where)\\b")) {
							return null;
						}

						mSegments.add(m);
						sSegments.add(StringUtils.join(
								w.subList(m10.end(1), w.size()), " "));
					} else {
						myLogger.info("wrong segment: " + patterns[i] + "=>"
								+ segments[i] + "\n");
						return null;
					}
				}
			}

			// append the second-last and last modifier/structure groups
			if (secondLastModifierPattern != null)
				mPatterns.add(secondLastModifierPattern);
			if (!StringUtils.equals(secondLastModifierWords, ""))
				mSegments.add(secondLastModifierWords);
			if (sencondLastStructurePattern != null)
				sPatterns.add(sencondLastStructurePattern);
			if (!StringUtils.equals(secondLastStructureWords, ""))
				sSegments.add(secondLastStructureWords);

			if (lastModifierPattern != null)
				mPatterns.add(lastModifierPattern);
			if (!StringUtils.equals(lastModifierWords, ""))
				mSegments.add(lastModifierWords);
			if (lastStructurePattern != null)
				sPatterns.add(lastStructurePattern);
			if (!StringUtils.equals(lastStructureWords, ""))
				sSegments.add(lastStructureWords);

			// find the modifier and the tag for sentenceID
			// case 1.1
			// NOTE(review): everything down to the closing brace before
			// "res" is inside this "> 1" block. The "case 1.2" and "case 1.3"
			// branches hang off "if (tag != null)", and tag is the result of
			// replaceAll, which never returns null - so those two branches
			// are unreachable, and the update1/update2 collection below only
			// runs when more than one structure was found. Confirm the
			// intended nesting.
			if (this.countStructures(sPatterns) > 1) {
				// compound subject involving multiple structures: mn,mn,&mn =>
				// use all but boundary as the tag, modifier="";
				String tag = StringUtils.join(words.subList(0, bIndex), " ");
				String modifier = "";
				tag = tag.replaceAll("<\\S+?>", "");
				if (tag != null) {
					String regex11 = "\\b(" + StringUtils.join(token, "|")
							+ ")\\b";
					Matcher m11 = StringUtility.createMatcher(tag, regex11);

					if (m11.find()) {
						// first conjunction found in the tag
						String conj = m11.group(1);

						// replace commas by the conjunction, then normalise
						// whitespace and repeated conjunctions
						tag = tag.replaceAll(",", " " + conj + " ");
						tag = tag.replaceAll("\\s+", " ");
						tag = tag.replaceAll("(" + conj + " )+", "$1");
						tag = tag.replaceAll("^\\s+", "");
						tag = tag.replaceAll("\\s+$", "");

						// dataholderHandler.tagSentenceWithMT(sentenceID,
						// sentence, "", tag, "andor[n&n]");
						tagAndModifier1.add("");
						tagAndModifier1.add(tag);
					}
					// else {
					// myLogger.debug(String.format("Andor can not determine a tag or modifier for %d: %s",
					// sentenceID, sentence));
					// }
				}
				// case 1.2
				else if (this.countStructures(sPatterns) == 1) {
					// m&mn => connect all modifiers as the modifier, and the n
					// as the tag
					int i = 0;
					// index of the first structure pattern with a word char
					for (i = 0; i < sPatterns.size(); i++) {
						if (StringUtility.isMatchedNullSafe(sPatterns.get(i),
								"\\w")) {
							break;
						}
					}

					tag = sSegments.get(i);
					tag = tag.replaceAll("<\\S+?>", "");
					modifier = StringUtils.join(mSegments, " ");
					modifier = modifier.replaceAll("<\\S+?>", "");

					tag = StringUtility.trimString(tag);
					modifier = StringUtility.trimString(modifier);

					String myStop = this.myLearnerUtility.getConstant().STOP;
					// NOTE(review): token is a Set, so %s inserts its
					// toString() form ("[a, b, ...]") into the regex -
					// confirm whether strToken was meant.
					myStop = myStop.replaceAll(
							String.format("\\b%s\\b", token), "");
					myStop = myStop.replaceAll("\\s+$", "");

					if (StringUtility.isMatchedNullSafe(modifier, "\\b"
							+ strToken + "\\b")
							&& StringUtility.isEntireMatchedNullSafe(modifier,
									"\\b(" + myStop + "|to)\\b")) {
						// case 1.2.1
						// move all but the last word of the tag into the
						// modifier; the last word becomes the tag
						List<String> wordsTemp = new ArrayList<String>();
						wordsTemp.addAll(Arrays.asList(tag.split("\\s+")));
						modifier = modifier
								+ " "
								+ StringUtils.join(wordsTemp.subList(0,
										wordsTemp.size() - 1), " ");
						tag = wordsTemp.get(wordsTemp.size() - 1);
						// dataholderHandler.tagSentenceWithMT(sentenceID,
						// sentence, modifier, tag, "andor[m&mn]");
						tagAndModifier2.add(modifier);
						tagAndModifier2.add(tag);

					}
					// else {
					// myLogger.debug(String.format("Andor can not determine a tag or modifier for %d: %s",
					// sentenceID, sentence));
					// }
				}
				// case 1.3
				else {
					myLogger.debug("Andor can not determine a tag or modifier");
				}

				// position of the first 'q' in the end-segment pattern, or -1
				int q = -1;
				if (endSegmentPattern != null) {
					Matcher m13 = StringUtility.createMatcher(
							endSegmentPattern, "q");
					if (m13.find()) {
						q = m13.start();
					}
				}

				// the word at that position is proposed as a new boundary word
				if (q >= 0) {
					String newBoundaryWord = endSegmentWords.split(" ")[q];
					if (StringUtility.isMatchedNullSafe(newBoundaryWord, "\\w")) {
						update1.add(newBoundaryWord);
						// sign = sign +
						// dataholderHandler.updateDataHolder(newBoundaryWord,
						// "b", "", "wordpos", 1);
					}
				}

				// structure patterns and segments: $nptn =
				// "((?:[np],?)*&?[np])"; #grouped #must present, no q allowed
				// mark all ps "p"
				for (int i = 0; i < sPatterns.size(); i++) {
					String sPatternI = sPatterns.get(i);
					// put a space after every character so the pattern can be
					// split into one entry per word
					sPatternI = sPatternI.replaceAll("(.)", "$1 ");
					sPatternI = StringUtility.trimString(sPatternI);
					String[] ps = sPatternI.split(" ");
					String[] ts = sSegments.get(i).split("\\s+");

					for (int j = 0; j < ps.length; j++) {
						if (StringUtils.equals(ps[j], "p")) {
							ts[j] = StringUtility.trimString(ts[j]);
							update2.add(ts[j]);
							// sign = sign
							// + dataholderHandler.updateDataHolder(ts[j],
							// "p", "-", "wordpos", 1);
						}
					}

				}

			}

			// package all results; the caller reads them by index
			List<List<String>> res = new ArrayList<List<String>>();
			res.add(mPatterns);
			res.add(mSegments);
			res.add(sPatterns);
			res.add(sSegments);
			res.add(tagAndModifier1);
			res.add(tagAndModifier2);
			res.add(update1);
			res.add(update2);

			return res;
		} else {
			return null;
		}
	}

	/**
	 * Counts the entries of {@code patterns} in which the regular expression
	 * {@code \w} finds a match.
	 *
	 * @param patterns
	 *            pattern strings to inspect
	 * @return number of entries for which the {@code \w} check succeeds
	 */
	public int countStructures(List<String> patterns) {
		int structureCount = 0;
		for (String candidate : patterns) {
			boolean hasWordCharacter = StringUtility.isMatchedNullSafe(
					candidate, "\\w");
			if (!hasWordCharacter) {
				continue;
			}
			structureCount = structureCount + 1;
		}
		return structureCount;
	}

	/**
	 * Asks the data holder to replace every sentence tag matching
	 * {@code ^andor$} with {@code null}.
	 *
	 * @param dataholderHandler
	 *            data holder whose sentence tags are updated
	 */
	public void resetAndOrTags(DataHolder dataholderHandler) {
		final String andOrTagPattern = "^andor$";
		dataholderHandler.updateSentenceTag(andOrTagPattern, null);
	}

	/**
	 * Runs {@link #dittoHelper} on every sentence in the holder whose tag is
	 * still {@code null}.
	 *
	 * @param dataholderHandler
	 *            data holder supplying the sentences and receiving the tags
	 */
	public void ditto(DataHolder dataholderHandler) {
		final String nounPhraseRegex = "(?:<[A-Z]*[NO]+[A-Z]*>[^<]+?<\\/[A-Z]*[NO]+[A-Z]*>\\s*)+";
		final String modifierPhraseRegex = "(?:<[A-Z]*M[A-Z]*>[^<]+?<\\/[A-Z]*M[A-Z]*>\\s*)+";

		for (SentenceStructure item : dataholderHandler.getSentenceHolder()) {
			// only sentences without a tag are candidates
			if (item.getTag() != null) {
				continue;
			}
			int id = item.getID();
			String text = item.getSentence();
			this.dittoHelper(dataholderHandler, id, text, nounPhraseRegex,
					modifierPhraseRegex);
		}
	}

	/**
	 * Decides whether a single sentence should be tagged "ditto" and, if so,
	 * tags it through the data holder.
	 *
	 * @param dataholderHandler
	 *            data holder that receives the tag
	 * @param sentenceID
	 *            id of the sentence
	 * @param sentence
	 *            the marked-up sentence text
	 * @param nPhrasePattern
	 *            regular expression describing a noun phrase
	 * @param mPhrasePattern
	 *            not used by this method
	 * @return 1 when the sentence contains no {@code <N>}/{@code <O>} tag, 21
	 *         when the text before the first noun phrase contains a
	 *         preposition, 22 when that text ends in {@code ,</B>}, and 0 when
	 *         nothing was tagged
	 */
	public int dittoHelper(DataHolder dataholderHandler, int sentenceID,
			String sentence, String nPhrasePattern, String mPhrasePattern) {
		final String dittoTag = "ditto";
		final String emptyModifier = "";

		// merge adjacent tags, e.g. "<N><B>" becomes "<NB>"
		String flattened = ("" + sentence).replaceAll("></?", "");
		Matcher leadMatcher = StringUtility.createMatcher(flattened, "(.*?)"
				+ nPhrasePattern);

		// no noun at all
		if (!StringUtility.isMatchedNullSafe(sentence, "<[NO]>")) {
			dataholderHandler.tagSentenceWithMT(sentenceID, sentence, "",
					dittoTag, "ditto-no-N");
			return 1;
		}

		if (!leadMatcher.find()) {
			return 0;
		}

		// text in front of the first noun phrase
		String beforeNounPhrase = leadMatcher.group(1);
		String prepositionRegex = String.format("\\b(%s)\\b",
				this.myLearnerUtility.getConstant().PREPOSITION);

		if (StringUtility.isMatchedNullSafe(beforeNounPhrase, prepositionRegex)) {
			dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
					emptyModifier, dittoTag, "ditto-proposition");
			return 21;
		}

		if (StringUtility.isMatchedNullSafe(beforeNounPhrase, ",<\\/B>\\s*$")) {
			dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
					emptyModifier, dittoTag, "ditto-,-N");
			return 22;
		}

		return 0;
	}

	/**
	 * For every sentence whose tag is still {@code null}, asks
	 * {@link #phraseClauseHelper(String)} for a modifier/tag pair and, when a
	 * pair of exactly two elements comes back, tags the sentence with it.
	 *
	 * @param dataholderHandler
	 *            data holder supplying the sentences and receiving the tags
	 */
	public void phraseClause(DataHolder dataholderHandler) {
		for (SentenceStructure item : dataholderHandler.getSentenceHolder()) {
			if (item.getTag() != null) {
				continue;
			}

			int id = item.getID();
			String text = item.getSentence();
			List<String> modifierAndTag = this.phraseClauseHelper(text);

			boolean usable = modifierAndTag != null
					&& modifierAndTag.size() == 2;
			if (!usable) {
				continue;
			}

			dataholderHandler.tagSentenceWithMT(id, text,
					modifierAndTag.get(0), modifierAndTag.get(1),
					"phraseclause");
		}
	}

	/**
	 * Tries to read a sentence as "head, optional modifier tags, noun tags,
	 * closing punctuation" and extracts a modifier and a tag from it.
	 *
	 * @param sentence
	 *            the marked-up sentence, may be {@code null}
	 * @return {@code null} when {@code sentence} is {@code null}; a list
	 *         holding the modifier followed by the tag when the sentence
	 *         matches and neither the head nor the modifier contains a
	 *         preposition and the head contains no {@code </N>}; an empty
	 *         list otherwise
	 */
	public List<String> phraseClauseHelper(String sentence) {
		if (sentence == null) {
			return null;
		}

		List<String> modifierAndTag = new ArrayList<String>(2);
		final String clauseRegex = "^(.*?)((?:<[A-Z]*M[A-Z]*>[^<]*?<\\/[A-Z]*M[A-Z]*>\\s*)*)((?:<[A-Z]*[NO]+[A-Z]*>[^<]*?<\\/[A-Z]*[NO]+[A-Z]*>\\s*)+)<B>[,:\\.;]<\\/B>\\s*$";

		// merge adjacent tags, e.g. "<N><B>" becomes "<NB>"
		String flattened = ("" + sentence).replaceAll("></?", "");

		Matcher clause = StringUtility.createMatcher(flattened, clauseRegex);
		if (!clause.find()) {
			return modifierAndTag;
		}

		String head = clause.group(1);
		String modifier = clause.group(2);
		String tag = clause.group(3);

		String prepositionRegex = String.format("\\b(%s)\\b",
				this.myLearnerUtility.getConstant().PREPOSITION);

		// any of these disqualifies the sentence
		if (StringUtility.isMatchedNullSafe(head, prepositionRegex)
				|| StringUtility.isMatchedNullSafe(head, "<\\/N>")
				|| StringUtility.isMatchedNullSafe(modifier, prepositionRegex)) {
			return modifierAndTag;
		}
		if (tag == null) {
			return modifierAndTag;
		}

		// the last <N> word becomes the tag, whatever precedes it joins the
		// modifier
		Matcher lastNoun = StringUtility.createMatcher(tag,
				"(.*?)<N>([^<]+)<\\/N>\\s*$");
		if (lastNoun.find()) {
			modifier = modifier + lastNoun.group(1);
			tag = lastNoun.group(2);
		}

		// drop markup, then surrounding whitespace
		tag = tag.replaceAll("<\\S+?>", "").replaceAll("(^\\s*|\\s*$)", "");
		modifier = modifier.replaceAll("<\\S+?>", "").replaceAll(
				"(^\\s*|\\s*$)", "");

		modifierAndTag.add(modifier);
		modifierAndTag.add(tag);
		return modifierAndTag;
	}

	/**
	 * Re-tags sentences in four passes over the sentence holder: character
	 * subjects, preposition subjects, pronoun subjects, and "correct to missed
	 * N".
	 *
	 * @param dataholderHandler
	 *            data holder supplying the sentences and receiving the tags
	 */
	public void pronounCharacterSubject(DataHolder dataholderHandler) {

		// character subject cases
		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {

			int sentenceID = sentenceItem.getID();
			String lead = sentenceItem.getLead();
			String sentence = sentenceItem.getSentence();
			String modifier = sentenceItem.getModifier();
			String tag = sentenceItem.getTag();

			List<String> mt = pronounCharacterSubjectHelper(lead, sentence,
					modifier, tag);
			if (mt != null) {
				// tag with what the helper worked out; previously its result
				// was discarded and the unchanged modifier/tag were written
				modifier = mt.get(0);
				tag = mt.get(1);
				dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
						modifier, tag,
						"pronouncharactersubject[character subject]");
			}
		}

		// preposition cases
		// NOTE(review): this requires the tag to be null or "ignore" AND to
		// start with a preposition followed by a space, which looks
		// unsatisfiable; confirm whether the lead (or a negated "ignore"
		// check) was intended. Behaviour is left unchanged here.
		String prepositionPattern = String
				.format("^(%s)", this.myLearnerUtility.getConstant().PREPOSITION);
		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {
			int sentenceID = sentenceItem.getID();
			String tag = sentenceItem.getTag();
			String sentence = sentenceItem.getSentence();
			boolean case1 = (StringUtils.equals(tag, "ignore"));
			boolean case2 = (tag == null);
			boolean case3 = StringUtility.isMatchedNullSafe(tag,
					prepositionPattern + " ");
			if ((case1 || case2) && case3) {
				dataholderHandler.tagSentenceWithMT(sentenceID, sentence, "",
						"", "pronouncharactersubject[proposition subject]");
			}
		}

		// pronoun cases
		// "$" is the end-of-input anchor here; the earlier "\\$" only matched
		// a literal dollar sign, so a pronoun at the very end was never found
		String pronounAsWord = "(^| )(" + this.myLearnerUtility.getConstant().PRONOUN
				+ ")( |$)";
		String pronounToStrip = "\\b(" + this.myLearnerUtility.getConstant().PRONOUN
				+ ")\\b";
		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {
			int sentenceID = sentenceItem.getID();
			String modifier = sentenceItem.getModifier();
			String tag = sentenceItem.getTag();
			String sentence = sentenceItem.getSentence();

			boolean case1 = StringUtility.isMatchedNullSafe(tag, pronounAsWord);
			boolean case2 = StringUtility.isMatchedNullSafe(modifier,
					pronounAsWord);
			if (case1 || case2) {
				// only one of the two needs to match, so the other may be null
				if (modifier == null) {
					modifier = "";
				}
				if (tag == null) {
					tag = "";
				}

				modifier = modifier.replaceAll(pronounToStrip, "");
				tag = tag.replaceAll(pronounToStrip, "");
				modifier = modifier.replaceAll("\\s+", " ");
				tag = tag.replaceAll("\\s+", " ");

				// nothing left of the tag, or it is a ditto: fall back to the
				// parent sentence's tag
				if (!StringUtility.isMatchedNullSafe(tag, "\\w")
						|| StringUtility.isMatchedNullSafe(tag, "ditto")) {
					tag = dataholderHandler.getParentSentenceTag(sentenceID);
				}

				modifier = modifier.replaceAll("(^\\s*|\\s*$)", "");
				tag = tag.replaceAll("(^\\s*|\\s*$)", "");

				List<String> mt = dataholderHandler.getMTFromParentTag(tag);
				String m = mt.get(0);
				tag = mt.get(1);

				if (StringUtility.isMatchedNullSafe(m, "\\w")) {
					modifier = modifier + m;
					dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
							modifier, tag,
							"pronouncharactersubject[pronoun subject]");
				}
			}
		}

		// correct to missed N
		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {
			int sentenceID = sentenceItem.getID();
			String lead = sentenceItem.getLead();
			String modifier = sentenceItem.getModifier();
			String tag = sentenceItem.getTag();
			String sentence = sentenceItem.getSentence();

			List<String> mt = this.pronounCharacterSubjectHelper4(lead,
					sentence, modifier, tag);
			if (mt != null) {
				modifier = mt.get(0);
				tag = mt.get(1);
				dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
						modifier, tag,
						"pronouncharactersubject[correct to missed N]");
			}
		}
	}

	/**
	 * "Correct to missed N" step: looks for a sentence that opens with an
	 * {@code <N>} word, followed by an {@code <M>}/{@code <B>} word, followed
	 * by the current tag, and proposes the {@code <N>} word as the new tag.
	 *
	 * NOTE(review): the gate accepts only a tag that is {@code null} or equal
	 * to "ignore", while the sibling helper uses the negated "ignore" check;
	 * confirm which one is intended.
	 *
	 * @param lead
	 *            not used by this method
	 * @param sentence
	 *            the marked-up sentence, may be {@code null}
	 * @param modifier
	 *            current modifier (only replaced, never read)
	 * @param tag
	 *            current tag; concatenated into the regular expressions as is
	 * @return a list holding the new modifier followed by the new tag, or
	 *         {@code null} when the sentence does not qualify
	 */
	public List<String> pronounCharacterSubjectHelper4(String lead,
			String sentence, String modifier, String tag) {
		boolean tagIsIgnore = StringUtils.equals(tag, "ignore");
		boolean tagIsNull = (tag == null);
		boolean tagHasNoConjunction = !StringUtility.isMatchedNullSafe(tag,
				" (and|nor|or) ");
		boolean sentenceHasNoBracket = !StringUtility.isMatchedNullSafe(
				sentence, "\\[");

		// the first <N> in the sentence must be directly followed by the tag
		boolean firstNounIsTag = false;
		if (sentence != null) {
			firstNounIsTag = Pattern.compile("^[^N]*<N>" + tag)
					.matcher(sentence).find();
		}

		boolean qualifies = (tagIsIgnore || tagIsNull) && tagHasNoConjunction
				&& sentenceHasNoBracket && firstNounIsTag;
		if (!qualifies) {
			return null;
		}

		// firstNounIsTag can only be true for a non-null sentence
		String flattened = sentence.replaceAll("></?", "");
		Matcher shape = Pattern.compile(
				"^(\\S*) ?<N>([^<]+)<\\/N> <[MB]+>(\\S+)<\\/[MB]+> \\S*\\b"
						+ tag + "\\b\\S*").matcher(flattened);
		if (!shape.find()) {
			return null;
		}

		String newModifier = shape.group(1);
		String newTag = shape.group(2);
		String between = shape.group(3);

		// "N of tag" is not a missed noun
		if (StringUtility.isMatchedNullSafe(between, "\\bof\\b")) {
			return null;
		}

		newModifier = newModifier.replaceAll("<\\S+?>", "").replaceAll(
				"(^\\s*|\\s*$)", "");
		newTag = newTag.replaceAll("<\\S+?>", "").replaceAll("(^\\s*|\\s*$)",
				"");

		List<String> modifierAndTag = new ArrayList<String>();
		modifierAndTag.add(newModifier);
		modifierAndTag.add(newTag);
		return modifierAndTag;
	}

	/**
	 * Works out a modifier and tag for a sentence whose lead or tag contains
	 * one of the CHARACTER words.
	 *
	 * @param lead
	 *            the sentence lead
	 * @param sentence
	 *            the marked-up sentence; when {@code null} the incoming
	 *            modifier and tag are returned unchanged
	 * @param modifier
	 *            current modifier
	 * @param tag
	 *            current tag, may be {@code null}
	 * @return {@code null} when neither the lead (for a tag other than
	 *         "ignore") nor the tag contains a CHARACTER word; otherwise a
	 *         list holding the resulting modifier followed by the resulting
	 *         tag
	 */
	public List<String> pronounCharacterSubjectHelper(String lead,
			String sentence, String modifier, String tag) {
		String t = "(?:<\\/?[A-Z]+>)?";
		String characterAsWord = "(^| )("
				+ this.myLearnerUtility.getConstant().CHARACTER + ")( |$)";

		boolean b1 = !StringUtils.equals(tag, "ignore");
		boolean b2 = (tag == null);
		boolean b3 = StringUtility.isMatchedNullSafe(lead, characterAsWord);
		boolean b4 = StringUtility.isMatchedNullSafe(tag, characterAsWord);
		if (!(((b1 || b2) && b3) || b4)) {
			return null;
		}

		// the null check has to come before the first use of the sentence;
		// it used to sit after replaceAll and could never prevent the NPE
		if (sentence != null) {
			sentence = sentence.replaceAll("></?", "");

			String pattern1 = String
					.format("^.*?%s\\b(%s)\\b%s %s(?:of)%s (.*?)(<[NO]>([^<]*?)<\\/[NO]> ?)+ ",
							t, this.myLearnerUtility.getConstant().CHARACTER, t, t, t);
			Matcher m1 = StringUtility.createMatcher(sentence, pattern1);

			String pattern2 = String
					.format("^(.*?)((?:<\\/?[BM]+>\\w+?<\\/?[BM]+>\\s*)*)%s\\b(%s)\\b%s",
							t, this.myLearnerUtility.getConstant().CHARACTER, t);
			Matcher m2 = StringUtility.createMatcher(sentence, pattern2);

			// case 1.1: "<character> of ... <N>noun</N> "
			if (m1.find()) {
				tag = m1.group(4);
				modifier = sentence.substring(m1.start(2), m1.start(4));
				String s2 = m1.group(2);
				String s3 = m1.group(3);

				if ((!StringUtility.isMatchedNullSafe(s2,
						String.format("\\b(%s)\\b", this.myLearnerUtility.getConstant().PREPOSITION)))
						&& (!StringUtility.isMatchedNullSafe(s3, String
								.format("\\b(%s|\\d)\\b", this.myLearnerUtility.getConstant().STOP)))) {
					modifier = modifier.replaceAll("<\\S+?>", "");
					modifier = modifier.replaceAll("(^\\s*|\\s*$)", "");
					tag = tag.replaceAll("<\\S+?>", "");
					tag = tag.replaceAll("(^\\s*|\\s*$)", "");
				} else {
					modifier = "";
					tag = "ditto";
				}
			}

			// case 1.2: text in front of the character word
			else if (m2.find()) {
				String text = m2.group(1);

				if ((!StringUtility.isMatchedNullSafe(text, "\\b("
						+ this.myLearnerUtility.getConstant().STOP + "|\\d+)\\b"))
						&& (StringUtility.isMatchedNullSafe(text, "\\w"))
						&& (!StringUtility
								.isMatchedNullSafe(text, "[,:;.]"))) {
					text = text.replaceAll("<\\S+?>", "");
					text = text.replaceAll("(^\\s*|\\s*$)", "");
					text = text.replaceAll("\\p{Punct}", "");

					String[] textArray = text.split("\\s+");
					if (textArray.length >= 1) {
						// last word is the candidate tag
						tag = textArray[textArray.length - 1];
						String pattern = "<[NO]>" + tag + "</[NO]>";
						if (StringUtility.isMatchedNullSafe(sentence,
								pattern)) {
							// 1.2.1.1
							text = text.replaceAll(tag, "");
							modifier = text;
						} else {
							// 1.2.1.2
							modifier = "";
							tag = "ditto";
						}
					}
				} else {
					// 1.2.2
					modifier = "";
					tag = "ditto";
				}
			}

			// case 1.3: a character word somewhere in the sentence
			else if (StringUtility.isMatchedNullSafe(sentence, "\\b("
					+ this.myLearnerUtility.getConstant().CHARACTER + ")\\b")) {
				modifier = "";
				tag = "ditto";
			}
		}

		List<String> mt = new ArrayList<String>(2);
		mt.add(modifier);
		mt.add(tag);
		return mt;
	}


	/**
	 * Comma used for 'and': seen in TreatiseH, using a comma for 'and' as in
	 * "adductor , diductor scars clearly differentiated ;", which is the same
	 * as "adductor and diductor scars clearly differentiated ;". The shapes
	 * handled are ^m*n+,m*n+ (case 1), m*n+,m*n+ followed by a boundary mark
	 * at the end of the clause (case 2), and m,mn (case 3). Clauses dealt with
	 * here do not contain "and/or"; andortag() deals with clauses that do.
	 * 
	 * @param dataholderHandler
	 *            data holder supplying the sentences and receiving the tags
	 */
	public void commaAnd(DataHolder dataholderHandler) {
		// cover m,mn

		// last + =>*
		// "(?:<[A-Z]*[NO]+[A-Z]*>[^<]+?<\/[A-Z]*[NO]+[A-Z]*>\\s*)+"
		String nPhrasePattern = "(?:<[A-Z]*[NO]+[A-Z]*>[^<]+?<\\/[A-Z]*[NO]+[A-Z]*>\\s*)+";

		// add last \\s*
		// "(?:<[A-Z]*M[A-Z]*>[^<]+?<\/[A-Z]*M[A-Z]*>\\s*)"
		String mPhrasePattern = "(?:<[A-Z]*M[A-Z]*>[^<]+?<\\/[A-Z]*M[A-Z]*>\\s*)";

		// "(?:<[A-Z]*B[A-Z]*>[,:\.;<]<\/[A-Z]*B[A-Z]*>)"
		String bPattern = "(?:<[A-Z]*B[A-Z]*>[,:.;<]<\\/[A-Z]*B[A-Z]*>)";

		String commaPattern = "<B>,</B>";

		String phrasePattern = mPhrasePattern + "\\s*" + nPhrasePattern;
		String pattern = phrasePattern + "\\s+" + commaPattern + "\\s+(?:"
				+ phrasePattern + "| |" + commaPattern + ")+";
		String pattern1 = "^(" + pattern + ")";
		// the trailing "$" anchors the boundary mark at the end of the clause;
		// the former "\\$" demanded a literal dollar sign, so case 2 could
		// practically never match
		String pattern2 = "(.*?)(" + pattern + ")\\s*" + bPattern + "$";
		// changed last * to +
		String pattern3 = "^((?:" + mPhrasePattern + "\\s+)+" + commaPattern
				+ "\\s+(?:" + mPhrasePattern + "|\\s*|" + commaPattern + ")+"
				+ mPhrasePattern + "+\\s*" + nPhrasePattern + ")";

		String prepositionWord = "\\b("
				+ this.myLearnerUtility.getConstant().PREPOSITION + ")\\b";

		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {
			int sentenceID = sentenceItem.getID();
			String sentence = sentenceItem.getSentence();

			// merge adjacent tags, e.g. "<N><B>" becomes "<NB>"
			String sentenceCopy = "" + sentence;
			sentenceCopy = sentenceCopy.replaceAll("></?", "");

			Matcher m1 = StringUtility.createMatcher(sentenceCopy, pattern1);
			Matcher m2 = StringUtility.createMatcher(sentenceCopy, pattern2);
			Matcher m3 = StringUtility.createMatcher(sentenceCopy, pattern3);

			// case 1
			if (m1.find()) {
				String tag = m1.group(1);
				tag = tag.replaceAll(",", "and");
				tag = tag.replaceAll("</?\\S+?>", "");
				tag = StringUtility.trimString(tag);
				// case 1.1: a dangling "and" means the list was cut short
				if (!StringUtility.isMatchedNullSafe(tag, " and$")) {
					dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
							"", tag, "commaand[CA1]");
				}
			}
			// case 2
			else if (m2.find()) {
				String g1 = m2.group(1);
				String tag = m2.group(2);
				if (!StringUtility.isMatchedNullSafe(g1, prepositionWord)
						&& !StringUtility.isMatchedNullSafe(g1, "<N>")) {
					tag = tag.replaceAll(",", "and");
					tag = tag.replaceAll("</?\\S+?>", "");
					tag = StringUtility.trimString(tag);
					// case 2.1.1
					if (!StringUtility.isMatchedNullSafe(tag, " and$")) {
						dataholderHandler.tagSentenceWithMT(sentenceID,
								sentence, "", tag, "commaand[CA2]");
					}

				}
			}
			// case 3
			else if (m3.find()) {
				String tag = m3.group(1);
				// case 3.1
				if (!StringUtility.isMatchedNullSafe(tag, prepositionWord)) {
					tag = tag.replaceAll(",", "and");
					tag = tag.replaceAll("</?\\S+?>", "");
					tag = StringUtility.trimString(tag);
					// case 3.1.1: last word is the tag, the rest the modifier
					if (!StringUtility.isMatchedNullSafe(tag, " and$")) {
						String[] tagWords = tag.split("\\s+");
						List<String> tagWordsList = new ArrayList<String>(
								Arrays.asList(tagWords));
						tag = tagWordsList.get(tagWordsList.size() - 1);
						String modifier = StringUtils.join(tagWordsList
								.subList(0, tagWordsList.size() - 1), " ");
						dataholderHandler.tagSentenceWithMT(sentenceID,
								sentence, modifier, tag, "commaand[CA3]");
					}
				}
			}
		}
	}
	
	/**
	 * Normalizes the modifiers (and, in the last pass, the tags) of the
	 * sentences in the holder in four passes. A sentence is re-tagged with
	 * type "normalizemodifiers" whenever a pass changes its modifier or tag;
	 * the "to" pass re-tags unconditionally.
	 *
	 * Sentences with a {@code null} modifier are left out of the passes that
	 * need to read the modifier, and sentences with a {@code null} tag are
	 * left out of the tag pass; both used to end in a NullPointerException.
	 *
	 * @param dataholderHandler
	 *            data holder supplying the sentences and receiving the tags
	 */
	public void normalizeModifiers(DataHolder dataholderHandler) {
		// orders sentences by modifier length; a null modifier counts as
		// length 0
		Comparator<SentenceStructure> stringLengthComparator = new Comparator<SentenceStructure>() {
			@Override
			public int compare(SentenceStructure s1, SentenceStructure s2) {
				String m1 = s1.getModifier();
				String m2 = s2.getModifier();
				int l1 = (m1 == null) ? 0 : m1.length();
				int l2 = (m2 == null) ? 0 : m2.length();
				if (l1 == l2) {
					return 0;
				}
				return l1 < l2 ? -1 : 1;
			}
		};

		// Part 1
		// non- and/or/to/plus cases
		List<SentenceStructure> sentenceList = new ArrayList<SentenceStructure>();
		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {
			String modifier = sentenceItem.getModifier();
			boolean c1 = modifier != null && !StringUtils.equals(modifier, "");
			boolean c2 = !StringUtility.isMatchedNullSafe(modifier,
					" (and|or|nor|plus|to) ");
			if (c1 && c2) {
				sentenceList.add(sentenceItem);
			}
		}

		// longest modifier first
		Collections.sort(sentenceList, stringLengthComparator);
		Collections.reverse(sentenceList);

		for (SentenceStructure sentenceItem : sentenceList) {
			int sentenceID = sentenceItem.getID();
			String sentence = sentenceItem.getSentence();
			String tag = sentenceItem.getTag();
			String modifier = sentenceItem.getModifier();

			String mCopy = "" + modifier;
			modifier = finalizeModifier(dataholderHandler, modifier, tag, sentence);
			modifier = modifier.replaceAll("\\s*\\[.*?\\]\\s*", " ");
			modifier = StringUtility.trimString(modifier);

			if (!StringUtils.equals(mCopy, modifier)) {
				dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
						modifier, tag, "normalizemodifiers");
			}
		}

		// Part 2
		// deal with to: characterA to characterB organ (small to median shells)
		List<SentenceStructure> sentenceList2 = new ArrayList<SentenceStructure>();
		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {
			String modifier = sentenceItem.getModifier();
			if (StringUtility.isMatchedNullSafe(modifier, " to ")) {
				sentenceList2.add(sentenceItem);
			}
		}

		// shortest modifier first
		Collections.sort(sentenceList2, stringLengthComparator);
		for (SentenceStructure sentenceItem : sentenceList2) {
			int sentenceID = sentenceItem.getID();
			String sentence = sentenceItem.getSentence();
			String tag = sentenceItem.getTag();
			String modifier = sentenceItem.getModifier();

			// keep only what follows the last " to "
			modifier = modifier.replaceAll(".*? to ", "");
			List<String> mWords = new ArrayList<String>(Arrays.asList(modifier
					.split("\\s+")));
			Collections.reverse(mWords);

			// grow the candidate modifier word by word from the right and keep
			// the one with the highest sentence count
			String m = "";
			int count = dataholderHandler.getSentenceCount(true, m, true, tag);
			String modi = "" + m;
			for (String word : mWords) {
				m = word + " " + m;
				m = m.replaceAll("\\s+$", "");
				int c = dataholderHandler.getSentenceCount(true, m, true, tag);
				if (c > count) {
					count = c;
					modi = "" + m;
				}
			}
			// tagsentwmt($sentid, $sentence, $modi, $tag,
			// "normalizemodifiers");
			dataholderHandler.tagSentenceWithMT(sentenceID, sentence, modi,
					tag, "normalizemodifiers");
		}

		// Part 3
		// modifier with and/or/plus
		// NOTE(review): the filter below keeps modifiers that do NOT contain
		// and/or/nor/plus/to, which contradicts the heading above; confirm the
		// intended condition before changing it.
		List<SentenceStructure> sentenceList3 = new ArrayList<SentenceStructure>();
		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {
			String modifier = sentenceItem.getModifier();
			boolean con = modifier != null
					&& !StringUtility.isMatchedNullSafe(modifier,
							" (and|or|nor|plus|to) ");
			if (con) {
				sentenceList3.add(sentenceItem);
			}
		}

		Collections.sort(sentenceList3, stringLengthComparator);
		Collections.reverse(sentenceList3);

		for (SentenceStructure sentenceItem : sentenceList3) {
			int sentenceID = sentenceItem.getID();
			String sentence = sentenceItem.getSentence();
			String tag = sentenceItem.getTag();
			String modifier = sentenceItem.getModifier();

			String mCopy = "" + modifier;
			modifier = this.finalizeCompoundModifier(dataholderHandler,
					modifier, tag, sentence);

			modifier = modifier.replaceAll("\\s*\\[.*?\\]\\s*", " ");
			modifier = StringUtility.trimString(modifier);

			if (!StringUtils.equals(mCopy, modifier)) {
				// tagsentwmt($sentid, $sentence, $modifier, $tag,
				// "normalizemodifiers");
				dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
						modifier, tag, "normalizemodifiers");
			}
		}

		// Part 4
		// modifier with and/or/plus
		// NOTE(review): this pass rewrites the tag but selects on the
		// modifier, again with a negated match (the original author marked it
		// "???"); confirm the intended condition.
		List<SentenceStructure> sentenceList4 = new ArrayList<SentenceStructure>();
		for (SentenceStructure sentenceItem : dataholderHandler
				.getSentenceHolder()) {
			String modifier = sentenceItem.getModifier();
			boolean con = !StringUtility.isMatchedNullSafe(modifier,
					"[_ ](and|or|nor|plus|to)[ _]");
			// a sentence without a tag has nothing to finalize
			if (con && sentenceItem.getTag() != null) {
				sentenceList4.add(sentenceItem);
			}
		}

		Collections.sort(sentenceList4, stringLengthComparator);
		Collections.reverse(sentenceList4);

		for (SentenceStructure sentenceItem : sentenceList4) {
			int sentenceID = sentenceItem.getID();
			String sentence = sentenceItem.getSentence();
			String tag = sentenceItem.getTag();
			String modifier = sentenceItem.getModifier();

			String mTag = "" + tag;
			tag = this.finalizeCompoundTag(tag, sentence);
			tag = tag.replaceAll("\\s*\\[.*?\\]\\s*", " ");
			tag = StringUtility.trimString(tag);

			if (!StringUtils.equals(mTag, tag)) {
				// tagsentwmt($sentid, $sentence, $modifier, $tag,
				// "normalizemodifiers");
				dataholderHandler.tagSentenceWithMT(sentenceID, sentence,
						modifier, tag, "normalizemodifiers");
			}
		}
	}

	/**
	 * Reduces a modifier that may join several parts with and/or/nor/plus to
	 * the words that are known modifiers or appear as {@code <N>} words in the
	 * sentence, keeping the trailing noun words.
	 *
	 * @param dataholderHandler
	 *            data holder whose sentences are consulted
	 * @param modifier
	 *            the modifier to finalize; must not be {@code null}
	 * @param tag
	 *            the tag of the sentence
	 * @param sentence
	 *            the marked-up sentence
	 * @return the modifier unchanged when it contains "["; otherwise the
	 *         finalized, trimmed modifier
	 */
	public String finalizeCompoundModifier(DataHolder dataholderHandler, String modifier, String tag,
			String sentence) {
		// case 1
		if (StringUtility.isMatchedNullSafe(modifier, "\\[")) {
			return modifier;
		}

		// drop parenthesised text, turn any other non-word character into a
		// space and collapse runs of spaces. Replacing \W with "" (as before)
		// also deleted the spaces that the word and conjunction splitting
		// below relies on.
		modifier = modifier.replaceAll("\\(.*?\\)", " ");
		modifier = modifier.replaceAll("\\(.*", "");
		modifier = modifier.replaceAll("\\W", " ");
		modifier = modifier.replaceAll("\\s+", " ");
		modifier = StringUtility.trimString(modifier);

		String result = "";
		String m = "";
		String n = "";

		// walk the words from the right: leading run of <N> words goes to n,
		// the first other word is checked against existing sentences
		List<String> lastPart = new ArrayList<String>(Arrays.asList(modifier.split("\\s+")));
		Collections.reverse(lastPart);
		for (String l : lastPart) {
			if (StringUtility.isMatchedNullSafe(sentence, "<N>" + l)) {
				n = l + " " + n;
				n = StringUtility.trimString(n);
			} else {
				String tm = StringUtility.isMatchedNullSafe(n, "\\w") ? l + " "
						+ n : l;
				for (SentenceStructure sentenceItem : dataholderHandler
						.getSentenceHolder()) {
					if (StringUtils.equals(sentenceItem.getModifier(), tm)
							&& StringUtils.equals(sentenceItem.getTag(), tag)) {
						m = l + " " + m;
					}
				}
				break;
			}
		}

		m = StringUtility.trimString(m);
		n = StringUtility.trimString(n);

		// cut the trailing noun words off the modifier. The match is quoted
		// and anchored at the end so that only that suffix is removed, and it
		// is skipped for an empty n (which used to strip every space).
		if (n.length() > 0) {
			modifier = modifier.replaceAll("\\s*" + Pattern.quote(n) + "$", "");
		}

		// components: parts[i] is preceded by conj[i]
		List<String> parts = new ArrayList<String>();
		List<String> conj = new ArrayList<String>();
		conj.add("");
		Matcher m1 = StringUtility.createMatcher(modifier, "(^.*?) (and|or|nor|plus) (.*)");
		while (m1.find()) {
			parts.add(m1.group(1));
			// the conjunction belongs in conj; adding it to parts left conj
			// too short for the conj.get(i) lookup below
			conj.add(m1.group(2));
			modifier = m1.group(3);
			m1 = StringUtility.createMatcher(modifier, "(^.*?) (and|or|nor|plus) (.*)");
		}
		parts.add(modifier);

		String regex2 = "\\b(" + this.myLearnerUtility.getConstant().CHARACTER + "|" + this.myLearnerUtility.getConstant().STOP
				+ "|" + this.myLearnerUtility.getConstant().NUMBER + "|" + this.myLearnerUtility.getConstant().CLUSTERSTRING
				+ ")\\b";

		// at least one m in a part
		for (int i = 0; i < parts.size(); i++) {
			String part = parts.get(i);
			String[] words = part.split("\\s+");
			String r = "";

			for (String word : words) {
				if ((this.checkedModifiers.containsKey(word) && this.checkedModifiers.get(word)) || StringUtility.isMatchedNullSafe(sentence, "<N>"+word)) {
					r = r + " " + word;
				}
			}
			r = StringUtility.trimString(r);

			result = result + " " + conj.get(i)+ " "+r;
			// a part without any usable word, or with a character/stop/
			// number/cluster word, invalidates the whole compound
			if (!StringUtility.isMatchedNullSafe(r, "\\w")
					|| StringUtility.isMatchedNullSafe(r, regex2)) {
				result = "";
				break;
			}
		}
		result = StringUtility.isMatchedNullSafe(result, "\\w") ? result
				+ " " + n : m + " " + n;
		result = StringUtility.trimString(result);

		return result;
	}

	// [bm]+n+&[bm]+n+
	/**
	 * Splits a tag at and/or/nor/plus (separated by spaces or underscores) and
	 * rebuilds it from the words that are known modifiers or appear as
	 * {@code <N>} words in the sentence, with character, stop, number and
	 * cluster words removed.
	 *
	 * @param tag
	 *            the tag to finalize; must not be {@code null}
	 * @param sentence
	 *            the marked-up sentence
	 * @return the rebuilt tag with whitespace collapsed and trimmed
	 */
	public String finalizeCompoundTag(String tag, String sentence) {
		// strip parenthesised text first so that no unmatched "(" can end up
		// in a regexp
		String remaining = tag.replaceAll("\\(.*?\\)", " ")
				.replaceAll("\\(.*", "").replaceAll("\\s+", " ");

		// components: parts.get(i) is preceded by conj.get(i)
		final String splitRegex = "(^.*?)[_ ](and|or|nor|plus)[_ ](.*)";
		List<String> parts = new ArrayList<String>();
		List<String> conj = new ArrayList<String>();
		conj.add("");

		Matcher splitter = StringUtility.createMatcher(remaining, splitRegex);
		while (splitter.find()) {
			parts.add(splitter.group(1));
			conj.add(splitter.group(2));
			remaining = splitter.group(3);
			splitter = StringUtility.createMatcher(remaining, splitRegex);
		}
		parts.add(remaining);

		String excludedWords = "\\b(" + this.myLearnerUtility.getConstant().CHARACTER + "|" + this.myLearnerUtility.getConstant().STOP
				+ "|" + this.myLearnerUtility.getConstant().NUMBER + "|" + this.myLearnerUtility.getConstant().CLUSTERSTRING
				+ ")\\b";

		// at least one m in a part
		StringBuilder rebuilt = new StringBuilder();
		for (int i = 0; i < parts.size(); i++) {
			StringBuilder kept = new StringBuilder();
			for (String word : parts.get(i).split("\\s+")) {
				String escapedW = StringUtility.escapePerlRegex(word);
				boolean knownModifier = this.checkedModifiers.containsKey(word)
						&& this.checkedModifiers.get(word);
				if (knownModifier
						|| StringUtility.isMatchedNullSafe(sentence, "<N>"
								+ escapedW)) {
					kept.append(" ").append(word);
				}
			}

			String r = kept.toString().replaceAll(excludedWords, "");
			r = StringUtility.trimString(r);

			// parts that end up empty are dropped together with their
			// conjunction
			if (StringUtility.isMatchedNullSafe(r, "\\w")) {
				rebuilt.append(" ").append(conj.get(i)).append(" ").append(r);
			}
		}

		String result = rebuilt.toString().replaceAll("\\s+", " ");
		return StringUtility.trimString(result);
	}

	/**
	 * Keeps the longest run of words at the end of the modifier for which
	 * {@link #isModifier} answers {@code true}.
	 *
	 * @param dataholderHandler
	 *            data holder passed on to {@link #isModifier}
	 * @param modifier
	 *            the modifier to finalize; must not be {@code null}
	 * @param tag
	 *            the tag of the sentence
	 * @param sentence
	 *            not used by this method
	 * @return the kept words separated by single spaces, or an empty string
	 *         when the modifier (without its bracketed parts) holds no word
	 *         character or its last word is not a modifier
	 */
	public String finalizeModifier(DataHolder dataholderHandler, String modifier, String tag, String sentence) {
		String fModifier = "";
		// bracketed parts do not take part
		modifier = modifier.replaceAll("\\[.*?\\]", "");
		modifier = StringUtility.trimString(modifier);
		if (StringUtility.isMatchedNullSafe(modifier, "\\w")) {
			List<String> mWords = new ArrayList<String>(Arrays.asList(modifier.split("\\s+")));
			Collections.reverse(mWords);

			// walk from the last word towards the front, stop at the first
			// word that is not a modifier
			for (String mWord : mWords) {
				boolean isModifier = this.isModifier(dataholderHandler, mWord, modifier, tag);
				if (isModifier) {
					fModifier = mWord + " " + fModifier;
				}
				else {
					break;
				}
			}

			// remove only the trailing space left by the loop; replacing
			// every "\\s+" (as before) glued the kept words together
			fModifier = fModifier.replaceAll("\\s+$", "");
		}

		return fModifier;
	}

	/**
	 * Decides whether a word counts as a modifier and remembers the answer in
	 * {@code checkedModifiers}.
	 *
	 * @param dataholderHandler
	 *            data holder consulted for part-of-speech entries, known
	 *            modifiers and sentences
	 * @param word
	 *            the word to check
	 * @param modifier
	 *            not used by this method
	 * @param tag
	 *            not used by this method
	 * @return the remembered answer if there is one; {@code true} when the
	 *         word has an "s", "p" or "n" entry; otherwise the answer derived
	 *         from its boundary/modifier records or, when those do not
	 *         settle it, from the sentence counts
	 */
	public boolean isModifier(DataHolder dataholderHandler, String word, String modifier, String tag) {
		// answer remembered from an earlier call
		if (this.checkedModifiers.containsKey(word)) {
			return this.checkedModifiers.get(word);
		}

		// a word with an "s", "p" or "n" entry is accepted right away
		Set<String> nounPOS = new HashSet<String>(Arrays.asList("s", "p", "n"));
		List<Entry<WordPOSKey, WordPOSValue>> nounEntries = dataholderHandler
				.getWordPOSEntriesByWordPOS(word, nounPOS);
		if (nounEntries.size() > 0) {
			this.checkedModifiers.put(word, true);
			return true;
		}

		Set<String> boundaryPOS = new HashSet<String>();
		boundaryPOS.add("b");
		List<Entry<WordPOSKey, WordPOSValue>> boundaryEntries = dataholderHandler
				.getWordPOSEntriesByWordPOS(word, boundaryPOS);
		boolean usedAsBoundary = boundaryEntries.size() > 0;
		boolean usedAsModifier = dataholderHandler.getModifierHolder()
				.containsKey(word);

		// exactly one of the two records exists: boundary only means "no",
		// modifier only means "yes"
		if (usedAsBoundary != usedAsModifier) {
			this.checkedModifiers.put(word, usedAsModifier);
			return usedAsModifier;
		}

		// both or neither: compare how often the word directly precedes a
		// noun tag with how often it occurs in the original sentences
		int mCount = this.getMCount(dataholderHandler, word);

		String wordInSentence = "" + word;
		if (StringUtility.isMatchedNullSafe(word, "_")) {
			wordInSentence = wordInSentence.replaceAll("_", " - ");
		}
		String occurrencePattern = "(^| )" + wordInSentence + " ";

		int tCount = 0;
		for (SentenceStructure sentenceItem : dataholderHandler.getSentenceHolder()) {
			if (StringUtility.isMatchedNullSafe(
					sentenceItem.getOriginalSentence(), occurrencePattern)) {
				tCount++;
			}
		}

		boolean verdict = !(tCount == 0 || tCount > 0.25 * mCount);
		this.checkedModifiers.put(word, verdict);
		return verdict;
	}
	
	/**
	 * Counts the sentences in which the word, preceded by "&gt;" or a space
	 * and optionally followed by {@code </B></M>}, stands directly before a
	 * space and "&lt;N".
	 *
	 * @param dataholderHandler
	 *            data holder supplying the sentences
	 * @param word
	 *            the word to look for; concatenated into the regular
	 *            expression as is
	 * @return number of sentences matching that pattern
	 */
	public int getMCount(DataHolder dataholderHandler, String word) {
		final String beforeNounPattern = "(>| )"+word+"(</B></M>)? <N";

		int matches = 0;
		for (SentenceStructure item : dataholderHandler.getSentenceHolder()) {
			boolean found = StringUtility.isMatchedNullSafe(item.getSentence(),
					beforeNounPattern);
			matches += found ? 1 : 0;
		}
		return matches;
	}
	
	/**
	 * Normalizes the tag and modifier of every sentence, removes the
	 * {@code <N>}, {@code <B>} and {@code <M>} markup from the stored sentence
	 * text and re-tags the sentence with type "normalizetags". A tag without
	 * any word character is stored as {@code null}.
	 *
	 * @param dataholderHandler
	 *            data holder supplying the sentences and receiving the tags
	 */
	public void normalizeTags(DataHolder dataholderHandler) {
		for (SentenceStructure sentenceItem : dataholderHandler.getSentenceHolder()) {
			int sentenceID = sentenceItem.getID();
			String modifier = sentenceItem.getModifier();
			String tag = sentenceItem.getTag();
			// normalize real tags only. The check used to read
			// equals(tag, "ignore"), which skipped every real tag and
			// processed just the "ignore" marker; the separate null test
			// next to it only makes sense with the negated comparison.
			if (tag != null && !StringUtils.equals(tag, "ignore")) {
				tag = this.normalizeItem(tag);
				if (modifier != null) {
					modifier = this.normalizeItem(modifier);
				}
			}

			String sentence = sentenceItem.getSentence();
			sentence = sentence.replaceAll("</?[NBM]>", "");
			dataholderHandler.getSentence(sentenceID).setSentence(sentence);
			if (StringUtility.isMatchedNullSafe(tag, "\\w")) {
				dataholderHandler.tagSentenceWithMT(sentenceID, sentence, modifier, tag, "normalizetags");
			}
			else {
				dataholderHandler.tagSentenceWithMT(sentenceID, sentence, modifier, null, "normalizetags");
			}
		}
	}
	
	

	/**
	 * Normalizes a tag or modifier: removes "NUM", replaces every word by the
	 * result of {@code getSingular} and keeps bracketed segments in brackets.
	 *
	 * @param tag
	 *            the text to normalize, may be {@code null}
	 * @return {@code null} for a {@code null} input; the trimmed text with
	 *         "NUM" removed when it holds no word character; otherwise the
	 *         normalized text with its segments joined by single spaces
	 */
	public String normalizeItem(String tag) {
		// callers may hand in a missing modifier
		if (tag == null) {
			return null;
		}

		tag = tag.replaceAll("\\s*NUM\\s*", " ");
		tag = StringUtility.trimString(tag);

		if (!StringUtility.isMatchedNullSafe(tag, "\\w")) {
			return tag;
		}

		// mark the inside of every bracket pair with "*" so that bracketed
		// segments can still be told apart after splitting at the brackets
		tag = tag.replaceAll("\\[", "[*");
		tag = tag.replaceAll("\\]", "*]");
		String[] twSegs = tag.split("[\\]\\[]");

		StringBuilder tagSB = new StringBuilder();
		for (int j = 0; j < twSegs.length; j++) {
			// case 1: segment came from inside brackets
			boolean bracketed = StringUtility.isMatchedNullSafe(twSegs[j], "\\*");
			// case 2: plain segment with at least one word character
			if (!bracketed && !StringUtility.isMatchedNullSafe(twSegs[j], "\\w")) {
				continue;
			}

			String segment = bracketed ? twSegs[j].replaceAll("\\*", "") : twSegs[j];
			String[] tagWords = segment.split("\\s+");

			StringBuilder outSB = new StringBuilder();
			if (bracketed) {
				outSB.append('[');
			}
			for (int i = 0; i < tagWords.length; i++) {
				if (i > 0) {
					outSB.append(' ');
				}
				outSB.append(this.myLearnerUtility.getWordFormUtility()
						.getSingular(tagWords[i]));
			}
			if (bracketed) {
				outSB.append(']');
			}

			String out = outSB.toString();
			if (StringUtility.isMatchedNullSafe(out, "\\w")) {
				// separator goes in front, so nothing has to be deleted from
				// a possibly empty builder afterwards
				if (tagSB.length() > 0) {
					tagSB.append(' ');
				}
				tagSB.append(out);
			}
		}

		tag = tagSB.toString();
		tag = tag.replaceAll("\\s+", " ");
		return tag;
	}
	
	/**
	 * Sets the saved flag to "red" on entries of the WordPOS collection in
	 * preparation for running the parser. An entry is flagged when its word
	 * 1. is one of the pronoun, character, number, cluster-string or stop
	 * words, 2. contains no lower-case letter a-z, 3. is not a key of the
	 * unknown-word holder, or 4. ends in "ly" and is a key of the
	 * unknown-word holder once that ending is removed.
	 *
	 * @param dataholderHandler
	 *            data holder whose WordPOS entries are flagged
	 */
	public void prepareTables4Parser(DataHolder dataholderHandler) {
		Set<String> specialWords = new HashSet<String>();
		specialWords.addAll(this.myLearnerUtility.getConstant().pronounWords);
		specialWords.addAll(this.myLearnerUtility.getConstant().characterWords);
		specialWords.addAll(this.myLearnerUtility.getConstant().numberWords);
		specialWords.addAll(this.myLearnerUtility.getConstant().clusterStringWords);
		// NOTE(review): pronounWords is added a second time here; possibly a
		// different word list was meant
		specialWords.addAll(this.myLearnerUtility.getConstant().pronounWords);
		specialWords.addAll(this.myLearnerUtility.getConstant().stopWords);

		Set<String> unknownWords =dataholderHandler.getUnknownWordHolder().keySet();

		// first pass: special words, words without a-z, words not among the
		// unknown words
		for (Iterator<Entry<WordPOSKey, WordPOSValue>> entries = dataholderHandler
				.getWordPOSHolderIterator(); entries.hasNext();) {
			Entry<WordPOSKey, WordPOSValue> entry = entries.next();
			WordPOSValue posValue = entry.getValue();
			String word = entry.getKey().getWord();

			boolean keepUnflagged = !specialWords.contains(word)
					&& StringUtility.isMatchedNullSafe(word, "[a-z]")
					&& unknownWords.contains(word);
			if (!keepUnflagged) {
				posValue.setSavedFlag("red");
			}
		}

		// second pass: -ly words whose stem (the word without "ly") is among
		// the unknown words
		for (Iterator<Entry<WordPOSKey, WordPOSValue>> entries = dataholderHandler
				.getWordPOSHolderIterator(); entries.hasNext();) {
			Entry<WordPOSKey, WordPOSValue> entry = entries.next();
			WordPOSValue posValue = entry.getValue();
			String lyWord = entry.getKey().getWord();

			if (!StringUtility.isMatchedNullSafe(lyWord, "ly$")) {
				continue;
			}
			String stem = lyWord.replaceAll("ly$", "");
			if (unknownWords.contains(stem)) {
				posValue.setSavedFlag("red");
			}
		}
	}

	// Fields carried over from the original Perl implementation; the original
	// author marked them as unused there.
	// NOTE(review): whether they are referenced elsewhere in this class cannot
	// be determined from this section - confirm before removing any of them.

	// directory of the /descriptions folder
	private String desDir = "";
	// directory of the /characters folder
	private String chrDir = "";
	// prefix for all tables generated by this program
	private String prefix = "";
	// NOTE(review): the original comment here read "default general tag", but
	// no field declaration follows it.

	// name of the knowledge base
	private String knlgBase = "phenoscape";

	// Mutable instance counter (initially 0) despite the constant-style name.
	private int DECISIONID = 0;

	private Map<String, String> numberRecords = new HashMap<String, String>(); // word -> number, "p" or "s"
	private Map<String, String> singularRecords = new HashMap<String, String>();// word -> singular form
	private Map<String, String> POSRecords = new HashMap<String, String>(); // word -> POS tags
	// private Map<String, String> POSRecordsRECORDS = new HashMap<String,
	// String>();
	private String NEWDESCRIPTION = ""; // records the indices of sentences
										// that end a description

	// NOTE(review): presumably maps a word to its plural form - confirm
	// against the usages of this table.
	private Hashtable<String, String> PLURALS = new Hashtable<String, String>();

	private String TAGS = "";

	// Regular-expression fragments over single-letter symbols (m, b, q, n, o,
	// p). NOTE(review): presumably these letters encode per-word POS/role
	// tags - confirm against the code that builds the matched strings.

	// Capturing group: zero or more of m/b/q, each optionally followed by ','
	// or '&' characters, ending in m, b, or a q that is immediately followed
	// (lookahead) by p, o or n.
	private String mptn = "((?:[mbq][,&]*)*(?:m|b|q(?=[pon])))";
	// Capturing group, mandatory: one or more of n/o/p with optional ',' or
	// '&' characters in between, ending in n, o or p; q is not accepted.
	private String nptn = "((?:[nop][,&]*)*[nop])";
	// Capturing group with three alternatives: zero or more punctuation
	// characters (comma, semicolon, colon, backslash, dot) followed by a
	// literal '$'; or optional commas followed by b or m; or, only directly
	// after p, o or n (lookbehind), optional commas followed by q.
	private String bptn = "([,;:\\\\.]*\\$|,*[bm]|(?<=[pon]),*q)";
	// One segment: mptn immediately followed by nptn, in a non-capturing group.
	private String SEGANDORPTN = "(?:" + mptn + nptn + ")";
	// Anchored at the start: zero or more segments each followed by one or
	// more ',' or '&' characters, then a final segment, then bptn.
	private String ANDORPTN = "^(?:" + SEGANDORPTN + "[,&]+)*" + SEGANDORPTN
			+ bptn;

	// utility method
	/**
	 * Gives access to the learner utility used by this learner.
	 * 
	 * @return the value of the {@code myLearnerUtility} field
	 */
	public LearnerUtility getLearnerUtility() {
		return myLearnerUtility;
	}

	/**
	 * Gives access to the tokenizer used by this learner.
	 * 
	 * @return the value of the {@code myTokenizer} field
	 */
	public ITokenizer getTokenizer() {
		return myTokenizer;
	}

	/**
	 * Gives access to the configuration used by this learner.
	 * 
	 * @return the value of the {@code myConfiguration} field
	 */
	public Configuration getConfiguration() {
		return myConfiguration;
	}

	/**
	 * Command-line entry point. Its only statement is a single
	 * {@code assertEquals} call that compares the literals 1 and 12 under the
	 * message "tagAllSentenceHelper".
	 * <p>
	 * NOTE(review): presumably a leftover debugging stub that relies on a
	 * statically imported JUnit-style
	 * {@code assertEquals(message, expected, actual)}. If so, it fails on
	 * every run because 1 != 12 - confirm and consider removing it from this
	 * class.
	 * 
	 * @param args
	 *            command-line arguments; not read
	 */
	public static void main(String[] args) {
		assertEquals("tagAllSentenceHelper", 1, 12);
	}

}