package com.cognitionis.nlp_files.parentical_parsers;

import java.util.*;
import java.util.regex.Pattern;

/**
 * Incremental reader of parenthesised syntactic annotations that also tracks
 * SBAR sub-sentences and the word that opened each temporal ("TMP") one.
 *
 * <p>Every call to {@link #parse(String, String, String)} consumes one annotation
 * fragment (presumably one CoNLL-style syntax column value per token -- confirm
 * against the callers). While doing so the parser maintains:
 * <ul>
 *   <li>the parenthesis nesting level,</li>
 *   <li>a stack with the labels of the currently open constituents,</li>
 *   <li>the concatenation of every non-ignored character seen so far,</li>
 *   <li>an identifier for the current sub-sentence: {@code "0"} for the root, with
 *       one number appended each time an {@code SBAR} constituent is opened,</li>
 *   <li>a map from sub-sentence identifier to the lower-cased word passed along
 *       with the fragment that opened it when the role contains {@code TMP};
 *       otherwise the value is inherited from the closest enclosing sub-sentence
 *       that has one, or {@code "-"} when there is none.</li>
 * </ul>
 *
 * <p>Errors raised while parsing are reported on {@code System.err}; when the
 * {@code DEBUG} system property equals {@code true} the JVM is terminated.
 *
 * <p>NOTE(review): in {@code parse} a {@code ')'} pops the label stack even when the
 * element that is being closed has not been pushed yet (e.g. {@code "(NP*)"}), whereas
 * {@code getAllPhrases} pushes the pending element first. The behaviour is kept as it
 * was; confirm which of the two is intended.
 *
 * @author Héctor Llorens
 * @since 2011
 */
public class SyntColSBarTMPRoleParser {

    /** Identifier of the root sentence. */
    private static final String ROOT_SUBSENT = "0";
    /** Value stored for sub-sentences that have no temporal word. */
    private static final String NO_TMP = "-";
    /** Separator used when a path of labels is rendered as a single string. */
    private static final String LABEL_SEPARATOR = "-";
    private static final String SBAR_LABEL = "SBAR";
    private static final String VP_LABEL = "VP";
    /** Maximum number of labels collected per phrase by {@link #getAllCurrentPhrases}. */
    private static final int MAX_PHRASE_DEPTH = 3;
    /** Roles that mark a temporal sub-sentence (same regex the code always used). */
    private static final Pattern TMP_ROLE = Pattern.compile(".*TMP.*");
    /** Labels starting with "S" (S, SBAR, SINV, ...) delimit a phrase. */
    private static final Pattern SENTENCE_LABEL = Pattern.compile("S.*");

    /** Every non-ignored character consumed so far, in order. */
    private final StringBuilder full;
    private int parlevel;
    private String currentSubsent;
    /** Value of {@code parlevel} recorded when the current sub-sentence was opened. */
    private int currentSubsent_parlevel;
    /** Number appended to the identifier of the next sub-sentence to be opened. */
    private int sentnum;
    HashMap<String, String> subsentTMP;
    /** Labels of the open constituents, outermost first. */
    private final Stack<String> st;

    /** Creates a parser positioned at the root sentence with nothing consumed. */
    public SyntColSBarTMPRoleParser() {
        full = new StringBuilder();
        parlevel = 0;
        sentnum = 1;
        currentSubsent = ROOT_SUBSENT;
        currentSubsent_parlevel = 0;
        st = new Stack<String>();
        subsentTMP = new HashMap<String, String>();
        subsentTMP.put(currentSubsent, NO_TMP);
    }

    /**
     * Consumes one annotation fragment and updates the parser state.
     *
     * <p>Blanks and {@code '*'} are skipped. The first {@code '('} of a fragment only
     * starts an element; every later {@code '('} pushes the element read so far, and an
     * element still pending at the end of the fragment is pushed as well. Each
     * {@code ')'} pops one label and closes the current sub-sentence when the nesting
     * level equals the one recorded at its opening.
     *
     * <p>Any exception (empty element, pop on an empty stack, {@code null} argument...)
     * is reported on {@code System.err} instead of being propagated; the state keeps
     * whatever was updated before the failure.
     *
     * @param synt fragment of the bracketed annotation
     * @param role role associated with the fragment; only inspected when an
     *             {@code SBAR} element is opened
     * @param word word associated with the fragment; stored lower-cased (default
     *             locale) for temporal sub-sentences
     */
    public void parse(String synt, String role, String word) {
        try {
            String fragment = synt.trim();
            word = word.toLowerCase();
            StringBuilder element = new StringBuilder();
            boolean inElem = false;
            for (int pos = 0; pos < fragment.length(); pos++) {
                char c = fragment.charAt(pos);
                if (isIgnorable(c)) {
                    continue;
                }
                full.append(c);
                if (c == '(') {
                    if (inElem) {
                        openElement(element.toString(), role, word, true);
                        element.setLength(0);
                    } else {
                        inElem = true;
                    }
                    parlevel++;
                } else if (c == ')') {
                    if (parlevel == currentSubsent_parlevel && !currentSubsent.equals(ROOT_SUBSENT)) {
                        closeCurrentSubsent();
                    }
                    parlevel--;
                    // Throws EmptyStackException (reported below) on unbalanced input.
                    st.pop();
                } else {
                    element.append(c);
                }
            }
            if (inElem) {
                // NOTE(review): unlike the in-loop case, this path has never reset
                // sentnum; kept as is -- confirm whether the asymmetry is intended.
                openElement(element.toString(), role, word, false);
            }
        } catch (Exception e) {
            reportError(e);
        }
    }

    /** Tells whether a character is skipped by the parser (blank or '*'). */
    private static boolean isIgnorable(char c) {
        return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '*';
    }

    /** Tells whether a label stops the upward collection of a phrase (VP or S*). */
    private static boolean isPhraseBoundary(String label) {
        return label.equals(VP_LABEL) || SENTENCE_LABEL.matcher(label).matches();
    }

    /** Pushes a completed element and, when it is an SBAR, opens a new sub-sentence. */
    private void openElement(String element, String role, String word, boolean resetSentnum)
            throws Exception {
        if (element.length() < 1) {
            throw new Exception("Empty element");
        }
        st.push(element);
        if (!element.equals(SBAR_LABEL)) {
            return;
        }
        currentSubsent = currentSubsent + sentnum;
        if (resetSentnum) {
            sentnum = 1;
        }
        currentSubsent_parlevel = parlevel;
        if (TMP_ROLE.matcher(role).matches()) {
            subsentTMP.put(currentSubsent, word);
        } else {
            subsentTMP.put(currentSubsent, inheritedTMP(currentSubsent));
        }
    }

    /**
     * Returns the value of the closest enclosing sub-sentence that has a temporal
     * word, or "-" when none has. Identifiers missing from the map are skipped
     * instead of causing a NullPointerException.
     */
    private String inheritedTMP(String subsent) {
        String ancestor = subsent;
        while (ancestor.length() > 1) {
            ancestor = ancestor.substring(0, ancestor.length() - 1);
            String candidate = subsentTMP.get(ancestor);
            if (candidate != null && !candidate.equals(NO_TMP)) {
                return candidate;
            }
        }
        return NO_TMP;
    }

    /** Moves back to the enclosing sub-sentence and recomputes the next child number. */
    private void closeCurrentSubsent() {
        currentSubsent = currentSubsent.substring(0, currentSubsent.length() - 1);
        currentSubsent_parlevel = 0;
        // NOTE(review): the number is derived from the last digit of the *enclosing*
        // identifier, as it always was -- confirm this is the intended numbering.
        sentnum = Integer.parseInt(currentSubsent.substring(currentSubsent.length() - 1)) + 1;
    }

    /** Renders the open-constituent stack, outermost first, joined by "-". */
    private String joinStack() {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < st.size(); i++) {
            if (i != 0) {
                joined.append(LABEL_SEPARATOR);
            }
            joined.append(st.get(i));
        }
        return joined.toString();
    }

    /** Prints the error and, when the DEBUG system property is "true", exits the JVM. */
    private void reportError(Exception e) {
        System.err.println("Errors found (" + this.getClass().getSimpleName() + "):\n\t" + e.toString() + "\n");
        if (System.getProperty("DEBUG") != null && System.getProperty("DEBUG").equalsIgnoreCase("true")) {
            e.printStackTrace(System.err);
            System.exit(1);
        }
    }

    /** @return the current parenthesis nesting level */
    public int getParlevel() {
        return parlevel;
    }

    /** @return the identifier of the current sub-sentence ("0" for the root) */
    public String getCurrentSubsent() {
        return currentSubsent;
    }

    /**
     * @param ss sub-sentence identifier
     * @return the value stored for that sub-sentence, or {@code null} if unknown
     */
    public String getSubsentTMP(String ss) {
        return subsentTMP.get(ss);
    }

    /** @return the value stored for the current sub-sentence */
    public String getSubsentTMP() {
        return subsentTMP.get(currentSubsent);
    }

    /** @return the labels of the open constituents, outermost first, joined by "-" */
    public String getCurrent() {
        return joinStack();
    }

    /**
     * Computes the longest common prefix, label by label, between the current path of
     * open constituents and a previous path.
     *
     * @param past a "-"-separated path of labels
     * @return the shared leading labels joined by "-"; empty if the first ones differ
     */
    public String updateCurrent(String past) {
        String current = joinStack();
        String[] pastLabels = past.split(LABEL_SEPARATOR);
        // Re-split the joined path so that labels containing "-" behave as before.
        String[] currentLabels = current.split(LABEL_SEPARATOR);
        int limit = Math.min(currentLabels.length, pastLabels.length);
        StringBuilder common = new StringBuilder();
        for (int i = 0; i < limit && currentLabels[i].equals(pastLabels[i]); i++) {
            if (i != 0) {
                common.append(LABEL_SEPARATOR);
            }
            common.append(currentLabels[i]);
        }
        return common.toString();
    }

    // IMPORTANT: IT WAS THE ORIGINAL "getCurrentPhrase"
    /**
     * Extracts the innermost phrase of a path: the last label, preceded by every
     * ancestor up to (not including) the closest {@code VP} or {@code S*} label.
     * A path ending in {@code VP} yields just {@code "VP"}.
     *
     * @param synt a "-"-separated path of labels, outermost first
     * @return the phrase labels joined by "-"
     */
    public String getCurrentPhrase(String synt) {
        String[] labels = synt.split(LABEL_SEPARATOR);
        if (labels.length == 0) {
            return "";
        }
        int last = labels.length - 1;
        if (labels[last].equals(VP_LABEL)) {
            return labels[last];
        }
        String phrase = labels[last];
        for (int i = last - 1; i >= 0 && !isPhraseBoundary(labels[i]); i--) {
            phrase = labels[i] + LABEL_SEPARATOR + phrase;
        }
        return phrase;
    }

    /**
     * Like {@link #getCurrentPhrase(String)} but returns only the outermost label of
     * the phrase instead of the whole chain.
     *
     * @param synt a "-"-separated path of labels, outermost first
     * @return the outermost label of the innermost phrase
     */
    public String getCurrentMainPhrase(String synt) {
        String[] labels = synt.split(LABEL_SEPARATOR);
        if (labels.length == 0) {
            return "";
        }
        int last = labels.length - 1;
        String phrase = labels[last];
        if (phrase.equals(VP_LABEL)) {
            return phrase;
        }
        for (int i = last - 1; i >= 0 && !isPhraseBoundary(labels[i]); i--) {
            phrase = labels[i];
        }
        return phrase;
    }

    /** @return every non-ignored character consumed so far, in order */
    public String getFull() {
        return full.toString();
    }

    /**
     * Re-reads everything consumed so far and collects, for each constituent opened,
     * the phrase computed by {@link #getAllCurrentPhrases(Stack, ArrayList)}.
     *
     * <p>Errors are reported like in {@code parse}; the phrases gathered before the
     * failure are still returned.
     *
     * @return the collected phrases in order of appearance
     */
    public ArrayList<String> getAllPhrases() {
        ArrayList<String> phrases = new ArrayList<String>();
        try {
            Stack<String> open = new Stack<String>();
            String text = full.toString().trim();
            StringBuilder element = new StringBuilder();
            boolean inElem = false;
            for (int pos = 0; pos < text.length(); pos++) {
                char c = text.charAt(pos);
                if (isIgnorable(c)) {
                    continue;
                }
                if (c == '(') {
                    if (inElem) {
                        pushAndCollect(open, element, phrases);
                    } else {
                        inElem = true;
                    }
                } else if (c == ')') {
                    if (inElem) {
                        // A leaf constituent: register it before closing it.
                        pushAndCollect(open, element, phrases);
                        inElem = false;
                    }
                    open.pop();
                } else {
                    element.append(c);
                }
            }
        } catch (Exception e) {
            reportError(e);
        }
        return phrases;
    }

    /** Pushes the pending element, collects its phrase and clears the buffer. */
    private void pushAndCollect(Stack<String> open, StringBuilder element, ArrayList<String> phrases)
            throws Exception {
        if (element.length() < 1) {
            throw new Exception("Empty element");
        }
        open.push(element.toString());
        getAllCurrentPhrases(open, phrases);
        element.setLength(0);
    }

    /**
     * Appends to {@code phrases} the phrase ending at the top of the given stack, if
     * any. The top label must be {@code VP} (taken alone) or end in {@code "P"};
     * ancestors are then prepended until a {@code VP} or {@code S*} label is found or
     * three labels have been collected. Nothing is added otherwise.
     *
     * @param localst labels of the open constituents, outermost first
     * @param phrases list receiving the phrase
     */
    public void getAllCurrentPhrases(Stack<String> localst, ArrayList<String> phrases) {
        String[] labels = new String[localst.size()];
        localst.copyInto(labels);
        String phrase = "";
        int depth = 0;
        for (int i = labels.length - 1; i >= 0; i--) {
            String label = labels[i];
            if (depth == 0) {
                if (label.equals(VP_LABEL)) {
                    phrase = label;
                    break;
                }
                if (!label.endsWith("P")) {
                    break;
                }
                phrase = label;
            } else {
                if (depth >= MAX_PHRASE_DEPTH || isPhraseBoundary(label)) {
                    break;
                }
                phrase = label + LABEL_SEPARATOR + phrase;
            }
            depth++;
        }
        if (!phrase.equals("")) {
            phrases.add(phrase);
        }
    }
}