package fna.parsing.character;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.log4j.Logger;
import fna.parsing.ApplicationUtilities;
//interface to the FNA glossary
//read the glossary into a database
/**
 * Lookup and update access to a glossary table of (term, category, status) rows
 * stored in the application database.
 *
 * <p>The table name is static state: constructing a {@code Glossary} sets the table
 * that every static method of this class subsequently reads from or writes to.
 * This class never creates the table; the queries below simply fail (and are
 * logged) if it does not exist.
 *
 * <p>The database connection is opened lazily on first use from the
 * {@code database.url} application property and then reused.
 *
 * <p>All public methods are best-effort: database failures are logged and
 * swallowed, and the method returns whatever it had collected so far.
 */
public class Glossary {
    private static final Logger LOGGER = Logger.getLogger(Glossary.class);

    static {
        // Register the JDBC driver named in the application properties.
        try {
            Class.forName(ApplicationUtilities.getProperty("database.driverPath"));
        } catch (ClassNotFoundException e) {
            LOGGER.error("Couldn't find JDBC driver class in Glossary", e);
        }
    }

    /** Name of the glossary table; set by the constructor. */
    private static String tablename = null;

    /** Shared connection, opened on demand by {@link #getConnection()}. */
    private static Connection conn = null;

    /**
     * Selects the glossary table that the static methods of this class operate on.
     *
     * @param glosstable name of the glossary table; it is spliced into SQL text
     *                   verbatim (identifiers cannot be bound as parameters), so it
     *                   must come from trusted configuration, never from user input
     */
    public Glossary(String glosstable) {
        Glossary.tablename = glosstable;
    }

    /**
     * Returns the shared connection, opening it if it has not been opened yet or
     * has since been closed.
     *
     * @return an open connection
     * @throws SQLException if the connection cannot be established
     */
    private static Connection getConnection() throws SQLException {
        if (conn == null || conn.isClosed()) {
            conn = DriverManager.getConnection(ApplicationUtilities.getProperty("database.url"));
        }
        return conn;
    }

    /**
     * Closes a result set and its statement, logging (not propagating) failures so
     * that cleanup never masks the outcome of the query itself.
     *
     * @param rs   result set to close, may be {@code null}
     * @param stmt statement to close, may be {@code null}
     */
    private static void closeQuietly(ResultSet rs, Statement stmt) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                LOGGER.warn("Could not close ResultSet in Glossary", e);
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                LOGGER.warn("Could not close Statement in Glossary", e);
            }
        }
    }

    /**
     * Looks up the distinct categories recorded for a term.
     *
     * @param state the term to look up
     * @return the distinct {@code category} values of rows whose {@code term} equals
     *         {@code state}; empty if there are none or if the query fails
     */
    public static ArrayList<String> getCharacter(String state) {
        ArrayList<String> chs = new ArrayList<String>();
        PreparedStatement stmt = null;
        ResultSet rs = null;
        try {
            // The term is bound as a parameter so quotes in it cannot break the query.
            stmt = getConnection().prepareStatement(
                    "select distinct category from " + tablename + " where term = ?");
            stmt.setString(1, state);
            rs = stmt.executeQuery();
            while (rs.next()) {
                chs.add(rs.getString("category"));
            }
        } catch (Exception e) {
            // Broad catch kept on purpose: callers rely on this method never throwing.
            LOGGER.error("Exception in Glossary getCharacter", e);
        } finally {
            closeQuietly(rs, stmt);
        }
        return chs;
    }

    /**
     * Collects every distinct term whose category is not one of
     * {@code STRUCTURE / SUBSTANCE}, {@code STRUCTURE}, {@code CHARACTER},
     * {@code SUBSTANCE} or {@code PLANT}.
     *
     * @return the trimmed terms joined with {@code '|'} (no trailing separator);
     *         an empty string if there are none. If the query fails part-way, the
     *         terms read up to that point are returned.
     */
    public static String getAllCharacters() {
        StringBuilder chs = new StringBuilder();
        Statement stmt = null;
        ResultSet rs = null;
        try {
            stmt = getConnection().createStatement();
            String query = "select distinct term from " + tablename
                    + " where category not in ('STRUCTURE / SUBSTANCE','STRUCTURE', 'CHARACTER', 'SUBSTANCE', 'PLANT')";
            rs = stmt.executeQuery(query);
            while (rs.next()) {
                String term = rs.getString("term");
                if (term == null) {
                    // A NULL term must not abort the scan of the remaining rows.
                    continue;
                }
                chs.append(term.trim()).append('|');
            }
        } catch (Exception e) {
            LOGGER.error("Exception in Glossary getAllCharacters", e);
        } finally {
            closeQuietly(rs, stmt);
        }
        // Drop the separator left after the last term.
        return chs.toString().replaceFirst("\\|$", "");
    }

    /**
     * Inserts one row per category for the given term, each with status
     * {@code learned}. A failure on one category is logged and does not stop the
     * remaining categories from being inserted.
     *
     * @param term       the term to record
     * @param categories the categories to pair with the term; every element must be
     *                   a {@code String}. {@code null} or empty means nothing to do.
     */
    public static void addInducedPair(String term, ArrayList<?> categories) {
        if (categories == null || categories.isEmpty()) {
            return;
        }
        PreparedStatement stmt = null;
        try {
            stmt = getConnection().prepareStatement(
                    "insert into " + tablename + " (term, category, status) values (?, ?, ?)");
            for (Object category : categories) {
                String cat = (String) category;
                try {
                    stmt.setString(1, term);
                    stmt.setString(2, cat);
                    stmt.setString(3, "learned");
                    stmt.executeUpdate();
                } catch (Exception e) {
                    LOGGER.error("Exception in Glossary addInducedPair", e);
                }
            }
        } catch (SQLException e) {
            // Could not even prepare the statement: nothing can be inserted.
            LOGGER.error("Exception in Glossary addInducedPair", e);
        } finally {
            closeQuietly(null, stmt);
        }
    }

    /**
     * Entry point kept for compatibility; it currently performs no work.
     *
     * @param argv ignored
     */
    public static void main(String[] argv) {
        // Intentionally empty.
    }
}