/* MSRMeanSemrelXX.java - SQL operations with the tables 'mean_semrel_XX'
* in the database (wikt_mean_semrel) wich contains only word's meanings
* and semantic relations.
* It is simplified Wiktionary parsed database based on wikt_parsed database.
* XX is a language code.
*
* Copyright (c) 2011 Andrew Krizhanovsky <andrew.krizhanovsky at gmail.com>
* Distributed under EPL/LGPL/GPL/AL/BSD multi-license.
*/
package wiktparsed.mean_semrel.parser.sql;
import wikokit.base.wikt.constant.Relation;
import wikokit.base.wikipedia.language.LanguageType;
import wikokit.base.wikipedia.sql.Connect;
import wikokit.base.wikipedia.sql.PageTableBase;
import java.sql.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import wikokit.base.wikipedia.language.Encodings;
/** Table wikt_mean_semrel.mean_semrel_XX contains connected:
* (1) entry / headword in the language XX
* (1) text of the meaning (definition) of this entry
* (2) related to this meaning synonyms, antonyms, etc.
*/
public class MSRMeanSemrelXX {
private final static MSRMeanSemrelXX[] NULL_MSRMEANSEMRELXX_ARRAY = new MSRMeanSemrelXX[0];
/** Unique identifier for this meaning. */
private int id;
/** Entry / headword in the language XX. */
private String page_title;
/** Text of the meaning (definition) of this entry (without wikification). */
private String meaning;
//private final static String delimiter = new String("|");
/** Names of semantic relations. */
private static final String[] table_fields_relations = {
"synonyms", "antonyms",
"hypernyms", "hyponyms",
"holonyms", "meronyms",
"troponyms", "coordinate_terms"
};
/** Semantic relations. */
private static final Relation[] ar_relations = {
Relation.synonymy, Relation.antonymy,
Relation.hypernymy, Relation.hyponymy,
Relation.holonymy, Relation.meronymy,
Relation.troponymy, Relation.coordinate_term
};
private static Map<Relation, String> m_relations;
/** Number of semantic relations (for this meaning): synonyms + antonyms + ... */
private int n_sem_rel;
/** Number of correct answers with these semantic relations. */
private int success;
/** Number of wrong answers with these semantic relations. */
private int failure;
public MSRMeanSemrelXX(int _id, String _page_title, String _meaning,
int _n_sem_rel, int _success, int _failure,
Map<Relation, String> _m_relations)
{
id = _id;
page_title = _page_title;
meaning = _meaning;
n_sem_rel = _n_sem_rel;
success = _success;
failure = _failure;
m_relations = _m_relations;
}
/** Gets semantic relations. */
public Map<Relation, String> getRelations() {
return m_relations;
}
/** Generates tables 'mean_semrel_XX' for each language code.<br><br>
*
* <PRE>
* DROP TABLE IF EXISTS `mean_semrel_uk` ;
*
* CREATE TABLE IF NOT EXISTS `mean_semrel_en` (
`_id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
`page_title` VARCHAR(255) BINARY NOT NULL COMMENT 'headword' ,
`meaning` VARCHAR(4095) BINARY NOT NULL COMMENT 'Definition text.' ,
`synonyms` VARCHAR(4095) BINARY NULL COMMENT 'Synonyms joined by some symbol' ,
`antonyms` VARCHAR(4095) BINARY NULL ,
`hypernyms` VARCHAR(4095) BINARY NULL ,
`hyponyms` VARCHAR(4095) BINARY NULL ,
`holonyms` VARCHAR(4095) BINARY NULL ,
`meronyms` VARCHAR(4095) BINARY NULL ,
`troponyms` VARCHAR(4095) BINARY NULL ,
`coordinate_terms` VARCHAR(4095) BINARY NULL ,
`n_sem_rel` TINYINT UNSIGNED NOT NULL COMMENT 'Positive number of semantic relations (for this meaning)' ,
`success` SMALLINT UNSIGNED NOT NULL COMMENT 'Number of correct answers with these sem. relations' ,
`failure` SMALLINT UNSIGNED NOT NULL COMMENT 'Number of wrong answers with these sem. relations' ,
PRIMARY KEY (`_id`) ,
INDEX `idx_page_title` (`page_title`(7) ASC) )
ENGINE = InnoDB
COMMENT = 'entry name, meaning and semantic relations' ;
* </PRE>
*
* @see http://code.google.com/p/wikokit/wiki/File_mean_semrel_empty_sql
*/
public static void generateTables (Connect connect)
{
if(null == connect)
return;
StringBuffer str_sql = new StringBuffer();
try {
Statement s = connect.conn.createStatement ();
try {
Map<String, LanguageType> code2lang = LanguageType.getAllLanguages();
for(LanguageType lang_code : code2lang.values()) {
//String table_name = "index_" + lang_code.toStringASCII();
String table_name = "`mean_semrel_" + lang_code.toTablePrefix() + "`";
str_sql.setLength(0);
str_sql.append("DROP TABLE IF EXISTS "+ table_name);
s.execute(str_sql.toString());
str_sql.setLength(0);
str_sql.append("CREATE TABLE IF NOT EXISTS "+ table_name +" (" +
"`_id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY," +
"`page_title` VARCHAR(255) BINARY NOT NULL," + // COMMENT 'headword'
"`meaning` VARCHAR(4095) BINARY NOT NULL," + // COMMENT 'Definition text.'
"`synonyms` VARCHAR(4095) BINARY NULL," +
"`antonyms` VARCHAR(4095) BINARY NULL," +
"`hypernyms` VARCHAR(4095) BINARY NULL," +
"`hyponyms` VARCHAR(4095) BINARY NULL," +
"`holonyms` VARCHAR(4095) BINARY NULL," +
"`meronyms` VARCHAR(4095) BINARY NULL," +
"`troponyms` VARCHAR(4095) BINARY NULL," +
"`coordinate_terms` VARCHAR(4095) BINARY NULL," +
"`n_sem_rel` TINYINT UNSIGNED NOT NULL," + // COMMENT 'Positive number of semantic relations (for this meaning)'
"`success` SMALLINT UNSIGNED NOT NULL," + // COMMENT 'Number of correct answers with these sem. relations'
"`failure` SMALLINT UNSIGNED NOT NULL," + // COMMENT 'Number of wrong answers with these sem. relations'
"INDEX `idx_page_title` (`page_title`(7) ASC)) " +
"ENGINE = InnoDB"
);
s.execute (str_sql.toString());
}
} finally {
s.close();
}
} catch(SQLException ex) {
System.err.println("SQLException (MSRMeanSemrelXX.generateTables()): sql='" + str_sql + "' " + ex.getMessage());
}
}
/** Inserts record into the table 'mean_semrel_XX'.<br><br>
* INSERT INTO mean_semrel_os (page_title,meaning,synonyms,n_sem_rel,success,failure) VALUES ("test_page_msr","meaning_test_msr","synonyms_test_msr",1,0,0);
*
* @param xx_lang defines XX language code in mean_semrel_XX table
* @param meaning corresponding meaning of the word
* @param connect
* @param page_title
* @param meaning_text
* @param m_relations
* @param n_sem_rel number of semantic relations for this meaning
*/
public static void insert ( LanguageType xx_lang,
Connect connect,
String page_title, String meaning_text,
Map<Relation, StringBuffer> m_relations,
int n_sem_rel)
{
if(0 == page_title.length() || 0 == meaning_text.length() || n_sem_rel < 1) {
System.err.println("Error (MSRMeanSemrelXX.insert()):: null arguments, page_title="+page_title+", meaning_text="+meaning_text+", n_sem_rel="+n_sem_rel);
return;
}
StringBuilder str_sql = new StringBuilder();
StringBuilder values_sql = new StringBuilder();
try
{
Statement s = connect.conn.createStatement ();
try {
String table_name = "mean_semrel_" + xx_lang.toTablePrefix();
// INSERT INTO mean_semrel_XX (page_title,meaning,n_sem_rel,success,failure,synonyms,etc.) VALUES ("test_page_msr","meaning_test_msr",1,0,0,"synonyms_test_msr",etc.);
str_sql.append("INSERT INTO ").append(table_name);
str_sql.append("(page_title,meaning,n_sem_rel,success,failure");
String safe_title = PageTableBase.convertToSafeStringEncodeToDBWunderscore(connect, page_title);
String safe_meaning = PageTableBase.convertToSafeStringEncodeToDBWunderscore(connect, meaning_text);
values_sql.append('"').append(safe_title).append("\",\"");
values_sql.append(safe_meaning).append("\",");
values_sql.append(n_sem_rel).append(",0,0"); // 0 == success,failure
boolean b_sem_rel = false;
for(Relation r : m_relations.keySet()) {
String synset = m_relations.get(r).toString();
if (synset.length() > 0) {
b_sem_rel = true;
str_sql.append(",");
if(r != Relation.coordinate_term) {
str_sql.append(r.toString()); // INSERT INTO (...",synonyms"...)
} else {
str_sql.append("coordinate_terms"); // "coordinate terms" -> "coordinate_terms" (without space in the table field name)
}
values_sql.append(",\"");
values_sql.append( // VALUES (",word1|word2... |wordN" ...)
PageTableBase.convertToSafeStringEncodeToDBWunderscore(connect, synset));
values_sql.append("\"");
}
}
if(!b_sem_rel) {
System.err.println("Error (MSRMeanSemrelXX.insert()):: there are no semantic relations, page_title="+page_title+", meaning_text="+meaning_text+", n_sem_rel="+n_sem_rel);
return;
}
str_sql.append(") VALUES (");
str_sql.append(values_sql);
str_sql.append(")");
s.executeUpdate (str_sql.toString());
} finally {
s.close();
}
}catch(SQLException ex) {
System.err.println("SQLException (MSRMeanSemrelXX.insert()):: sql='" + str_sql.toString() + "' " + ex.getMessage());
}
}
/** Gets rows from the XX table by entry headword (page_title).<br><br>
*
* SELECT _id,meaning,synonyms,n_sem_rel,success,failure,synonyms FROM mean_semrel_os WHERE page_title="test_page_msr"
*
* @param xx_lang defines XX language code in mean_semrel_XX table
* @return
*/
public static MSRMeanSemrelXX[] getByPage (Connect connect,
LanguageType xx_lang,String page_title)
{
if(0 == page_title.length()) {
System.err.println("Error (MSRMeanSemrelXX.get()):: null argument: page_title.");
return NULL_MSRMEANSEMRELXX_ARRAY;
}
StringBuilder str_sql = new StringBuilder();
// SELECT meaning,synonyms,n_sem_rel,success,failure,synonyms FROM mean_semrel_os WHERE page_title="test_page_msr"
str_sql.append("SELECT _id,meaning,n_sem_rel,success,failure");
for(String r : table_fields_relations)
str_sql.append(",").append(r); // SELECT ...,synonyms...
String table_name = "mean_semrel_" + xx_lang.toTablePrefix();
str_sql.append(" FROM ").append(table_name);
str_sql.append(" WHERE page_title=\"");
str_sql.append(PageTableBase.
convertToSafeStringEncodeToDBWunderscore(connect, page_title));
str_sql.append("\"");
List<MSRMeanSemrelXX> list_rel = null;
try {
Statement s = connect.conn.createStatement ();
try {
ResultSet rs = s.executeQuery (str_sql.toString());
try {
while (rs.next ())
{
int _id = rs.getInt("_id");
String _meaning_text = Encodings.bytesToUTF8(rs.getBytes("meaning"));
int _n_sem_rel = rs.getInt("n_sem_rel");
int _success = rs.getInt("success");
int _failure = rs.getInt("failure");
Map<Relation, String> _m_relations = new HashMap<Relation, String>();
for(Relation r : ar_relations) {
String relation_field = r.toString(); // relation name as table field
if(r == Relation.coordinate_term)
relation_field = "coordinate_terms"; // "coordinate terms" -> "coordinate_terms" (without space in the table field name)
byte[] byte_synset = rs.getBytes(relation_field);
if(null != byte_synset) {
String synset = Encodings.bytesToUTF8(byte_synset);
_m_relations.put(r, synset);
}
}
if(null == list_rel)
list_rel = new ArrayList<MSRMeanSemrelXX>();
list_rel.add(new MSRMeanSemrelXX(_id, page_title, _meaning_text,
_n_sem_rel, _success, _failure, _m_relations));
}
} finally {
rs.close();
}
} finally {
s.close();
}
} catch(SQLException ex) {
System.err.println("SQLException (TQuote.get()):: sql='" + str_sql.toString() + "' " + ex.getMessage());
}
if(null == list_rel)
return NULL_MSRMEANSEMRELXX_ARRAY;
return (MSRMeanSemrelXX[])list_rel.toArray(NULL_MSRMEANSEMRELXX_ARRAY);
}
/** Deletes row from the table 'quote' by a value of ID.<br><br>
* DELETE FROM mean_semrel_os WHERE _id=4;
*
* @param xx_lang defines XX language code in mean_semrel_XX table
*/
public void delete (Connect connect, LanguageType xx_lang) {
String table_name = "mean_semrel_" + xx_lang.toTablePrefix();
StringBuilder str_sql = new StringBuilder();
str_sql.append("DELETE FROM ").append(table_name);
str_sql.append(" WHERE _id=");
str_sql.append( id );
try {
Statement s = connect.conn.createStatement ();
try {
s.execute (str_sql.toString());
} finally {
s.close();
}
} catch(SQLException ex) {
System.err.println("SQLException (TQuote.delete()):: sql='" + str_sql.toString() + "' " + ex.getMessage());
}
}
}