package org.dbpedia.mappings.missingbot; import com.sun.jersey.api.container.grizzly2.GrizzlyServerFactory; import com.sun.jersey.api.core.PackagesResourceConfig; import com.sun.jersey.api.core.ResourceConfig; import net.sourceforge.jwbf.core.contentRep.Article; import net.sourceforge.jwbf.mediawiki.bots.MediaWikiBot; import org.apache.commons.cli.*; import org.apache.commons.configuration.Configuration; import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.configuration.PropertiesConfiguration; import org.dbpedia.mappings.missingbot.create.NewMappingArticle; import org.dbpedia.mappings.missingbot.create.Record; import org.dbpedia.mappings.missingbot.create.airpedia.AirpediaPropertyMapping; import org.dbpedia.mappings.missingbot.rest.filter.CharsetResponseFilter; import org.dbpedia.mappings.missingbot.rest.filter.CorsResponseFilter; import org.dbpedia.mappings.missingbot.storage.Store; import org.dbpedia.mappings.missingbot.label.AllMissingLabelTitles; import org.dbpedia.mappings.missingbot.label.TranslateLabelArticle; import org.dbpedia.mappings.missingbot.translate.Translator; import org.dbpedia.mappings.missingbot.translate.file.FileTranslator; import org.dbpedia.mappings.missingbot.translate.google.TranslateLabel; import org.dbpedia.mappings.missingbot.util.ParseCSV; import org.glassfish.grizzly.http.server.HttpServer; import org.h2.tools.Server; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.ws.rs.core.UriBuilder; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.net.InetAddress; import java.net.URI; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Hashtable; import java.util.List; public class Main { private static final Logger logger = LoggerFactory.getLogger(Main.class); public static Configuration config; private static MediaWikiBot bot; public static List<String> getArticles(String filename) throws IOException { BufferedReader reader = new BufferedReader(new FileReader(filename)); String line; ArrayList<String> articles = new ArrayList<String>(); while((line = reader.readLine()) != null) { String values[] = line.split("\t"); articles.add(values[0]); } reader.close(); return articles; } private static URI getBaseURI(int port) { return UriBuilder.fromUri("http://0.0.0.0/").port(port).build(); } protected static HttpServer startServer(int port) throws IOException { System.out.println("Starting grizzly..."); ResourceConfig rc = new PackagesResourceConfig("org/dbpedia/mappings/missingbot/rest/resources"); rc.getProperties().put(ResourceConfig.PROPERTY_CONTAINER_RESPONSE_FILTERS, CorsResponseFilter.class.getName()); rc.getProperties().put(ResourceConfig.PROPERTY_CONTAINER_RESPONSE_FILTERS, CharsetResponseFilter.class.getName()); URI baseUri = getBaseURI(port); return GrizzlyServerFactory.createHttpServer(baseUri, rc); } public static void startH2Console() { try { Server.createWebServer().start(); } catch (SQLException e) { e.printStackTrace(); } } public static void run_rest(int port) throws IOException { // startH2Console(); HttpServer httpServer = startServer(port); String addr = InetAddress.getLocalHost().getHostAddress() + ":" + port; System.out.println(String.format("Jersey app started with WADL available at " + "%s/application.wadl\nTry out %s/missings\nPress Ctrl-C for exit.", addr, addr)); try { Thread.currentThread().join(); } catch(Exception ex) { System.out.println(ex.getMessage()); } finally { httpServer.stop(); } } public static void create_mappings(String template_path) { List<Record> records = null; try { records = ParseCSV.parseCreationCSV(template_path); } catch (IOException e) { e.printStackTrace(); System.exit(1); } String title_prefix = "Mapping_commons:"; for(Record record : records) { NewMappingArticle creator = new NewMappingArticle( bot, title_prefix + record.getName(), record.getCategory(), record.getUrl().split(" ") ); if(creator.exists()) { logger.info("Article with title: " + creator.getTitle() + " already exists."); logger.info("Nothing to do."); } else { creator.save(); logger.info("Created mapping under title: " + creator.getTitle()); } } } public static void import_airpedia_classes(String classes_file, String language) throws IOException { BufferedReader reader = new BufferedReader(new FileReader(classes_file)); String line; String template = "{{TemplateMapping\n" + "| mapToClass = %s\n" + "| mappings = \n" + "}}"; while((line = reader.readLine()) != null) { String values[] = line.split("\t"); if(!values[0].equals(language)) { continue; } String name = values[1]; String cls = values[3]; String title = "Mapping_" + language + ":" + name; System.out.println(title); String new_mapping = String.format(template, cls); System.out.println(new_mapping); Article article = new Article(bot, title); String txt = article.getText(); if(txt.length() != 0) { logger.info("Article " + title + " already exists"); continue; } article.setText(new_mapping); article.setEditSummary("import class mappings from airpedia for language: " + language + " with precision 0.9."); article.save(); logger.info("Article " + title + " created."); } reader.close(); } public static void import_airpedia_properties(String classes_file, String language) throws IOException { BufferedReader reader = new BufferedReader(new FileReader(classes_file)); Hashtable<String, AirpediaPropertyMapping> new_mappings = new Hashtable<String, AirpediaPropertyMapping>(); String line; while((line = reader.readLine()) != null) { String values[] = line.split("\t"); String lang = values[0]; String name = values[1]; String template_property = values[2]; String ontology_property = values[3]; String cap_name = Character.toUpperCase(name.charAt(0)) + name.substring(1); String title = "Mapping_" + lang + ":" + cap_name; if(new_mappings.get(title) == null) { AirpediaPropertyMapping map = new AirpediaPropertyMapping(bot,title); map.addProperty(template_property, ontology_property); new_mappings.put(title, map); } else { AirpediaPropertyMapping map = new_mappings.get(title); map.addProperty(template_property, ontology_property); } } reader.close(); for(String title : new_mappings.keySet()) { AirpediaPropertyMapping article = new_mappings.get(title); if(article.isEmpty()) { logger.info("Article " + title + " does not exists"); continue; } if(!article.hasMapping()) { logger.info("not found mappings in article " + title); continue; } String property_mapping = article.buildPropertyMapping(); if(property_mapping.isEmpty()) { logger.info("properties already exists for article: " + title + "\n" ); continue; } logger.info("Create properties for artcile: " + title + "\n" + property_mapping); article.setEditSummary("import property mappings from airpedia for language: " + language); article.save(); } } public static void list_missing(String language, String filter, boolean db) { AllMissingLabelTitles apt = new AllMissingLabelTitles(language, filter); for (String missing : apt) { TranslateLabelArticle article = new TranslateLabelArticle(bot, missing, language); if(!article.foundLabel()) { continue; } String translation = ""; try { TranslateLabel translate = new TranslateLabel( config.getString("google_api_key"), config.getString("app_name")); translation = translate.translate(article.en_label, language); } catch (IOException e) { e.printStackTrace(); } if(db) { Store.initStore(config.getString("jdbc_url")); Store store = new Store(); store.put(missing, article.en_label, translation, language); logger.info(missing + "\t" + article.en_label + "\t" + translation); } else { System.out.println(missing + "\t" + article.en_label + "\t" + translation); } } } public static void translate_labels(List<String> articles, String language, Translator trans) { int change_counter = 0; for (String missing : articles) { logger.info("Processing " + missing + " ..."); String pad = String.format("%11s", ""); TranslateLabelArticle article = new TranslateLabelArticle(bot, missing, language); String translated_label = trans.translate(article.en_label); if(!article.foundLabel()) { logger.info(pad + "No english label found in: " + missing); logger.info("abort!"); continue; } else if(translated_label == null) { logger.info(pad + "Found no Translation for: \"" + article.en_label + "\""); logger.info("abort!"); continue; } else if(article.translationAlreadyExists()) { logger.info(pad + "Translation already exists."); logger.info("abort!"); continue; } else { logger.info(pad + "Translate \"" + article.en_label + "\" to \"" + translated_label + "\""); } article.translated_label = translated_label; String old_revision = article.getRevisionId(); // make minor edit and add summary article.setMinorEdit(true); article.setEditSummary("label@" + language + " = " + translated_label); article.save(); change_counter++; logger.info(pad + "Revision from changed \"" + old_revision + "\" to \"" + article.getRevisionId() + "\""); logger.info("done!"); } logger.info("Translated " + change_counter + " Labels."); } public static Options constructOptions() { // create Options object Options options = new Options(); options.addOption("h", "help", false, "print this message"); options.addOption("ls", "list_missing", false, "list missing labels for given language."); options.addOption("l", "lang", true, "2-letter language code for missing mappings in wiki."); options.addOption("c", "config", true, "config file for dbpedia mappings wiki (default: bot.properties)"); options.addOption("t", "translation_file", true, "Tab seperated file with one translation per line e.g. <article>\\t<english label>\\t<translation>\\n"); options.addOption("f", "filter", true, "filter for missing labels. Options: OntologyClass, OntologyProperty and Datatype. Default: All"); options.addOption( "db", false, "store listing results in database."); options.addOption( "create_mappings", true, "create missing template mappings from file."); options.addOption( "start_rest", false, "start rest service for to request articles with missing labels."); options.addOption( "import_template", true, "import template mappings from airpedia files. \n" + "format: the first column contains the template/infobox, " + "while the second one contains the guessed class in the DBpedia Ontology.\n" + "(required: lang parameter)"); options.addOption( "import_property", true, "import template properties from airpedia files."); return options; } public static void main(String[] args) { Options options = constructOptions(); HelpFormatter formatter = new HelpFormatter(); // create the parser CommandLineParser parser = new GnuParser(); CommandLine line = null; try { // parse the command line arguments line = parser.parse( options, args ); } catch( ParseException exp ) { // oops, something went wrong formatter.printHelp( "missingBot", options ); System.err.println("Parsing failed. Reason: " + exp.getMessage()); System.exit(1); } if(line.hasOption("help")) { formatter.printHelp( "missingBot", options ); System.exit(0); } String configFile; if(line.hasOption("config")) { configFile = line.getOptionValue("config"); } else { configFile = "bot.properties"; } try { config = new PropertiesConfiguration(configFile); } catch (ConfigurationException e) { e.printStackTrace(); System.exit(1); } if(line.hasOption("start_rest")) { try { Store.initStore(config.getString("jdbc_url")); run_rest(config.getInt("rest_port")); } catch (IOException e) { e.printStackTrace(); System.exit(1); } System.exit(0); } bot = new MediaWikiBot(config.getString("wikihosturl")); bot.login(config.getString("wikiuser"), config.getString("password")); if(line.hasOption("create_mappings")) { String template_path = line.getOptionValue("create_mappings"); create_mappings(template_path); System.exit(0); } if(line.hasOption("import_property")) { String property_file = line.getOptionValue("import_property"); try { import_airpedia_properties(property_file, "lang"); } catch (IOException e) { e.printStackTrace(); System.exit(1); } System.exit(0); } String filter = ""; if(line.hasOption("filter")) { List<String> filters = Arrays.asList("OntologyClass", "OntologyProperty", "Datatype"); for(String f : filters) { String filter_option = line.getOptionValue("filter"); if(filter_option.equals(f)) { filter = filter_option + ":"; } } } if(!line.hasOption("lang")) { System.err.println("language parameter is required."); System.exit(1); } String language = line.getOptionValue("lang"); if(line.hasOption("import_template")) { String class_file = line.getOptionValue("import_template"); try { import_airpedia_classes(class_file, language); } catch (IOException e) { e.printStackTrace(); System.exit(1); } System.exit(0); } if(line.hasOption("list_missing")) { list_missing(language, filter, line.hasOption("db")); System.exit(0); } if(!line.hasOption("translation_file")) { System.err.println("Missing required option: translation_file"); formatter.printHelp( "missingBot", options ); System.exit(1); } Translator trans; List<String> articles; try { trans = new FileTranslator(line.getOptionValue("translation_file")); articles = getArticles(line.getOptionValue("translation_file")); } catch (IOException e) { e.printStackTrace(); System.exit(1); return; } translate_labels(articles, language, trans); } }