package me.osm.gazetter;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import me.osm.gazetter.addresses.AddrLevelsSorting;
import me.osm.gazetter.diff.Diff;
import me.osm.gazetter.join.JoinExecutor;
import me.osm.gazetter.sortupdate.SortUpdate;
import me.osm.gazetter.split.Split;
import me.osm.gazetter.striper.Slicer;
import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.impl.Arguments;
import net.sourceforge.argparse4j.impl.action.StoreTrueArgumentAction;
import net.sourceforge.argparse4j.inf.ArgumentParser;
import net.sourceforge.argparse4j.inf.ArgumentParserException;
import net.sourceforge.argparse4j.inf.Namespace;
import net.sourceforge.argparse4j.inf.Subparser;
import net.sourceforge.argparse4j.inf.Subparsers;
/**
* Entry point for executable jar.
*/
public class Gazetteer {
private static final String BOUNDARIES_FALLBACK_TYPES_PARAM = "--boundaries-fallback-types";
private static final String BOUNDARIES_FALLBACK_TYPES_VAL = "boundaries_fallback_types";
private static final String BOUNDARIES_FALLBACK_PARAM = "--boundaries-fallback-file";
private static final String BOUNDARIES_FALLBACK_VAL = "boundaries_fallback_file";
private static final String NAMED_POI_BRANCH_OPT = "--named-poi-branch";
private static final String NAMED_POI_BRANCH_VAL = "named_poi_branch";
private static final String EXCCLUDE_POI_BRANCH_OPT = "--excclude-poi-branch";
private static final String EXCCLUDE_POI_BRANCH_VAL = "excclude_poi_branch";
private static final String ADDR_FORMATTER_OPT = "--addr-parser";
private static final String ADDR_FORMATTER_VAL = "addr_parser";
private static final String ADDR_ORDER_OPT = "--addr-order";
private static final String ADDR_ORDER_VAL = "addr_order";
private static final String JOIN_COMMON_VAL = "common";
private static final String JOIN_COMMON_OPT = "--common";
private static final String COMPRESS_VAL = "no_compress";
private static final String NO_COMPRESS_OPT = "--no-compress";
private static final String DATA_DIR_VAL = "data_dir";
private static final String DATA_DIR_OPT = "--data-dir";
private static final String LOG_OPT = "--log-level";
private static final String LOG_FILE_OPT = "--log-file";
private static final String LOG_PREFIX_OPT = "--log-prefix";
private static final String LOG_FILE_ONLY = "--log-console-mute";
private static final String POI_CATALOG_VAL = "poi_catalog";
private static final String POI_CATALOG_OPT = "--poi-catalog";
private static final String FEATURE_TYPES_VAL = "feature_types";
private static final String COMMAND = "command";
private static Logger log;
private static Subparser split;
private static Subparser slice;
private static Subparser join;
private static Subparser update;
private static Subparser man;
private static Subparser diff;
/**
* Command line command description
* */
public static interface CommandDescription {
/**
* Name of command, will be used as executable.jar long-coomand-name
*
* @return long name
* */
public String longName();
/**
* Command description
*
* @return help string
* */
public String help();
}
/**
* Supported commands
* */
private enum Command implements CommandDescription {
MAN {
@Override
public String longName() {return name().toLowerCase();}
@Override
public String help() {return "Prints extended usage";}
},
SPLIT {
@Override
public String longName() {return name().toLowerCase();}
@Override
public String help() {return "Prepare osm data. Split nodes, ways and relations.";}
},
SLICE {
@Override
public String longName() {return name().toLowerCase();}
@Override
public String help() {return "Parse features from osm data and write it into stripes 0.1 degree wide.";}
},
JOIN {
@Override
public String longName() {return name().toLowerCase();}
@Override
public String help() {return "Join features. Made spatial joins for address points inside polygons and so on.";}
},
SYNCHRONIZE {
@Override
public String longName() {return name().toLowerCase();}
@Override
public String help() {return "Sort and update features. Remove outdated dublicates.";}
},
OUT_CSV {
@Override
public String longName() {return name().toLowerCase().replace('_', '-');}
@Override
public String help() {return "Write data out in csv format.";}
},
DIFF {
@Override
public String longName() {return name().toLowerCase().replace('_', '-');}
@Override
public String help() {return "Write difference between two gazetteer json files";}
},
MATCH_FLAP {
@Override
public String longName() {return name().toLowerCase().replace('_', '-');}
@Override
public String help() {return "Match features with flap objects.";}
};
};
/**
* Parse arguments and run tasks accordingly.
*
* @param args
* Command line arguments
* */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static void main(String[] args) {
initLog(args);
log = LoggerFactory.getLogger(Gazetteer.class);
ArgumentParser parser = getArgumentsParser();
if(args.length > 0 && ("-v".equals(args[0]) || "--version".equals(args[0]))) {
printVersion("--version".equals(args[0]));
return;
}
try {
Namespace namespace = parser.parseArgs(args);
if(namespace.getBoolean("version")) {
printVersion(true);
return;
}
String thrds = namespace.get("threads");
Integer threads = thrds == null ? null : Integer.valueOf(thrds);
if(namespace.get(COMMAND).equals(Command.JOIN)) {
Options.initialize(
AddrLevelsSorting.valueOf(namespace.getString(ADDR_ORDER_VAL)),
namespace.getString(ADDR_FORMATTER_VAL),
new HashSet(list(namespace.getList("skip_in_text"))),
namespace.getBoolean("find_langs")
);
}
if(threads != null) {
Options.get().setNThreads(threads);
}
Options.get().setCompress(namespace.getBoolean(COMPRESS_VAL));
if(namespace.get(COMMAND).equals(Command.MAN)) {
printFullHelp(parser);
System.exit(0);
}
if(namespace.get(COMMAND).equals(Command.SPLIT)) {
File destFolder = new File(namespace.getString(DATA_DIR_VAL));
String in = namespace.getString("osm_file");
String compression = namespace.getString("compression");
boolean append = namespace.getBoolean("append");
Split splitter = new Split(destFolder, in, compression, append);
splitter.run();
}
if(namespace.get(COMMAND).equals(Command.SLICE)) {
List<String> types = new ArrayList<String>();
if(namespace.get(FEATURE_TYPES_VAL) instanceof String) {
types.add((String)namespace.get(FEATURE_TYPES_VAL));
}
else if (namespace.get(FEATURE_TYPES_VAL) instanceof Collection) {
types.addAll((Collection<String>)namespace.get(FEATURE_TYPES_VAL));
}
new Slicer(namespace.getString(DATA_DIR_VAL)).run(
namespace.getString(POI_CATALOG_VAL),
types,
list(namespace.getList(EXCCLUDE_POI_BRANCH_VAL)),
list(namespace.getList(NAMED_POI_BRANCH_VAL)),
list(namespace.getList("drop")),
namespace.getString(BOUNDARIES_FALLBACK_VAL),
list(namespace.getList(BOUNDARIES_FALLBACK_TYPES_VAL)),
namespace.getBoolean("x10"),
namespace.getBoolean("skip_interpolation")
);
}
if(namespace.get(COMMAND).equals(Command.JOIN)) {
List<String> handlers = list(namespace.getList("handlers"));
Options.get().setJoinHandlers(handlers);
if(Options.get().getJoinOutHandlers().isEmpty()) {
System.out.println("No join handlers was initialized.");
System.out.println("Predefined handlers are: " + StringUtils.join(Options.getPredefinedOutHandlers(), ", "));
System.exit(1);
}
new JoinExecutor(namespace.getBoolean("skip_hghnets"),
namespace.getBoolean("keep_hghnets_geometry"),
namespace.getBoolean("clean_stripes"),
new HashSet(list(namespace.getList("check_boundaries")))).run(
namespace.getString(DATA_DIR_VAL),
namespace.getString(JOIN_COMMON_VAL));
}
if(namespace.get(COMMAND).equals(Command.SYNCHRONIZE)) {
new SortUpdate(namespace.getString(DATA_DIR_VAL)).run();
}
if(namespace.get(COMMAND).equals(Command.DIFF)) {
Boolean full = namespace.getBoolean("--full");
full = full == null ? false : full;
String oldHeader = namespace.getString("old_header");
String newHeader = namespace.getString("new_header");
Diff diffExecutor = new Diff(namespace.getString("old"),
namespace.getString("new"),
namespace.getString("out_file"), full);
diffExecutor.setOldHeader(oldHeader);
diffExecutor.setNewHeader(newHeader);
diffExecutor.run();
}
if(namespace.get(COMMAND).equals(Command.MATCH_FLAP)) {
}
}
catch (ArgumentParserException e) {
parser.handleError(e);
}
catch (Exception e) {
Throwable rootCause = ExceptionUtils.getRootCause(e);
log.error("Fatal error: " + (rootCause == null ? "" : rootCause.getMessage()), e);
System.exit(1);
}
}
/**
* Print version of gazetteer to stdout
*
* @param full print versions of major dependencies
* */
private static void printVersion(boolean full) {
if(full) {
System.out.println("Gazetteer: " + Versions.gazetteer);
System.out.println("Build timestamp: " + Versions.buildTs);
System.out.println("Java Topology Syte: " + Versions.jts);
System.out.println("Osm Doc Java: " + Versions.osmdoc);
System.out.println("Groovy runtime: " + Versions.groovy);
}
else {
System.out.println(Versions.gazetteer);
}
}
private static void printFullHelp(ArgumentParser parser) {
parser.printHelp();
System.out.print("\nGazetteer version: ");
System.out.print(Versions.gazetteer);
System.out.print("\n\n");
System.out.print("\nCommands:\n\n");
System.out.print("MAN\n\n");
man.printHelp();
System.out.print("\n\n\nSPLIT\n\n");
split.printHelp();
System.out.print("\n\n\nSLICE\n\n");
slice.printHelp();
System.out.print("\n\n\nJOIN\n\n");
join.printHelp();
System.out.print("\n\n\nUPDATE\n\n");
update.printHelp();
System.out.print("\n\n\nDIFF\n\n");
diff.printHelp();
}
/**
* Returns string list or empty list for null
*
* @param list
* unsafe list
* @return List of strings
* */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static List<String> list( List list) {
if(list == null) {
return Collections.emptyList();
}
return list;
}
/**
* Initialize logging system.
* <p>
* Logging options should be set before any logger will be instantiated.
* */
private static void initLog(String[] args) {
/**
* XXX: Static not final access to LogbackConfigurator is a crap
*
* Done on purpose, logback use LogbackConfigurator
* binded via META-INF/services. And I don't want to move it inside
* Gazetteer class, because I don't know when it will be created and
* accessed. So I need a way to set that fields.
*
* From the other hand it just wanted to work the right way, thats
* why I've made ugly configureStatic which completly unnecessary
* if services providesrs works
* */
Iterator<String> iterator = Arrays.asList(args).iterator();
while(iterator.hasNext()) {
String k = iterator.next();
if(k.equals(LOG_OPT) && iterator.hasNext()) {
LogbackConfigurator.level = iterator.next();
}
else if(k.equals(LOG_FILE_OPT) && iterator.hasNext()) {
LogbackConfigurator.outFile = iterator.next();
}
else if(k.equals(LOG_PREFIX_OPT) && iterator.hasNext()) {
LogbackConfigurator.logPrefix = iterator.next();
}
else if(k.equals(LOG_FILE_ONLY)) {
LogbackConfigurator.muteConsole = true;
}
}
LogbackConfigurator.configureStatic();
}
/**
* Generate arguments parser.
* */
private static ArgumentParser getArgumentsParser() {
ArgumentParser parser = ArgumentParsers.newArgumentParser("gazetter")
.defaultHelp(true)
.description("Create alphabetical index of osm file features.");
parser.version(Versions.gazetteer);
parser.addArgument("--threads").required(false)
.help("set number of threads avaible. By default will be used runtime.availableProcessors value.");
parser.addArgument(NO_COMPRESS_OPT).required(false).action(Arguments.storeFalse())
.help("Do not cmpress tepmlorary stored data")
.setDefault(Boolean.TRUE);
parser.addArgument(DATA_DIR_OPT).required(false)
.help("Use this folder as data storage.")
.setDefault("data");
parser.addArgument(LOG_OPT).required(false).setDefault("WARN");
parser.addArgument(LOG_FILE_OPT).required(false).help("Path to log file");
parser.addArgument(LOG_PREFIX_OPT).required(false).help("Add that prefix to all log messages");
parser.addArgument(LOG_FILE_ONLY).required(false)
.action(Arguments.storeTrue())
.setDefault(Boolean.TRUE)
.help("Mute console output");
parser.addArgument("--version", "-v").required(false)
.help("Print version and exit.")
.action(Arguments.storeTrue())
.setDefault(Boolean.FALSE);
Subparsers subparsers = parser.addSubparsers();
//man
{
Command command = Command.MAN;
man = subparsers.addParser(command.longName())
.setDefault(COMMAND, command)
.help(command.help());
}
//split
{
Command command = Command.SPLIT;
split = subparsers.addParser(command.longName())
.setDefault(COMMAND, command)
.help(command.help());
split.addArgument("osm_file").required(true)
.help("Path to osm file. *.osm *.osm.bz2 *.osm.gz supported. Use - to read from STDIN");
split.addArgument("compression").required(false).nargs("?").choices("none", "gzip", "bz2")
.setConst("none").setDefault("bz2")
.help("Use with \"osm_file -\" allow to read compressed stream from STDIN.");
split.addArgument("--append").required(false).setDefault(Boolean.FALSE)
.nargs("?").setConst(Boolean.TRUE);
}
//slice
{
Command command = Command.SLICE;
slice = subparsers.addParser(command.longName())
.setDefault(COMMAND, command)
.help(command.help());
slice.addArgument(POI_CATALOG_OPT).setDefault("jar")
.help("Path to osm-doc catalog xml file. By default internal osm-doc.xml will be used.");
slice.addArgument(EXCCLUDE_POI_BRANCH_OPT).nargs("*")
.help("Exclude branch of osm-doc features hierarchy. "
+ "Eg: osm-ru:transport where osm-ru is a name of the hierarchy, "
+ "and transport is a name of the branch");
slice.addArgument(NAMED_POI_BRANCH_OPT).nargs("*")
.help("Kepp POIS from this banch only if they have name tag");
slice.addArgument(FEATURE_TYPES_VAL).help("Parse and slice axact feature(s) type.")
.choices(Slicer.sliceTypes).nargs("*").setDefault("all").setConst("all");
slice.addArgument("--drop").nargs("*")
.help("List of objects osm ids which will be dropped ex r60189.");
slice.addArgument(BOUNDARIES_FALLBACK_PARAM).nargs("?")
.help("Path to boundaries fallback file.");
slice.addArgument(BOUNDARIES_FALLBACK_TYPES_PARAM).nargs("*")
.help("List of boundaries to keep in boundaries fallback file. Eg. boundary:2");
slice.addArgument("--x10").setConst(Boolean.TRUE)
.setDefault(Boolean.FALSE).action(new StoreTrueArgumentAction())
.help("Slice ten times thinner stripes");
slice.addArgument("--skip-interpolation").setConst(Boolean.TRUE)
.setDefault(Boolean.FALSE).action(new StoreTrueArgumentAction())
.help("Do not parse addr:interpolation lines");
}
//join
{
Command command = Command.JOIN;
join = subparsers.addParser(command.longName())
.setDefault(COMMAND, command)
.help(command.help());
join.addArgument(JOIN_COMMON_OPT)
.help("Path for *.json with array of features which will be added to boundaries "
+ "list for every feature.");
join.addArgument(ADDR_ORDER_OPT).choices("HN_STREET_CITY", "STREET_HN_CITY", "CITY_STREET_HN").setDefault("HN_STREET_CITY")
.help("How to sort addr levels in full addr text");
join.addArgument(ADDR_FORMATTER_OPT)
.help("Path to *.groovy file with full addresses texts formatter.");
join.addArgument("--check-boundaries").nargs("*")
.help("Filter only addresses inside any of boundary given as osm id. eg. r12345 w123456 ");
join.addArgument("--skip-in-text").nargs("*")
.help("Skip in addr full text.");
join.addArgument("--find-langs").setDefault(Boolean.FALSE)
.nargs("?").setConst(Boolean.TRUE)
.help("Search for translated address rows. \n"
+ "Eg. if street and all upper addr levels \n"
+ "have name name:uk name:ru name:en \n"
+ "generate 4 address rows.\n"
+ "If one of [name:uk name:ru name:en] is equals \n"
+ "to name still generate additional row. \n"
+ "(You can filter it later with simple distinct check).");
join.addArgument("--skip-hghnets").setConst(Boolean.TRUE)
.setDefault(Boolean.FALSE).action(new StoreTrueArgumentAction())
.help("Do not build highway networks.");
join.addArgument("--clean-stripes").setConst(Boolean.TRUE)
.setDefault(Boolean.FALSE).action(new StoreTrueArgumentAction())
.help("Remove stripes intermediate files, right after usage");
join.addArgument("--keep-hghnets-geometry").setConst(Boolean.TRUE)
.setDefault(Boolean.FALSE).action(new StoreTrueArgumentAction())
.help("Do not drop highway networks geometries.");
join.addArgument("--handlers").nargs("*");
}
//update
{
Command command = Command.SYNCHRONIZE;
update = subparsers.addParser(command.longName())
.setDefault(COMMAND, command)
.help(command.help());
}
//diff
{
Command command = Command.DIFF;
diff = subparsers.addParser(command.longName())
.setDefault(COMMAND, command)
.help(command.help());
diff.addArgument("--out-file").setDefault("-")
.help("Where to print results.");
diff.addArgument("--old").required(true)
.help("Path to old file.");
diff.addArgument("--new").required(true)
.help("Path to new file.");
diff.addArgument("--old-header").required(false)
.help("Add meta for old file.");
diff.addArgument("--new-header").required(false)
.help("Add meta for new file.");
diff.addArgument("--full").setConst(Boolean.TRUE)
.setDefault(Boolean.FALSE).action(new StoreTrueArgumentAction())
.help("Print full object data for deleted and old rows.");
}
return parser;
}
}