package me.osm.gazetteer.web.csvgeocode;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import me.osm.gazetteer.web.GazetteerWeb;
import me.osm.gazetteer.web.api.AnswerDetalization;
import me.osm.gazetteer.web.api.SearchAPI;
import me.osm.gazetteer.web.executions.AbortedException;
import me.osm.gazetteer.web.executions.BackgroudTaskDescription;
import me.osm.gazetteer.web.executions.BackgroundExecutorFacade.BackgroundExecutableTask;
import me.osm.gazetteer.web.imp.LocationsDumpImporter;

import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.apache.commons.lang3.StringUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.supercsv.io.CsvMapReader;
import org.supercsv.io.CsvMapWriter;
import org.supercsv.prefs.CsvPreference;

/**
 * Background task that geocodes every row of a CSV/TSV file.
 *
 * <p>For each input row the value of the configured search column is passed to
 * {@link SearchAPI}; the first returned feature (if any) is used to fill the
 * {@code result_*} columns, and the row is written to a gzip-compressed output
 * file that uses the same delimiter preferences as the input.</p>
 */
public class CSVGeocode extends BackgroundExecutableTask {

    private static final Logger log = LoggerFactory.getLogger(CSVGeocode.class);

    private static final String COL_LAT = "result_lat";
    private static final String COL_LON = "result_lon";
    private static final String COL_SCORE = "result_score";
    private static final String COL_LVL = "result_lvl";
    private static final String COL_ID = "result_id";

    /** Columns appended to the input header, in output order. */
    private static final String[] RESULT_COLUMNS = {
        COL_LAT, COL_LON, COL_SCORE, COL_LVL, COL_ID
    };

    private String filePath;
    private String callback;
    private String searchField = "search_text";
    private SearchAPI searchAPI;
    private Set<String> refs;
    private File outFile = null;
    private int counter;

    /**
     * Creates an unconfigured task. No search API and no output file are set;
     * {@link #description()} reports a {@code null} outfile for such an instance.
     */
    public CSVGeocode() {
    }

    /**
     * Creates a task for the given input file.
     *
     * @param filePath    path of the CSV/TSV file to geocode; a name ending in
     *                    {@code .tsv} selects tab-separated parsing
     * @param callback    callback value, only stored and reported in {@link #description()}
     * @param searchAPI   search service used to geocode each row
     * @param searchField name of the column holding the text to geocode; when
     *                    empty or {@code null} the default {@code search_text} is kept
     */
    public CSVGeocode(String filePath, String callback,
            SearchAPI searchAPI, String searchField) {
        super();
        this.filePath = filePath;
        this.callback = callback;
        this.searchAPI = searchAPI;
        this.counter = 0;

        if (StringUtils.isNotEmpty(searchField)) {
            this.searchField = searchField;
        }

        // Output goes to <mass geocode folder>/<task uuid>.csv.gz
        File geocodeFolder = new File(GazetteerWeb.config().getMassGeocodeFolder());
        geocodeFolder.mkdirs();
        this.outFile = new File(geocodeFolder, getUUID() + ".csv.gz");
    }

    /**
     * Reads the input file row by row, geocodes each row and writes the
     * result to the gzip-compressed output file.
     *
     * <p>A failure while geocoding or writing a single row is logged and that
     * row is omitted from the output; any other failure (opening the files,
     * reading the input, closing the output) aborts the task.</p>
     *
     * @throws AbortedException wrapping any failure outside of per-row geocoding
     */
    @Override
    public void executeTask() throws AbortedException {
        CsvPreference csvPreferences = StringUtils.endsWith(filePath, ".tsv")
                ? CsvPreference.TAB_PREFERENCE
                : CsvPreference.STANDARD_PREFERENCE;

        // try-with-resources: reader and writer are closed on every path,
        // including failures in the middle of the file.
        try (CsvMapReader csvMapReader = new CsvMapReader(
                new InputStreamReader(
                        LocationsDumpImporter.getFileIS(filePath), StandardCharsets.UTF_8),
                csvPreferences)) {

            String[] header = csvMapReader.getHeader(true);
            if (header == null) {
                // Empty input: fail with an explicit message instead of an NPE later on.
                throw new IOException("No header row found in " + filePath);
            }

            // The FileOutputStream is its own resource so it is released even when
            // one of the wrapping stream constructors fails.
            try (FileOutputStream fileOut = new FileOutputStream(outFile);
                    CsvMapWriter csvMapWriter = new CsvMapWriter(
                            new OutputStreamWriter(
                                    new GzipCompressorOutputStream(fileOut),
                                    StandardCharsets.UTF_8),
                            csvPreferences)) {

                String[] writeHeader = writeHeader(header, csvMapWriter);

                Map<String, String> row;
                while ((row = csvMapReader.read(header)) != null) {
                    geocodeRow(row, writeHeader, csvMapWriter);
                }

                csvMapWriter.flush();
            }
        }
        catch (Exception e) {
            throw new AbortedException(e.getMessage(), e, false);
        }
    }

    /**
     * Geocodes a single row and writes it to the output.
     *
     * <p>The search text is looked up first without a type restriction; when
     * that yields no features a second search is issued, restricted to a fixed
     * set of types and with one boolean flag changed (the flag's meaning is
     * defined by {@link SearchAPI}). Any exception is logged and the row is
     * skipped.</p>
     */
    private void geocodeRow(Map<String, String> row, String[] writeHeader,
            CsvMapWriter csvMapWriter) {

        String string = row.get(searchField);
        AnswerDetalization detalization = AnswerDetalization.FULL;

        try {
            JSONObject answer = searchAPI.internalSearch(
                    false, string, null, null, null, null, this.refs,
                    true, false, true, detalization, null, null);

            // Counts rows for which the first search completed, with or without a hit.
            counter++;

            if (!gotResult(answer)) {
                Set<String> types = new HashSet<>(
                        Arrays.asList("hghnet", "hghway", "admbnd", "plcpnt"));

                answer = searchAPI.internalSearch(
                        false, string, types, null, null, null, this.refs,
                        false, false, true, detalization, null, null);
            }

            fillTheRow(row, answer);
            csvMapWriter.write(row, writeHeader);
        }
        catch (Exception e) {
            log.error("Failed to geocode {}", string, e);
        }
    }

    /**
     * @return {@code true} when the answer has a non-empty {@code features} array
     */
    private boolean gotResult(JSONObject answer) {
        JSONArray features = answer.optJSONArray("features");
        return features != null && features.length() > 0;
    }

    /**
     * Copies data of the first feature of the answer into the {@code result_*}
     * columns of the row. Columns stay {@code null} when the answer has no
     * first feature; lat/lon stay {@code null} when the feature has no
     * {@code center_point}.
     */
    private void fillTheRow(Map<String, String> row, JSONObject answer) {
        String lat = null;
        String lon = null;
        String score = null;
        String lvl = null;
        String id = null;

        JSONArray features = answer.optJSONArray("features");
        JSONObject firstAnswer = features == null ? null : features.optJSONObject(0);

        if (firstAnswer != null) {
            id = firstAnswer.optString("id");

            JSONObject cp = firstAnswer.optJSONObject("center_point");
            if (cp != null) {
                lat = String.valueOf(cp.optDouble("lat", Double.NaN));
                lon = String.valueOf(cp.optDouble("lon", Double.NaN));
            }

            score = String.valueOf(firstAnswer.opt("_hit_score"));
            lvl = firstAnswer.optString("weight_base_type");
        }

        row.put(COL_LAT, lat);
        row.put(COL_LON, lon);
        row.put(COL_SCORE, score);
        row.put(COL_LVL, lvl);
        row.put(COL_ID, id);
    }

    /**
     * Writes the output header: the input header followed by every
     * {@code result_*} column the input does not already contain.
     *
     * @return the header that was written, to be used for writing the rows
     */
    private String[] writeHeader(String[] header, CsvMapWriter csvMapWriter)
            throws IOException {

        List<String> outHeader = new ArrayList<>(Arrays.asList(header));
        for (String column : RESULT_COLUMNS) {
            if (!outHeader.contains(column)) {
                outHeader.add(column);
            }
        }

        String[] array = outHeader.toArray(new String[outHeader.size()]);
        csvMapWriter.writeHeader(array);

        return array;
    }

    /**
     * Describes this task: id, uuid, class name and the parameters
     * {@code source}, {@code callback}, {@code searchField}, {@code geocoded}
     * (current row counter) and {@code outfile} (absolute path of the output
     * file, or {@code null} when no output file has been set).
     */
    @Override
    public BackgroudTaskDescription description() {
        BackgroudTaskDescription description = new BackgroudTaskDescription();

        description.setId(this.getId());
        description.setUuid(this.getUUID());
        description.setClassName(getClass().getName());

        Map<String, Object> parameters = new HashMap<String, Object>();
        description.setParameters(parameters);

        parameters.put("source", filePath);
        parameters.put("callback", callback);
        parameters.put("searchField", searchField);
        parameters.put("geocoded", counter);
        // outFile is only set by the four-argument constructor.
        parameters.put("outfile", outFile == null ? null : outFile.getAbsolutePath());

        return description;
    }

    public String getFilePath() {
        return filePath;
    }

    public void setFilePath(String filePath) {
        this.filePath = filePath;
    }

    public String getCallback() {
        return callback;
    }

    public void setCallback(String callback) {
        this.callback = callback;
    }

    /**
     * Sets the reference set that is passed through to every search call.
     */
    public void setRefs(Set<String> refs) {
        this.refs = refs;
    }
}