/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.handler; import java.io.BufferedReader; import java.util.Hashtable; import org.apache.commons.lang.StringEscapeUtils; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.util.ContentStream; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.CommitUpdateCommand; import org.apache.solr.update.processor.UpdateRequestProcessor; /** * This class parses geo data from geonames.org's data dump and adds * it to the Solr index */ public class GeonamesLoader extends ContentStreamLoader { /* * Field names corresponding to geonames.org's data. These fields must be added to * Solr's schema.xml before indexing will work properly. */ private static final String[] FIELD_NAMES = { "id", "geoname", "asciiname", "alternatenames", "latitude", "longitude", "featureclass", "featurecode", "countrycode", "cc2", "admin1code", "admin2code", "admin3code", "admin4code", "population", "elevation", "gtopo30", "timezone", "modificationdate" }; /* * Default values for geonames data. These values are used if an invalid or * non-existent value is provided during indexing. */ private static final Hashtable<String, String> DEFAULT_VALUES = new Hashtable<String, String>(); static { DEFAULT_VALUES.put(FIELD_NAMES[0], "-1"); DEFAULT_VALUES.put(FIELD_NAMES[1], "NULL"); DEFAULT_VALUES.put(FIELD_NAMES[2], "NULL"); DEFAULT_VALUES.put(FIELD_NAMES[3], "NULL"); DEFAULT_VALUES.put(FIELD_NAMES[4], "0"); DEFAULT_VALUES.put(FIELD_NAMES[5], "0"); DEFAULT_VALUES.put(FIELD_NAMES[6], "S"); DEFAULT_VALUES.put(FIELD_NAMES[7], "ll"); DEFAULT_VALUES.put(FIELD_NAMES[8], "NULL"); DEFAULT_VALUES.put(FIELD_NAMES[9], "NULL"); DEFAULT_VALUES.put(FIELD_NAMES[10], "00"); DEFAULT_VALUES.put(FIELD_NAMES[11], "00"); DEFAULT_VALUES.put(FIELD_NAMES[12], "00"); DEFAULT_VALUES.put(FIELD_NAMES[13], "00"); DEFAULT_VALUES.put(FIELD_NAMES[14], "0"); DEFAULT_VALUES.put(FIELD_NAMES[15], "0"); DEFAULT_VALUES.put(FIELD_NAMES[16], "0"); DEFAULT_VALUES.put(FIELD_NAMES[17], "GMT"); DEFAULT_VALUES.put(FIELD_NAMES[18], "1970-01-01"); } protected UpdateRequestProcessor processor; public static final String COMMIT_COMMAND = "commit"; /** * Constructor * @param processor the UpdateRequestProcessor to send the update command to */ public GeonamesLoader(UpdateRequestProcessor processor) { this.processor = processor; } @Override public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream) throws Exception { SolrInputDocument doc = new SolrInputDocument(); BufferedReader br = new BufferedReader(stream.getReader()); String line = br.readLine(); //geonames data dump is tab-delimited and each line represents a record //iterate through the lines and split columns by tab (\t) while(line != null) { //if the request is to commit, issue the commit command and return if(line.equalsIgnoreCase(COMMIT_COMMAND)) { CommitUpdateCommand cmd = new CommitUpdateCommand( true ); processor.processCommit( cmd ); return; } //otherwise, parse the line and create a SolrInputDocument from the //data String[] tokens = line.split("\t"); for(int i = 0; i < tokens.length; i++) { String value = ""; tokens[i] = tokens[i].trim(); if (tokens[i] == null || tokens[i].equalsIgnoreCase("")) value = DEFAULT_VALUES.get(FIELD_NAMES[i]); else value = StringEscapeUtils.escapeXml(tokens[i]); doc.setField(FIELD_NAMES[i], value.toLowerCase()); } //issue add command to processor AddUpdateCommand addCmd = new AddUpdateCommand(); addCmd.solrDoc = doc; processor.processAdd(addCmd); //read next line line = br.readLine(); } } }