/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.common.util.ContentStream;

import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.ConcurrentHashMap;

/**
 * <p>Stores all configuration information for pulling and indexing data.</p>
 *
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id: DataImporter.java 945245 2010-05-17 17:18:10Z rmuir $
 * @since solr 1.3
 */
public class DataImporter {

  public enum Status {
    IDLE, RUNNING_FULL_DUMP, RUNNING_DELTA_DUMP, JOB_FAILED
  }

  private static final Logger LOG = LoggerFactory.getLogger(DataImporter.class);

  private Status status = Status.IDLE;

  private DataConfig config;

  private Date indexStartTime;

  private Properties store = new Properties();

  private Map<String, Properties> dataSourceProps = new HashMap<String, Properties>();

  private IndexSchema schema;

  public DocBuilder docBuilder;

  public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();

  private SolrCore core;

  private ReentrantLock importLock = new ReentrantLock();

  private final Map<String, Object> coreScopeSession;

  /**
   * Only for testing purposes
   */
  DataImporter() {
    coreScopeSession = new ConcurrentHashMap<String, Object>();
  }
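  /**
   * Creates an importer bound to a core: parses the data-config XML, builds a
   * case-insensitive view of the schema fields, and then initializes and
   * verifies every root entity declared in the configuration.
   */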
  DataImporter(String dataConfig, SolrCore core, Map<String, Properties> ds, Map<String, Object> session) {
    if (dataConfig == null)
      throw new DataImportHandlerException(SEVERE, "Configuration not found");
    this.core = core;
    this.schema = core.getSchema();
    dataSourceProps = ds;
    if (session == null)
      session = new HashMap<String, Object>();
    coreScopeSession = session;
    loadDataConfig(dataConfig);

    for (Map.Entry<String, SchemaField> entry : schema.getFields().entrySet()) {
      config.lowerNameVsSchemaField.put(entry.getKey().toLowerCase(Locale.ENGLISH), entry.getValue());
    }

    for (DataConfig.Entity e : config.document.entities) {
      Map<String, DataConfig.Field> fields = new HashMap<String, DataConfig.Field>();
      initEntity(e, fields, false);
      verifyWithSchema(fields);
      identifyPk(e);
    }
  }

  private void verifyWithSchema(Map<String, DataConfig.Field> fields) {
    Map<String, SchemaField> schemaFields = schema.getFields();
    for (Map.Entry<String, SchemaField> entry : schemaFields.entrySet()) {
      SchemaField sf = entry.getValue();
      if (!fields.containsKey(sf.getName())) {
        if (sf.isRequired()) {
          LOG.info(sf.getName() + " is a required field in the Solr schema but is not present in DataConfig");
        }
      }
    }
    for (Map.Entry<String, DataConfig.Field> entry : fields.entrySet()) {
      DataConfig.Field fld = entry.getValue();
      SchemaField field = schema.getFieldOrNull(fld.getName());
      if (field == null) {
        field = config.lowerNameVsSchemaField.get(fld.getName().toLowerCase(Locale.ENGLISH));
        if (field == null) {
          LOG.info("The field: " + fld.getName() + " present in DataConfig does not have a counterpart in the Solr schema");
        }
      }
    }
  }

  /**
   * Used by tests
   */
  void loadAndInit(String configStr) {
    loadDataConfig(configStr);
    Map<String, DataConfig.Field> fields = new HashMap<String, DataConfig.Field>();
    for (DataConfig.Entity entity : config.document.entities) {
      initEntity(entity, fields, false);
    }
  }

  private void identifyPk(DataConfig.Entity entity) {
    SchemaField uniqueKey = schema.getUniqueKeyField();
    String schemaPk = "";
    if (uniqueKey != null)
      schemaPk = uniqueKey.getName();
    else
      return;
    // If no pk is mentioned, the Solr uniqueKey serves as the DIH 'pk'.
    entity.pkMappingFromSchema = schemaPk;
    for (DataConfig.Field field : entity.fields) {
      if (field.getName().equals(schemaPk)) {
        // Use the column mapped to the Solr uniqueKey. If multiple columns map
        // to the uniqueKey this breaks down, so in such one-off cases an
        // explicit pk may still be needed.
        entity.pkMappingFromSchema = field.column;
        break;
      }
    }
  }
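  /*
   * loadDataConfig() parses a configuration of the following general shape
   * (an illustrative minimal example; only the <dataConfig> root element is
   * enforced here, everything else is interpreted by DataConfig.readFromXml):
   *
   *   <dataConfig>
   *     <dataSource type="JdbcDataSource" driver="..." url="..." />
   *     <document>
   *       <entity name="item" pk="id" query="select * from item">
   *         <field column="id" name="id" />
   *       </entity>
   *     </document>
   *   </dataConfig>
   */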
  private void loadDataConfig(String configFile) {
    try {
      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
      try {
        dbf.setXIncludeAware(true);
        dbf.setNamespaceAware(true);
      } catch (UnsupportedOperationException e) {
        LOG.warn("XML parser doesn't support XInclude option");
      }
      DocumentBuilder builder = dbf.newDocumentBuilder();
      Document document = builder.parse(new InputSource(new StringReader(configFile)));

      config = new DataConfig();
      NodeList elems = document.getElementsByTagName("dataConfig");
      if (elems == null || elems.getLength() == 0) {
        throw new DataImportHandlerException(SEVERE, "the root node '<dataConfig>' is missing");
      }
      config.readFromXml((Element) elems.item(0));
      LOG.info("Data Configuration loaded successfully");
    } catch (Exception e) {
      SolrConfig.severeErrors.add(e);
      throw new DataImportHandlerException(SEVERE, "Exception occurred while initializing context", e);
    }
  }

  private void initEntity(DataConfig.Entity e, Map<String, DataConfig.Field> fields, boolean docRootFound) {
    e.allAttributes.put(DATA_SRC, e.dataSource);

    if (!docRootFound && !"false".equals(e.docRoot)) {
      // No document root has been found in this chain yet, so this entity becomes the root.
      e.isDocRoot = true;
    }
    if (e.allAttributes.get("threads") != null) {
      if (docRootFound)
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "'threads' is not allowed below the root entity");
      config.isMultiThreaded = true;
    }

    if (e.fields != null) {
      for (DataConfig.Field f : e.fields) {
        if (schema != null) {
          if (f.name != null && f.name.contains("${")) {
            f.dynamicName = true;
            continue;
          }
          SchemaField schemaField = schema.getFieldOrNull(f.getName());
          if (schemaField == null) {
            schemaField = config.lowerNameVsSchemaField.get(f.getName().toLowerCase(Locale.ENGLISH));
            if (schemaField != null)
              f.name = schemaField.getName();
          }
          if (schemaField != null) {
            f.multiValued = schemaField.multiValued();
            f.allAttributes.put(MULTI_VALUED, Boolean.toString(schemaField.multiValued()));
            f.allAttributes.put(TYPE, schemaField.getType().getTypeName());
            f.allAttributes.put("indexed", Boolean.toString(schemaField.indexed()));
            f.allAttributes.put("stored", Boolean.toString(schemaField.stored()));
            f.allAttributes.put("defaultValue", schemaField.getDefaultValue());
          } else {
            f.toWrite = false;
          }
        }
        fields.put(f.getName(), f);
        f.entity = e;
        f.allAttributes.put("boost", f.boost.toString());
        f.allAttributes.put("toWrite", Boolean.toString(f.toWrite));
        e.allFieldsList.add(Collections.unmodifiableMap(f.allAttributes));
      }
    }
    e.allFieldsList = Collections.unmodifiableList(e.allFieldsList);
    e.allAttributes = Collections.unmodifiableMap(e.allAttributes);

    if (e.entities == null)
      return;
    for (DataConfig.Entity e1 : e.entities) {
      e1.parentEntity = e;
      initEntity(e1, fields, e.isDocRoot || docRootFound);
    }
  }

  DataConfig getConfig() {
    return config;
  }

  Date getIndexStartTime() {
    return indexStartTime;
  }

  void setIndexStartTime(Date indexStartTime) {
    this.indexStartTime = indexStartTime;
  }

  void store(Object key, Object value) {
    store.put(key, value);
  }

  Object retrieve(Object key) {
    return store.get(key);
  }
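  /**
   * Resolves the data source for an entity. Properties supplied with the
   * request take precedence over those declared in the data-config; a null
   * name selects the default (unnamed) data source. When no "type" property
   * is given, a JdbcDataSource is assumed.
   */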
  DataSource getDataSourceInstance(DataConfig.Entity key, String name, Context ctx) {
    Properties p = dataSourceProps.get(name);
    if (p == null)
      p = config.dataSources.get(name);
    if (p == null)
      p = dataSourceProps.get(null); // for default data source
    if (p == null)
      p = config.dataSources.get(null);
    if (p == null)
      throw new DataImportHandlerException(SEVERE, "No dataSource: " + name + " available for entity: " + key.name);
    String type = p.getProperty(TYPE);
    DataSource dataSrc = null;
    if (type == null) {
      dataSrc = new JdbcDataSource();
    } else {
      try {
        dataSrc = (DataSource) DocBuilder.loadClass(type, getCore()).newInstance();
      } catch (Exception e) {
        wrapAndThrow(SEVERE, e, "Invalid type for data source: " + type);
      }
    }
    try {
      Properties copyProps = new Properties();
      copyProps.putAll(p);
      Map<String, Object> map = ctx.getRequestParameters();
      if (map.containsKey("rows")) {
        int rows = Integer.parseInt((String) map.get("rows"));
        if (map.containsKey("start")) {
          rows += Integer.parseInt((String) map.get("start"));
        }
        copyProps.setProperty("maxRows", String.valueOf(rows));
      }
      dataSrc.init(ctx, copyProps);
    } catch (Exception e) {
      wrapAndThrow(SEVERE, e, "Failed to initialize DataSource: " + key.dataSource);
    }
    return dataSrc;
  }

  public Status getStatus() {
    return status;
  }

  public void setStatus(Status status) {
    this.status = status;
  }

  public boolean isBusy() {
    return importLock.isLocked();
  }

  public void doFullImport(SolrWriter writer, RequestParams requestParams) {
    LOG.info("Starting Full Import");
    setStatus(Status.RUNNING_FULL_DUMP);
    setIndexStartTime(new Date());

    try {
      docBuilder = new DocBuilder(this, writer, requestParams);
      docBuilder.execute();
      if (!requestParams.debug)
        cumulativeStatistics.add(docBuilder.importStatistics);
    } catch (Throwable t) {
      LOG.error("Full Import failed", t);
      docBuilder.rollback();
    } finally {
      setStatus(Status.IDLE);
      config.clearCaches();
      DocBuilder.INSTANCE.set(null);
    }
  }

  public void doDeltaImport(SolrWriter writer, RequestParams requestParams) {
    LOG.info("Starting Delta Import");
    setStatus(Status.RUNNING_DELTA_DUMP);

    try {
      setIndexStartTime(new Date());
      docBuilder = new DocBuilder(this, writer, requestParams);
      docBuilder.execute();
      if (!requestParams.debug)
        cumulativeStatistics.add(docBuilder.importStatistics);
    } catch (Throwable t) {
      LOG.error("Delta Import Failed", t);
      docBuilder.rollback();
    } finally {
      setStatus(Status.IDLE);
      config.clearCaches();
      DocBuilder.INSTANCE.set(null);
    }
  }

  public void runAsync(final RequestParams reqParams, final SolrWriter sw) {
    new Thread() {
      @Override
      public void run() {
        runCmd(reqParams, sw);
      }
    }.start();
  }

  void runCmd(RequestParams reqParams, SolrWriter sw) {
    String command = reqParams.command;
    if (command.equals(ABORT_CMD)) {
      if (docBuilder != null) {
        docBuilder.abort();
      }
      return;
    }
    if (!importLock.tryLock()) {
      LOG.warn("Import command failed: another import is running");
      return;
    }
    try {
      if (FULL_IMPORT_CMD.equals(command) || IMPORT_CMD.equals(command)) {
        doFullImport(sw, reqParams);
      } else if (command.equals(DELTA_IMPORT_CMD)) {
        doDeltaImport(sw, reqParams);
      }
    } finally {
      importLock.unlock();
    }
  }
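  /*
   * A caller such as the request handler is expected to drive imports roughly
   * like this (an illustrative sketch; the importer and writer variables are
   * assumptions, not part of this class):
   *
   *   RequestParams params = new RequestParams(requestParamMap);
   *   if (params.syncMode) importer.runCmd(params, writer);
   *   else importer.runAsync(params, writer);
   */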
  @SuppressWarnings("unchecked")
  Map<String, String> getStatusMessages() {
    // This map is a Collections.synchronizedMap(new LinkedHashMap()); if we
    // synchronize on the object it is safe to iterate through the map.
    Map statusMessages = (Map) retrieve(STATUS_MSGS);
    Map<String, String> result = new LinkedHashMap<String, String>();
    if (statusMessages != null) {
      synchronized (statusMessages) {
        for (Object o : statusMessages.entrySet()) {
          Map.Entry e = (Map.Entry) o;
          // toString() is called because some of the values create their data
          // lazily when toString() is invoked.
          result.put((String) e.getKey(), e.getValue().toString());
        }
      }
    }
    return result;
  }

  DocBuilder getDocBuilder() {
    return docBuilder;
  }

  static final ThreadLocal<AtomicLong> QUERY_COUNT = new ThreadLocal<AtomicLong>() {
    @Override
    protected AtomicLong initialValue() {
      return new AtomicLong();
    }
  };

  static final ThreadLocal<SimpleDateFormat> DATE_TIME_FORMAT = new ThreadLocal<SimpleDateFormat>() {
    @Override
    protected SimpleDateFormat initialValue() {
      return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    }
  };

  static final class MSG {
    public static final String NO_CONFIG_FOUND = "Configuration not found";
    public static final String NO_INIT = "DataImportHandler started. Not Initialized. No commands can be run";
    public static final String INVALID_CONFIG = "FATAL: Could not create importer. DataImporter config invalid";
    public static final String LOAD_EXP = "Exception while loading DataImporter";
    public static final String JMX_DESC = "Manage data import from databases to Solr";
    public static final String CMD_RUNNING = "A command is still running...";
    public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag <str name=\"enableDebug\">true</str> in solrconfig.xml";
    public static final String CONFIG_RELOADED = "Configuration Re-loaded successfully";
    public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed";
    public static final String TOTAL_FAILED_DOCS = "Total Documents Failed";
    public static final String TOTAL_QUERIES_EXECUTED = "Total Requests made to DataSource";
    public static final String TOTAL_ROWS_EXECUTED = "Total Rows Fetched";
    public static final String TOTAL_DOCS_DELETED = "Total Documents Deleted";
    public static final String TOTAL_DOCS_SKIPPED = "Total Documents Skipped";
  }
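  /**
   * Typed view of the request parameters. For example, a request such as
   * {@code command=full-import&commit=true&clean=false&rows=100} yields a
   * RequestParams with those fields set, while {@code debug=on} additionally
   * switches to debug-friendly defaults (rows=10, commit=false, clean=false).
   */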
  static final class RequestParams {
    public String command = null;

    public boolean debug = false;

    public boolean verbose = false;

    public boolean syncMode = false;

    public boolean commit = true;

    public boolean optimize = true;

    public int start = 0;

    public long rows = Integer.MAX_VALUE;

    public boolean clean = true;

    public List<String> entities;

    public Map<String, Object> requestParams;

    public String dataConfig;

    public ContentStream contentStream;

    public RequestParams() {
    }

    public RequestParams(Map<String, Object> requestParams) {
      if (requestParams.containsKey("command"))
        command = (String) requestParams.get("command");

      if ("on".equals(requestParams.get("debug"))) {
        debug = true;
        // Set default values suitable for debug mode
        rows = 10;
        commit = false;
        clean = false;
        verbose = "true".equals(requestParams.get("verbose"))
                || "on".equals(requestParams.get("verbose"));
      }
      syncMode = "true".equals(requestParams.get("synchronous"));
      if (DELTA_IMPORT_CMD.equals(command) || IMPORT_CMD.equals(command)) {
        clean = false;
      }
      if (requestParams.containsKey("commit"))
        commit = Boolean.parseBoolean((String) requestParams.get("commit"));
      if (requestParams.containsKey("start"))
        start = Integer.parseInt((String) requestParams.get("start"));
      if (requestParams.containsKey("rows"))
        rows = Integer.parseInt((String) requestParams.get("rows"));
      if (requestParams.containsKey("clean"))
        clean = Boolean.parseBoolean((String) requestParams.get("clean"));
      if (requestParams.containsKey("optimize")) {
        optimize = Boolean.parseBoolean((String) requestParams.get("optimize"));
        if (optimize)
          commit = true;
      }

      Object o = requestParams.get("entity");
      if (o != null && o instanceof String) {
        entities = new ArrayList<String>();
        entities.add((String) o);
      } else if (o != null && o instanceof List) {
        entities = (List<String>) requestParams.get("entity");
      }
      dataConfig = (String) requestParams.get("dataConfig");
      if (dataConfig != null && dataConfig.trim().length() == 0) {
        // Empty data-config param is not valid, change it to null
        dataConfig = null;
      }
      this.requestParams = requestParams;
    }
  }

  IndexSchema getSchema() {
    return schema;
  }

  Map<String, Object> getCoreScopeSession() {
    return coreScopeSession;
  }

  SolrCore getCore() {
    return core;
  }

  public static final String COLUMN = "column";
  public static final String TYPE = "type";
  public static final String DATA_SRC = "dataSource";
  public static final String MULTI_VALUED = "multiValued";
  public static final String NAME = "name";
  public static final String STATUS_MSGS = "status-messages";
  public static final String FULL_IMPORT_CMD = "full-import";
  public static final String IMPORT_CMD = "import";
  public static final String DELTA_IMPORT_CMD = "delta-import";
  public static final String ABORT_CMD = "abort";
  public static final String DEBUG_MODE = "debug";
  public static final String RELOAD_CONF_CMD = "reload-config";
  public static final String SHOW_CONF_CMD = "show-config";
}