/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
/**
* <p> Base class for all implementations of EntityProcessor </p> <p/> <p> Most implementations of EntityProcessor
* extend this base class which provides common functionality. </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id: EntityProcessorBase.java 887875 2009-12-07 10:27:47Z noble $
* @since solr 1.3
*/
public class EntityProcessorBase extends EntityProcessor {
private static final Logger log = LoggerFactory.getLogger(EntityProcessorBase.class);
protected boolean isFirstInit = true;
protected String entityName;
protected Context context;
protected Iterator<Map<String, Object>> rowIterator;
protected List<Transformer> transformers;
protected String query;
protected String onError = ABORT;
public void init(Context context) {
rowIterator = null;
this.context = context;
if (isFirstInit) {
firstInit(context);
}
query = null;
}
/**first time init call. do one-time operations here
*/
protected void firstInit(Context context) {
entityName = context.getEntityAttribute("name");
String s = context.getEntityAttribute(ON_ERROR);
if (s != null) onError = s;
isFirstInit = false;
}
protected Map<String, Object> getNext() {
try {
if (rowIterator == null)
return null;
if (rowIterator.hasNext())
return rowIterator.next();
query = null;
rowIterator = null;
return null;
} catch (Exception e) {
log.error("getNext() failed for query '" + query + "'", e);
query = null;
rowIterator = null;
wrapAndThrow(DataImportHandlerException.WARN, e);
return null;
}
}
public Map<String, Object> nextModifiedRowKey() {
return null;
}
public Map<String, Object> nextDeletedRowKey() {
return null;
}
public Map<String, Object> nextModifiedParentRowKey() {
return null;
}
/**
* For a simple implementation, this is the only method that the sub-class should implement. This is intended to
* stream rows one-by-one. Return null to signal end of rows
*
* @return a row where the key is the name of the field and value can be any Object or a Collection of objects. Return
* null to signal end of rows
*/
public Map<String, Object> nextRow() {
return null;// do not do anything
}
public void destroy() {
/*no op*/
}
/**
* Only used by cache implementations
*/
protected String cachePk;
/**
* Only used by cache implementations
*/
protected String cacheVariableName;
/**
* Only used by cache implementations
*/
protected Map<String, List<Map<String, Object>>> simpleCache;
/**
* Only used by cache implementations
*/
protected Map<String, Map<Object, List<Map<String, Object>>>> cacheWithWhereClause;
protected List<Map<String, Object>> dataSourceRowCache;
/**
* Only used by cache implementations
*/
protected void cacheInit() {
if (simpleCache != null || cacheWithWhereClause != null)
return;
String where = context.getEntityAttribute("where");
String cacheKey = context.getEntityAttribute(CACHE_KEY);
String lookupKey = context.getEntityAttribute(CACHE_LOOKUP);
if(cacheKey != null && lookupKey == null){
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"'cacheKey' is specified for the entity "+ entityName+" but 'cacheLookup' is missing" );
}
if (where == null && cacheKey == null) {
simpleCache = new HashMap<String, List<Map<String, Object>>>();
} else {
if (where != null) {
String[] splits = where.split("=");
cachePk = splits[0];
cacheVariableName = splits[1].trim();
} else {
cachePk = cacheKey;
cacheVariableName = lookupKey;
}
cacheWithWhereClause = new HashMap<String, Map<Object, List<Map<String, Object>>>>();
}
}
/**
* If the where clause is present the cache is sql Vs Map of key Vs List of Rows. Only used by cache implementations.
*
* @param query the query string for which cached data is to be returned
*
* @return the cached row corresponding to the given query after all variables have been resolved
*/
protected Map<String, Object> getIdCacheData(String query) {
Map<Object, List<Map<String, Object>>> rowIdVsRows = cacheWithWhereClause
.get(query);
List<Map<String, Object>> rows = null;
Object key = context.resolve(cacheVariableName);
if (key == null) {
throw new DataImportHandlerException(DataImportHandlerException.WARN,
"The cache lookup value : " + cacheVariableName + " is resolved to be null in the entity :" +
context.getEntityAttribute("name"));
}
if (rowIdVsRows != null) {
rows = rowIdVsRows.get(key);
if (rows == null)
return null;
dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
return getFromRowCacheTransformed();
} else {
rows = getAllNonCachedRows();
if (rows.isEmpty()) {
return null;
} else {
rowIdVsRows = new HashMap<Object, List<Map<String, Object>>>();
for (Map<String, Object> row : rows) {
Object k = row.get(cachePk);
if (k == null) {
throw new DataImportHandlerException(DataImportHandlerException.WARN,
"No value available for the cache key : " + cachePk + " in the entity : " +
context.getEntityAttribute("name"));
}
if (!k.getClass().equals(key.getClass())) {
throw new DataImportHandlerException(DataImportHandlerException.WARN,
"The key in the cache type : " + k.getClass().getName() +
"is not same as the lookup value type " + key.getClass().getName() + " in the entity " +
context.getEntityAttribute("name"));
}
if (rowIdVsRows.get(k) == null)
rowIdVsRows.put(k, new ArrayList<Map<String, Object>>());
rowIdVsRows.get(k).add(row);
}
cacheWithWhereClause.put(query, rowIdVsRows);
if (!rowIdVsRows.containsKey(key))
return null;
dataSourceRowCache = new ArrayList<Map<String, Object>>(rowIdVsRows.get(key));
if (dataSourceRowCache.isEmpty()) {
dataSourceRowCache = null;
return null;
}
return getFromRowCacheTransformed();
}
}
}
/**
* <p> Get all the rows from the the datasource for the given query. Only used by cache implementations. </p> This
* <b>must</b> be implemented by sub-classes which intend to provide a cached implementation
*
* @return the list of all rows fetched from the datasource.
*/
protected List<Map<String, Object>> getAllNonCachedRows() {
return Collections.EMPTY_LIST;
}
/**
* If where clause is not present the cache is a Map of query vs List of Rows. Only used by cache implementations.
*
* @param query string for which cached row is to be returned
*
* @return the cached row corresponding to the given query
*/
protected Map<String, Object> getSimpleCacheData(String query) {
List<Map<String, Object>> rows = simpleCache.get(query);
if (rows != null) {
dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
return getFromRowCacheTransformed();
} else {
rows = getAllNonCachedRows();
if (rows.isEmpty()) {
return null;
} else {
dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
simpleCache.put(query, rows);
return getFromRowCacheTransformed();
}
}
}
protected Map<String, Object> getFromRowCacheTransformed() {
Map<String, Object> r = dataSourceRowCache.remove(0);
if (dataSourceRowCache.isEmpty())
dataSourceRowCache = null;
return r;
}
public static final String TRANSFORMER = "transformer";
public static final String TRANSFORM_ROW = "transformRow";
public static final String ON_ERROR = "onError";
public static final String ABORT = "abort";
public static final String CONTINUE = "continue";
public static final String SKIP = "skip";
public static final String SKIP_DOC = "$skipDoc";
public static final String CACHE_KEY = "cacheKey";
public static final String CACHE_LOOKUP = "cacheLookup";
}