/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.handler.dataimport; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.apache.solr.schema.SchemaField; import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.*; /** * <p> * Mapping for data-config.xml * </p> * <p/> * <p> * Refer to <a * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> * for more details. * </p> * <p/> * <b>This API is experimental and subject to change</b> * * @version $Id: DataConfig.java 898217 2010-01-12 08:00:34Z noble $ * @since solr 1.3 */ public class DataConfig { static final Logger LOG = LoggerFactory.getLogger(DataConfig.class); public Document document; public List<Map<String, String >> functions = new ArrayList<Map<String ,String>>(); public Script script; public Map<String, Properties> dataSources = new HashMap<String, Properties>(); public Map<String, SchemaField> lowerNameVsSchemaField = new HashMap<String, SchemaField>(); boolean isMultiThreaded = false; public static class Document { // TODO - remove from here and add it to entity public String deleteQuery; public List<Entity> entities = new ArrayList<Entity>(); public String onImportStart, onImportEnd; public Document() { } public Document(Element element) { this.deleteQuery = getStringAttribute(element, "deleteQuery", null); this.onImportStart = getStringAttribute(element, "onImportStart", null); this.onImportEnd = getStringAttribute(element, "onImportEnd", null); List<Element> l = getChildNodes(element, "entity"); for (Element e : l) entities.add(new Entity(e)); } } public static class Entity { public String name; public String pk; public String pkMappingFromSchema; public String dataSource; public Map<String, String> allAttributes; public String proc; public String docRoot; public boolean isDocRoot = false; public List<Field> fields = new ArrayList<Field>(); public List<Map<String, String>> allFieldsList = new ArrayList<Map<String, String>>(); public List<Entity> entities; public Entity parentEntity; public EntityProcessorWrapper processor; @SuppressWarnings("unchecked") public DataSource dataSrc; public Map<String, List<Field>> colNameVsField = new HashMap<String, List<Field>>(); public Entity() { } public Entity(Element element) { name = getStringAttribute(element, NAME, null); if(name == null){ LOG.warn("Entity does not have a name"); name= ""+System.nanoTime(); } if(name.indexOf(".") != -1){ throw new DataImportHandlerException(SEVERE, "Entity name must not have period (.): '" + name); } if (RESERVED_WORDS.contains(name)) { throw new DataImportHandlerException(SEVERE, "Entity name : '" + name + "' is a reserved keyword. Reserved words are: " + RESERVED_WORDS); } pk = getStringAttribute(element, "pk", null); docRoot = getStringAttribute(element, ROOT_ENTITY, null); proc = getStringAttribute(element, PROCESSOR, null); dataSource = getStringAttribute(element, DataImporter.DATA_SRC, null); allAttributes = getAllAttributes(element); List<Element> n = getChildNodes(element, "field"); for (Element elem : n) { Field field = new Field(elem); fields.add(field); List<Field> l = colNameVsField.get(field.column); if(l == null) l = new ArrayList<Field>(); boolean alreadyFound = false; for (Field f : l) { if(f.getName().equals(field.getName())) { alreadyFound = true; break; } } if(!alreadyFound) l.add(field); colNameVsField.put(field.column, l); } n = getChildNodes(element, "entity"); if (!n.isEmpty()) entities = new ArrayList<Entity>(); for (Element elem : n) entities.add(new Entity(elem)); } public void clearCache() { if (entities != null) { for (Entity entity : entities) entity.clearCache(); } try { processor.close(); } catch (Exception e) { /*no op*/ } processor = null; if (dataSrc != null) dataSrc.close(); dataSrc = null; } public String getPk(){ return pk == null ? pkMappingFromSchema : pk; } } public static class Script { public String language; public String text; public Script() { } public Script(Element e) { this.language = getStringAttribute(e, "language", "JavaScript"); StringBuilder buffer = new StringBuilder(); String script = getTxt(e, buffer); if (script != null) this.text = script.trim(); } } public static class Field { public String column; public String name; public Float boost = 1.0f; public boolean toWrite = true; public boolean multiValued = false; boolean dynamicName; public Map<String, String> allAttributes = new HashMap<String, String>() { public String put(String key, String value) { if (super.containsKey(key)) return super.get(key); return super.put(key, value); } }; public Field() { } public Field(Element e) { this.name = getStringAttribute(e, DataImporter.NAME, null); this.column = getStringAttribute(e, DataImporter.COLUMN, null); if (column == null) { throw new DataImportHandlerException(SEVERE, "Field must have a column attribute"); } this.boost = Float.parseFloat(getStringAttribute(e, "boost", "1.0f")); allAttributes.putAll(getAllAttributes(e)); } public String getName() { return name == null ? column : name; } public Entity entity; } public void readFromXml(Element e) { List<Element> n = getChildNodes(e, "document"); if (n.isEmpty()) { throw new DataImportHandlerException(SEVERE, "DataImportHandler " + "configuration file must have one <document> node."); } document = new Document(n.get(0)); n = getChildNodes(e, SCRIPT); if (!n.isEmpty()) { script = new Script(n.get(0)); } // Add the provided evaluators n = getChildNodes(e, FUNCTION); if (!n.isEmpty()) { for (Element element : n) { String func = getStringAttribute(element, NAME, null); String clz = getStringAttribute(element, CLASS, null); if (func == null || clz == null){ throw new DataImportHandlerException( SEVERE, "<function> must have a 'name' and 'class' attributes"); } else { functions.add(getAllAttributes(element)); } } } n = getChildNodes(e, DATA_SRC); if (!n.isEmpty()) { for (Element element : n) { Properties p = new Properties(); HashMap<String, String> attrs = getAllAttributes(element); for (Map.Entry<String, String> entry : attrs.entrySet()) { p.setProperty(entry.getKey(), entry.getValue()); } dataSources.put(p.getProperty("name"), p); } } if(dataSources.get(null) == null){ for (Properties properties : dataSources.values()) { dataSources.put(null,properties); break; } } } private static String getStringAttribute(Element e, String name, String def) { String r = e.getAttribute(name); if (r == null || "".equals(r.trim())) r = def; return r; } private static HashMap<String, String> getAllAttributes(Element e) { HashMap<String, String> m = new HashMap<String, String>(); NamedNodeMap nnm = e.getAttributes(); for (int i = 0; i < nnm.getLength(); i++) { m.put(nnm.item(i).getNodeName(), nnm.item(i).getNodeValue()); } return m; } public static String getTxt(Node elem, StringBuilder buffer) { if (elem.getNodeType() != Node.CDATA_SECTION_NODE) { NodeList childs = elem.getChildNodes(); for (int i = 0; i < childs.getLength(); i++) { Node child = childs.item(i); short childType = child.getNodeType(); if (childType != Node.COMMENT_NODE && childType != Node.PROCESSING_INSTRUCTION_NODE) { getTxt(child, buffer); } } } else { buffer.append(elem.getNodeValue()); } return buffer.toString(); } public static List<Element> getChildNodes(Element e, String byName) { List<Element> result = new ArrayList<Element>(); NodeList l = e.getChildNodes(); for (int i = 0; i < l.getLength(); i++) { if (e.equals(l.item(i).getParentNode()) && byName.equals(l.item(i).getNodeName())) result.add((Element) l.item(i)); } return result; } public void clearCaches() { for (Entity entity : document.entities) entity.clearCache(); } public static final String SCRIPT = "script"; public static final String NAME = "name"; public static final String PROCESSOR = "processor"; /** * @deprecated use IMPORTER_NS_SHORT instead */ @Deprecated public static final String IMPORTER_NS = "dataimporter"; public static final String IMPORTER_NS_SHORT = "dih"; public static final String ROOT_ENTITY = "rootEntity"; public static final String FUNCTION = "function"; public static final String CLASS = "class"; public static final String DATA_SRC = "dataSource"; private static final Set<String> RESERVED_WORDS = new HashSet<String>(); static{ RESERVED_WORDS.add(IMPORTER_NS); RESERVED_WORDS.add(IMPORTER_NS_SHORT); RESERVED_WORDS.add("request"); RESERVED_WORDS.add("delta"); RESERVED_WORDS.add("functions"); RESERVED_WORDS.add("session"); RESERVED_WORDS.add(SolrWriter.LAST_INDEX_KEY); } }