/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.morphlines.solr;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.TreeMap;

import org.apache.solr.schema.IndexSchema;
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.AbstractCommand;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.typesafe.config.Config;

/**
 * Command that sanitizes record fields that are unknown to Solr schema.xml by either deleting them
 * (renameToPrefix is absent or a zero length string), or by moving them to a field prefixed with
 * the given renameToPrefix (e.g. renameToPrefix = "ignored_" to use typical dynamic Solr fields).
 * <p>
 * Recall that Solr throws an exception on any attempt to load a document that contains a field that
 * isn't specified in schema.xml.
 */
public final class SanitizeUnknownSolrFieldsBuilder implements CommandBuilder {

  /** Returns the single morphline command name handled by this builder. */
  @Override
  public Collection<String> getNames() {
    return Collections.singletonList("sanitizeUnknownSolrFields");
  }

  /** Creates a new sanitizing command wired between the given parent and child commands. */
  @Override
  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
    return new SanitizeUnknownSolrFields(this, config, parent, child, context);
  }


  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////
  /**
   * Command implementation: for every record field whose name the schema does not resolve, either
   * drops the field or moves its values to a field named {@code renameToPrefix + fieldName}.
   */
  private static final class SanitizeUnknownSolrFields extends AbstractCommand {

    /** Schema used to decide whether a record field name is known. Never null. */
    private final IndexSchema schema;

    /** Prefix for renamed fields, or null if unknown fields are to be deleted. */
    private final String renameToPrefix;

    /**
     * Reads the {@code solrLocator} and optional {@code renameToPrefix} settings from the given
     * config and loads the index schema via the locator.
     *
     * @throws NullPointerException if the locator yields no index schema
     */
    public SanitizeUnknownSolrFields(CommandBuilder builder, Config config, Command parent,
        Command child, MorphlineContext context) {
      super(builder, config, parent, child, context);

      Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator");
      SolrLocator locator = new SolrLocator(solrLocatorConfig, context);
      LOG.debug("solrLocator: {}", locator);
      this.schema = locator.getIndexSchema();
      Preconditions.checkNotNull(schema);

      // Building the sorted copy and the joined string is wasted work unless trace is enabled.
      if (LOG.isTraceEnabled()) {
        LOG.trace("Solr schema: \n{}",
            Joiner.on("\n").join(new TreeMap<String, Object>(schema.getFields()).values()));
      }

      // A blank or absent prefix means "delete unknown fields"; that mode is represented as null.
      String str = getConfigs().getString(config, "renameToPrefix", "").trim();
      this.renameToPrefix = str.length() > 0 ? str : null;

      // NOTE(review): presumably checks the config for unrecognized parameters - confirm against
      // AbstractCommand.
      validateArguments();
    }

    /**
     * Removes or renames every field of the record that the schema does not know, then hands the
     * record to the superclass implementation.
     *
     * @param record the record to sanitize in place
     * @return the result of {@code super.doProcess(record)}
     */
    @Override
    protected boolean doProcess(Record record) {
      // Iterate over a snapshot of the entries because the loop body adds and removes fields.
      Collection<Map.Entry<String, Collection<Object>>> entries =
          new ArrayList<Map.Entry<String, Collection<Object>>>(
              record.getFields().asMap().entrySet());

      for (Map.Entry<String, Collection<Object>> entry : entries) {
        String key = entry.getKey();
        if (schema.getFieldOrNull(key) == null) {
          LOG.debug("Sanitizing unknown Solr field: {}", key);
          Collection<Object> values = entry.getValue();
          if (renameToPrefix != null) {
            // Copy the values to the prefixed field before they are cleared below.
            record.getFields().putAll(renameToPrefix + key, values);
          }
          values.clear(); // implicitly removes key from record
        }
      }
      return super.doProcess(record);
    }

  }

}