/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.hadoop.morphline; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.Collection; import java.util.Map; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.client.solrj.response.UpdateResponse; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; import org.apache.solr.hadoop.HeartBeater; import org.apache.solr.hadoop.SolrInputDocumentWritable; import org.apache.solr.hadoop.SolrMapper; import org.apache.solr.morphlines.solr.DocumentLoader; import org.apache.solr.schema.IndexSchema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.codahale.metrics.Counter; import com.codahale.metrics.Counting; import com.codahale.metrics.Histogram; import com.codahale.metrics.Meter; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.Timer; /** * This class takes the input files, extracts the relevant content, transforms * it and hands SolrInputDocuments to a set of reducers. * * More specifically, it consumes a list of <offset, hdfsFilePath> input pairs. * For each such pair extracts a set of zero or more SolrInputDocuments and * sends them to a downstream Reducer. The key for the reducer is the unique id * of the SolrInputDocument specified in Solr schema.xml. */ public class MorphlineMapper extends SolrMapper<LongWritable, Text> { private Context context; private MorphlineMapRunner runner; private HeartBeater heartBeater; private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); protected IndexSchema getSchema() { return runner.getSchema(); } protected Context getContext() { return context; } @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); this.context = context; heartBeater = new HeartBeater(context); this.runner = new MorphlineMapRunner( context.getConfiguration(), new MyDocumentLoader(), getSolrHomeDir().toString()); } /** * Extract content from the path specified in the value. Key is useless. */ @Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { heartBeater.needHeartBeat(); try { runner.map(value.toString(), context.getConfiguration(), context); } finally { heartBeater.cancelHeartBeat(); } } @Override protected void cleanup(Context context) throws IOException, InterruptedException { heartBeater.close(); runner.cleanup(); addMetricsToMRCounters(runner.getMorphlineContext().getMetricRegistry(), context); super.cleanup(context); } private void addMetricsToMRCounters(MetricRegistry metricRegistry, Context context) { for (Map.Entry<String, Counter> entry : metricRegistry.getCounters().entrySet()) { addCounting(entry.getKey(), entry.getValue(), 1); } for (Map.Entry<String, Histogram> entry : metricRegistry.getHistograms().entrySet()) { addCounting(entry.getKey(), entry.getValue(), 1); } for (Map.Entry<String, Meter> entry : metricRegistry.getMeters().entrySet()) { addCounting(entry.getKey(), entry.getValue(), 1); } for (Map.Entry<String, Timer> entry : metricRegistry.getTimers().entrySet()) { long nanosPerMilliSec = 1000 * 1000; addCounting(entry.getKey(), entry.getValue(), nanosPerMilliSec); } } private void addCounting(String metricName, Counting value, long scale) { context.getCounter("morphline", metricName).increment(value.getCount() / scale); } /////////////////////////////////////////////////////////////////////////////// // Nested classes: /////////////////////////////////////////////////////////////////////////////// private final class MyDocumentLoader implements DocumentLoader { @Override public void beginTransaction() { } @Override public void load(SolrInputDocument doc) throws IOException, SolrServerException { String uniqueKeyFieldName = getSchema().getUniqueKeyField().getName(); Object id = doc.getFieldValue(uniqueKeyFieldName); if (id == null) { throw new IllegalArgumentException("Missing value for (required) unique document key: " + uniqueKeyFieldName + " (see Solr schema.xml)"); } try { context.write(new Text(id.toString()), new SolrInputDocumentWritable(doc)); } catch (InterruptedException e) { throw new IOException("Interrupted while writing " + doc, e); } if (LOG.isDebugEnabled()) { long numParserOutputBytes = 0; for (SolrInputField field : doc.values()) { numParserOutputBytes += sizeOf(field.getValue()); } context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.PARSER_OUTPUT_BYTES.toString()).increment(numParserOutputBytes); } context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.DOCS_READ.toString()).increment(1); } // just an approximation private long sizeOf(Object value) { if (value instanceof CharSequence) { return ((CharSequence) value).length(); } else if (value instanceof Integer) { return 4; } else if (value instanceof Long) { return 8; } else if (value instanceof Collection) { long size = 0; for (Object val : (Collection) value) { size += sizeOf(val); } return size; } else { return String.valueOf(value).length(); } } @Override public void commitTransaction() { } @Override public UpdateResponse rollbackTransaction() throws SolrServerException, IOException { return new UpdateResponse(); } @Override public void shutdown() { } @Override public SolrPingResponse ping() throws SolrServerException, IOException { return new SolrPingResponse(); } } }