package com.esri;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Mapper that appends looked-up column values to each delimited input record.
 * <p>
 * Every input line is split with the configured input separator (which is compiled as a
 * regular expression), the longitude and latitude tokens are parsed as doubles and passed to
 * the configured {@link SearchInterface}. When the search reports a match, or when
 * {@link GeoEnrichmentJob#KEY_WRITE_ALL} is enabled, the original line is written out followed
 * by the formatted value of every configured {@link ColumnInterface}.
 * <p>
 * Lines that cannot be parsed or searched are counted under the {@link #BAD_LINE} counter and
 * skipped; failures while writing the output are propagated to the framework.
 */
public class GeoEnrichmentMapper extends Mapper<LongWritable, Text, NullWritable, Text>
{
    /**
     * Name of the counter (in the group named after this class) tracking skipped lines.
     */
    public static final String BAD_LINE = "Bad Line";

    private final Log m_log = LogFactory.getLog(getClass());

    private final NullWritable m_nullWritable = NullWritable.get();

    /**
     * Output value instance reused for every record to avoid one allocation per line.
     */
    private final Text m_outputText = new Text();

    private int m_lonField;
    private int m_latField;
    private Pattern m_pattern;
    private Counter m_badLineCounter;
    private List<ColumnInterface> m_columnList;
    private String m_outputSeparator;
    /**
     * The output separator escaped so it is inserted literally by {@link Matcher#replaceAll}.
     */
    private String m_outputReplacement;
    private SearchInterface m_searchInterface;
    private String m_inputSeparator;
    private boolean m_writeAll;

    /**
     * Reads the job configuration and prepares the search implementation.
     * <p>
     * The input separator defaults to a tab, the output separator defaults to the input
     * separator, write-all defaults to {@code true} and both coordinate field indexes default
     * to {@code 0}. If the configured search class cannot be instantiated (or is not a
     * {@link SearchInterface}), a warning is logged and {@link SearchNoop} is used instead.
     *
     * @param context the task context supplying the configuration and counters
     * @throws IOException          if the search implementation setup fails with an I/O error
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void setup(final Context context) throws IOException, InterruptedException
    {
        final Configuration configuration = context.getConfiguration();
        m_inputSeparator = configuration.get(GeoEnrichmentJob.KEY_INPUT_SEPARATOR, "\t");
        m_outputSeparator = configuration.get(GeoEnrichmentJob.KEY_OUTPUT_SEPARATOR, m_inputSeparator);
        m_pattern = Pattern.compile(m_inputSeparator);
        // Quote once here: '$' and '\' in a replacement string are otherwise interpreted as
        // group references / escapes and would corrupt the output or throw on every record.
        m_outputReplacement = Matcher.quoteReplacement(m_outputSeparator);
        m_writeAll = configuration.getBoolean(GeoEnrichmentJob.KEY_WRITE_ALL, true);
        m_lonField = configuration.getInt(GeoEnrichmentJob.KEY_LON_FIELD, 0);
        m_latField = configuration.getInt(GeoEnrichmentJob.KEY_LAT_FIELD, 0);
        m_columnList = ColumnParser.newInstance().parseColumns(configuration.getStrings(GeoEnrichmentJob.KEY_COLUMN));

        final Class<?> clazz = configuration.getClass(GeoEnrichmentJob.KEY_SEARCH_CLASS, SearchNoop.class);
        try
        {
            // Checked cast inside the try block: a class that is not a SearchInterface ends up
            // in the fallback below. Class.newInstance() is deprecated, hence the constructor.
            m_searchInterface = (SearchInterface) clazz.getDeclaredConstructor().newInstance();
        }
        catch (Exception e)
        {
            m_log.warn(e.toString(), e);
            m_searchInterface = new SearchNoop();
        }
        m_searchInterface.setup(configuration, m_columnList);

        m_badLineCounter = context.getCounter(GeoEnrichmentMapper.class.getSimpleName(), BAD_LINE);
    }

    /**
     * Processes one input line and, when applicable, writes it with the column values appended.
     *
     * @param key     the input key supplied by the input format (not used by this mapper)
     * @param value   the delimited input line
     * @param context the task context used to emit the output record
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if writing the output record is interrupted
     */
    @Override
    protected void map(
            final LongWritable key,
            final Text value,
            final Context context) throws IOException, InterruptedException
    {
        final String text = value.toString();
        final String record;
        try
        {
            final String[] tokens = m_pattern.split(text);
            final double lon = Double.parseDouble(tokens[m_lonField]);
            final double lat = Double.parseDouble(tokens[m_latField]);
            final boolean found = m_searchInterface.search(lon, lat, m_columnList);
            if (!m_writeAll && !found)
            {
                return;
            }
            final StringBuilder stringBuilder = new StringBuilder();
            if (m_inputSeparator.equals(m_outputSeparator))
            {
                stringBuilder.append(text);
            }
            else
            {
                // Reuse the pattern compiled in setup instead of recompiling it per record.
                stringBuilder.append(m_pattern.matcher(text).replaceAll(m_outputReplacement));
            }
            for (final ColumnInterface column : m_columnList)
            {
                stringBuilder.append(m_outputSeparator).append(column.toFormattedString());
            }
            record = stringBuilder.toString();
        }
        catch (final Exception e)
        {
            // Only data/search problems are treated as a bad line; JVM errors are not caught.
            m_badLineCounter.increment(1L);
            m_log.error(e.toString(), e);
            return;
        }
        // Written outside the try block so that output failures reach the framework instead of
        // being miscounted as bad input lines.
        m_outputText.set(record);
        context.write(m_nullWritable, m_outputText);
    }

    /**
     * Gives the search implementation a chance to release what it set up.
     *
     * @param context the task context supplying the configuration
     * @throws IOException          if the search implementation cleanup fails with an I/O error
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void cleanup(final Context context) throws IOException, InterruptedException
    {
        m_searchInterface.cleanup(context.getConfiguration());
    }
}