package com.esri;
import ch.hsr.geohash.BoundingBox;
import ch.hsr.geohash.GeoHash;
import ch.hsr.geohash.queries.GeoHashBoundingBoxQuery;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Mapper that, for every input point, looks up the nearest point stored in the
 * HBase table {@code Const.LUT} and emits the (scaled, rounded) distance to it
 * as the key with a count of one.
 * <p>
 * Input lines must end with two tab-separated decimal numbers; the first is
 * parsed as the longitude and the second as the latitude. Lines that do not
 * match are skipped silently.
 * <p>
 * Configuration keys read in {@link #setup}:
 * <ul>
 * <li>{@code freqDistJob.offset} (default 5) - half-size, in degrees, of the search box around each point.</li>
 * <li>{@code freqDistJob.scale} (default 10) - multiplier applied to the distance before rounding it into a key.</li>
 * <li>{@code freqDistJob.caching} (default 50) - scanner caching used for each table scan.</li>
 * </ul>
 * The defaults of the last two keys reproduce the values that were previously hard-coded.
 */
public final class FreqDistMapper extends Mapper<LongWritable, Text, LongWritable, IntWritable>
{
    /** Shared constant value written for every emitted key. */
    public final static IntWritable ONE = new IntWritable(1);

    /**
     * Matches a line that ends with {@code <tab>number<tab>number}. Both numbers
     * must contain a decimal point and may carry a leading minus sign.
     * Compiled once for all mapper instances.
     */
    private static final Pattern LINE_PATTERN = Pattern.compile("^.+\\t(-?\\d+\\.\\d+)\\t(-?\\d+\\.\\d+)$");

    /** Reused output key to avoid allocating one per record. */
    private final LongWritable m_key = new LongWritable();

    /** Half-size of the search box, in the same units as the input coordinates. */
    private double m_offset;

    /** Multiplier applied to the distance before it is rounded into the output key. */
    private double m_scale;

    /** Scanner caching handed to every scan. */
    private int m_caching;

    /** Lookup table that is scanned for candidate points; opened in setup, closed in cleanup. */
    private HTable m_table;

    /**
     * Reads the job parameters and opens the lookup table.
     *
     * @param context the task context supplying the configuration
     * @throws IOException if the table cannot be opened
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void setup(final Context context) throws IOException, InterruptedException
    {
        m_offset = context.getConfiguration().getFloat("freqDistJob.offset", 5);
        m_scale = context.getConfiguration().getFloat("freqDistJob.scale", 10);
        m_caching = context.getConfiguration().getInt("freqDistJob.caching", 50);
        m_table = new HTable(context.getConfiguration(), Const.LUT);
    }

    /**
     * Parses one input line, searches the lookup table inside a box of
     * +/- offset around the point, and emits the bucketed distance to the
     * nearest candidate. Nothing is emitted when the line does not match the
     * expected format or when no candidate is found.
     *
     * @param lineno  input key (unused)
     * @param line    the input record
     * @param context the task context that receives the output
     * @throws IOException if a table scan or the write fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void map(
            final LongWritable lineno,
            final Text line,
            final Context context
    ) throws IOException, InterruptedException
    {
        final Matcher matcher = LINE_PATTERN.matcher(line.toString());
        if (!matcher.matches())
        {
            return;
        }

        final double lon = Double.parseDouble(matcher.group(1));
        final double lat = Double.parseDouble(matcher.group(2));

        // Search box around the point, clamped to [-90, 90] and [-180, 180].
        final BoundingBox boundingBox = new BoundingBox(
                Math.max(-90, lat - m_offset), Math.min(90, lat + m_offset),
                Math.max(-180, lon - m_offset), Math.min(180, lon + m_offset));
        final GeoHashBoundingBoxQuery geoHashBoundingBoxQuery = new GeoHashBoundingBoxQuery(boundingBox);
        final List<GeoHash> searchHashes = geoHashBoundingBoxQuery.getSearchHashes();

        // Smallest squared distance seen so far across all search hashes.
        double minDist = Double.POSITIVE_INFINITY;
        for (final GeoHash geoHash : searchHashes)
        {
            minDist = doScan(geoHash, lon, lat, boundingBox, minDist);
        }

        if (minDist < Double.POSITIVE_INFINITY)
        {
            // minDist is a squared distance; take the root before scaling and rounding.
            m_key.set(Math.round(Math.sqrt(minDist) * m_scale));
            context.write(m_key, ONE);
        }
    }

    /**
     * Scans the rows between the given hash and its successor and returns the
     * smaller of {@code minDist} and the smallest squared distance from the
     * original point to any row returned by the scan.
     *
     * @param start       hash whose long value is the scan's start row
     * @param origLon     longitude of the point being mapped
     * @param origLat     latitude of the point being mapped
     * @param boundingBox box handed to the server-side {@code BoundingBoxFilter}
     * @param minDist     smallest squared distance found so far
     * @return the updated smallest squared distance
     * @throws IOException if the scan fails or a row key is shorter than expected
     */
    private double doScan(
            final GeoHash start,
            final double origLon,
            final double origLat,
            final BoundingBox boundingBox,
            double minDist) throws IOException
    {
        final byte[] startRow = Bytes.toBytes(start.longValue());
        final byte[] stopRow = Bytes.toBytes(start.next().longValue());

        final Scan scan = new Scan();
        scan.setStartRow(startRow);
        // Only bound the scan when the successor really sorts after the start.
        // If it does not (e.g. the successor value wrapped around at the top of
        // the hash range), a stop row would make the range empty and silently
        // drop every candidate, so the scan is left open-ended instead.
        if (Bytes.compareTo(startRow, stopRow) < 0)
        {
            scan.setStopRow(stopRow);
        }
        scan.setMaxVersions(1);
        scan.setCaching(m_caching);
        scan.setFilter(new BoundingBoxFilter(boundingBox));

        final ResultScanner scanner = m_table.getScanner(scan);
        try
        {
            for (final Result result : scanner)
            {
                // The row key is decoded as: one long, then longitude and latitude as doubles.
                final DataInput dataInput = new DataInputStream(new ByteArrayInputStream(result.getRow()));
                dataInput.readLong(); // skip the leading 8-byte value (presumably the geohash bits); unused here
                final double resultLon = dataInput.readDouble();
                final double resultLat = dataInput.readDouble();
                // Dummy implementation of geo distance - should use http://en.wikipedia.org/wiki/Vincenty's_formulae
                final double deltaLon = resultLon - origLon;
                final double deltaLat = resultLat - origLat;
                minDist = Math.min(minDist, deltaLon * deltaLon + deltaLat * deltaLat);
            }
        }
        finally
        {
            scanner.close();
        }
        return minDist;
    }

    /**
     * Closes the lookup table if it was opened.
     *
     * @param context the task context (unused)
     * @throws IOException if closing the table fails
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void cleanup(final Context context) throws IOException, InterruptedException
    {
        if (m_table != null)
        {
            m_table.close();
        }
    }
}