/* * Copyright (c) 2014-2015 Giving.com, trading as JustGiving or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located in the "license" file accompanying this file. * * This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for * the specific language governing permissions and limitations under the License. * * @author Richard Freeman * */ package com.justgiving.raven.kissmetrics.jsonenricher; import java.io.IOException; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Calendar; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.log4j.Logger; import com.justgiving.raven.kissmetrics.utils.KeyRowWrapper; import com.justgiving.raven.kissmetrics.utils.KissmetricsRowParser; import com.justgiving.raven.kissmetrics.KissmetricsConstants.TRACKING_COUNTER; import org.apache.log4j.Logger; /**** * This mapper takes in a json rows, parses the elements based on a predefined schema into a tab sperated file * it emits the email and/ID as key and the full tsv as value * * @author rfreeman * */ public class KissmetricsJsonToEnrichedJsonMapper extends Mapper<LongWritable, Text, Text, Text> { static DateFormat dateFormatter = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss"); // %Y-%m-%d %H:%M:%S public String getCurrentDate(){ Calendar calendar = Calendar.getInstance(); String event_timedate = dateFormatter.format(calendar.getTime()); return event_timedate; } @Override public void map(LongWritable rowKey, Text rawJsonRow, Context context) throws IOException, InterruptedException { final Logger logger = Logger.getLogger(KissmetricsJsonToEnrichedJsonMapper.class); String fileNameInputToMapper = ""; String filePath = ""; try{ fileNameInputToMapper = ((FileSplit) context.getInputSplit()).getPath().getName(); filePath = ((FileSplit) context.getInputSplit()).getPath().toString(); } catch (Exception e) { logger.info("unable to get file inputpath"); System.out.println("unable to get file inputpath"); } //TODO: String capturedDate = getCurrentDate(); KeyRowWrapper keyRow = KissmetricsRowParser.parseJsonRowToValidJson(rawJsonRow, fileNameInputToMapper, filePath); if(keyRow != null){ if(keyRow.getKey() != null && keyRow.getJsonrow() != null && keyRow.getReturnCode() == TRACKING_COUNTER.VALID_JSON_ROW){ context.getCounter(TRACKING_COUNTER.VALID_JSON_ROW).increment(1); //TODO: Monitoring use of Octal Decoder if(keyRow.getMonitoringCode() == TRACKING_COUNTER.OCTAL_PARSING_NEEDED){ context.getCounter(TRACKING_COUNTER.OCTAL_PARSING_NEEDED).increment(1); } context.write(new Text(keyRow.getKey()), new Text(keyRow.getJsonrow())); }else if (keyRow.getMonitoringCode() == TRACKING_COUNTER.INVALID_DATE){ logger.info("Error on row parsing, skipping row"); System.out.println("Error on row parsing"); System.out.println("Skipped row with contents: " + rawJsonRow.toString()); System.out.println("--------------------------------"); context.getCounter(TRACKING_COUNTER.INVALID_JSON_ROW).increment(1); context.getCounter(TRACKING_COUNTER.INVALID_DATE).increment(1); }else { // Assume it's an invalid logger.info("Error on row parsing, skipping row"); System.out.println("Error on row parsing"); System.out.println("Skipped row with contents: " + rawJsonRow.toString()); System.out.println("--------------------------------"); context.getCounter(TRACKING_COUNTER.INVALID_JSON_ROW).increment(1); } } else{ logger.info("Error on row parsing, skipping row"); System.out.println("Error on row parsing"); System.out.println("Skipped row with contents: " + rawJsonRow.toString()); System.out.println("--------------------------------"); context.getCounter(TRACKING_COUNTER.INVALID_JSON_ROW).increment(1); } } }