/*
* Copyright (c) 2014-2015 Giving.com, trading as JustGiving or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located in the "license" file accompanying this file.
*
* This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for
* the specific language governing permissions and limitations under the License.
*
* @author Richard Freeman
*
*/
package com.justgiving.raven.kissmetrics.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Set;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
 * Command-line utility that scans files of line-delimited JSON records (one JSON object per line)
 * and writes a tab-separated report, sorted by key, with one line per key:
 * {@code key <TAB> keyCounter value <TAB> valueLength value}.
 *
 * <p>The actual bookkeeping is delegated to {@code KeyValueCounter}; this class only handles file
 * traversal, JSON parsing and report output. NOTE(review): judging by the helper names,
 * {@code keyCounter} holds occurrence counts and {@code valueLength} the maximum value length seen
 * per key - confirm against {@code KeyValueCounter}.
 */
public class KissmetricsLocalSchemaExtractor {

    static final Logger logger = Logger.getLogger(KissmetricsLocalSchemaExtractor.class);

    /** Charset used both to read the input records and to write the report. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Input location used when exactly two command-line arguments are not supplied. */
    private static final String DEFAULT_INPUT = "D:\\datasets\\kinesis\\stg\\input2\\";

    /** Output file used when exactly two command-line arguments are not supplied. */
    private static final String DEFAULT_OUTPUT = "D:\\datasets\\kinesis\\stg\\output2\\schema-kinesis.txt";

    /**
     * Parses every regular file directly inside a folder as JSON records, merges the per-file
     * counters into one total and writes the sorted report to {@code outputFile}.
     * Sub-directories are not descended into; a warning is logged for each one.
     *
     * @param inputFolder path of the folder holding the JSON record files
     * @param outputFile  path of the report file to create (parent directories are created if needed)
     * @throws IOException if the folder cannot be listed or the report cannot be written
     */
    private static void countKeysInJsonRecordsFolder(String inputFolder, String outputFile) throws IOException {
        File[] listOfFiles = new File(inputFolder).listFiles();
        if (listOfFiles == null) {
            // File.listFiles() returns null (not an empty array) when the path is not a
            // directory or an I/O error occurs; fail with a clear message instead of an NPE.
            throw new FileNotFoundException("Input folder does not exist or cannot be listed: " + inputFolder);
        }

        KeyValueCounter totalKeyValueCounter = new KeyValueCounter();
        for (File currentFile : listOfFiles) {
            if (currentFile.isFile()) {
                logger.info("Processing file: " + currentFile.getName());
                KeyValueCounter currentKeyValueCounter = countKeysInJsonRecordsFile(currentFile.getPath());
                totalKeyValueCounter = deepMergeKeyValueCounter(totalKeyValueCounter, currentKeyValueCounter);
            } else if (currentFile.isDirectory()) {
                logger.warn("Sub-directory folders are currently ignored");
            }
        }
        writeSchema(totalKeyValueCounter, outputFile);
    }

    /**
     * Combines two counters into a newly created one.
     *
     * <p>The result is seeded with the entries of {@code originalMap} and then merged with
     * {@code newMapToAdd} through {@code deepMergeHashMapsAddition} (for {@code keyCounter}) and
     * {@code deepMergeHashMapsMaxium} (for {@code valueLength}). NOTE(review): presumably these
     * sum the counts and keep the larger length respectively - confirm in {@code KeyValueCounter}.
     *
     * @param originalMap the counter accumulated so far
     * @param newMapToAdd the counter to merge in
     * @return a new counter holding the merged result
     */
    public static KeyValueCounter deepMergeKeyValueCounter(KeyValueCounter originalMap, KeyValueCounter newMapToAdd) {
        KeyValueCounter outputValueCounter = new KeyValueCounter();
        outputValueCounter.keyCounter.putAll(originalMap.keyCounter);
        outputValueCounter.deepMergeHashMapsAddition(newMapToAdd.keyCounter);
        outputValueCounter.valueLength.putAll(originalMap.valueLength);
        outputValueCounter.deepMergeHashMapsMaxium(newMapToAdd.valueLength);
        return outputValueCounter;
    }

    /**
     * Reads a file containing one JSON object per line and records every key with a non-null value.
     *
     * <p>This method is best-effort and never throws: a line that cannot be parsed, or that is
     * not a JSON object, is logged and skipped so that one bad record does not discard the rest
     * of the file. If the file cannot be opened or read, the error is logged and whatever was
     * counted up to that point is returned.
     *
     * @param input_filename path to a file of line-delimited JSON records (read as UTF-8)
     * @return the counter filled from the records that could be processed; never {@code null}
     */
    private static KeyValueCounter countKeysInJsonRecordsFile(String input_filename) {
        JSONParser jsonParser = new JSONParser();
        KeyValueCounter keyValueCounter = new KeyValueCounter();

        // try-with-resources closes the reader (and the underlying stream) on every exit path.
        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(new FileInputStream(input_filename), UTF_8))) {
            String line;
            int lineNumber = 0;
            while ((line = bufferedReader.readLine()) != null) {
                lineNumber++;
                Object parsed;
                try {
                    parsed = jsonParser.parse(line);
                } catch (ParseException e) {
                    logger.error("Skipping unparsable record at " + input_filename + ":" + lineNumber, e);
                    continue;
                }
                if (!(parsed instanceof JSONObject)) {
                    logger.error("Skipping record that is not a JSON object at "
                            + input_filename + ":" + lineNumber);
                    continue;
                }
                countKeysInRecord((JSONObject) parsed, keyValueCounter);
            }
        } catch (IOException | RuntimeException e) {
            // Keep the original best-effort contract (partial result, no exception to the caller),
            // but report through the logger with the file name instead of printStackTrace().
            logger.error("Error while processing file " + input_filename, e);
        }
        return keyValueCounter;
    }

    /**
     * Records each key of a single JSON record in the given counter.
     * Keys whose value is JSON {@code null} are not counted; an error is logged for them unless
     * the key is {@code user_agent}.
     */
    private static void countKeysInRecord(JSONObject jsonObject, KeyValueCounter keyValueCounter) {
        // json-simple's JSONObject is not generic; keys of a parsed JSON object are strings.
        @SuppressWarnings("unchecked")
        Set<String> keyset = jsonObject.keySet();
        for (String jsonkey : keyset) {
            Object value = jsonObject.get(jsonkey);
            if (value != null) {
                String jsonValue = value.toString();
                int lenValue = (jsonValue == null) ? 0 : jsonValue.length();
                keyValueCounter.incrementKeyCounter(jsonkey);
                keyValueCounter.putValueLength(jsonkey, lenValue);
            } else if (!"user_agent".equals(jsonkey)) {
                logger.error("Errot typing to get jsonkey " + jsonkey);
            }
        }
    }

    /**
     * Renders the counter as text, one line per key in ascending key order:
     * {@code key <TAB> keyCounter value <TAB> valueLength value <LF>}.
     *
     * @param outputKetValueCounter the counter to render
     * @return the sorted, tab-separated report (empty string when the counter has no keys)
     */
    private static String sortOutputByKey(KeyValueCounter outputKetValueCounter) {
        Set<String> set = outputKetValueCounter.keyCounter.keySet();
        ArrayList<String> list = new ArrayList<String>(set);
        Collections.sort(list);
        StringBuilder sb = new StringBuilder();
        for (String key : list) {
            sb.append(key).append("\t")
                    .append(outputKetValueCounter.keyCounter.get(key))
                    .append("\t")
                    .append(outputKetValueCounter.valueLength.get(key))
                    .append("\n");
        }
        return sb.toString();
    }

    /**
     * Logs the sorted report and writes it to {@code outputFile} as UTF-8, creating missing
     * parent directories first.
     *
     * @throws IOException if the parent directory cannot be created or the file cannot be written
     */
    private static void writeSchema(KeyValueCounter keyValueCounter, String outputFile) throws IOException {
        String report = sortOutputByKey(keyValueCounter);
        logger.info("---------------");
        logger.info(report);
        logger.info("saving output to file: " + outputFile);

        // getParentFile() is null for a bare file name such as "schema.txt".
        File parent = new File(outputFile).getParentFile();
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Could not create output directory: " + parent);
        }
        try (PrintWriter out = new PrintWriter(outputFile, UTF_8.name())) {
            out.print(report);
            // PrintWriter never throws on write failures; it only records them.
            if (out.checkError()) {
                throw new IOException("Failed to write schema to " + outputFile);
            }
        }
    }

    /**
     * Entry point. Usage: {@code <inputFolderOrFile> <outputFile>}.
     *
     * <p>When exactly two arguments are not given, the built-in default paths are used. The input
     * is treated as a folder when it ends with a backslash or is an existing directory, otherwise
     * as a single record file; in both cases the report is written to the output file. log4j is
     * configured from {@code log4j.properties} in the current working directory.
     *
     * @param args input folder or file, then output file
     * @throws FileNotFoundException if the input folder cannot be listed or the output file cannot be created
     * @throws IOException           if the report cannot be written
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        for (String s : args) {
            System.out.println(s);
        }

        String inputFolder = DEFAULT_INPUT;
        String outputFile = DEFAULT_OUTPUT;
        if (args.length != 2) {
            System.out.println("No arguments provided, using default values");
            System.out.println("InputFolder/File: " + inputFolder);
            System.out.println("OutputFile: " + outputFile);
        } else {
            inputFolder = args[0];
            outputFile = args[1];
        }

        if (new File(outputFile).isDirectory()) {
            System.err.println("Error output file cannot be a directory");
            return;
        }

        String logConfigPath = Paths.get(System.getProperty("user.dir"), "log4j.properties").toString();
        System.out.println("log config file used: " + logConfigPath);
        PropertyConfigurator.configure(logConfigPath);
        logger.info("log config file used: " + logConfigPath);

        // The trailing-backslash rule is kept for compatibility; the isDirectory() check makes
        // folder detection also work for paths without it and on non-Windows systems.
        if (inputFolder.endsWith("\\") || new File(inputFolder).isDirectory()) {
            logger.info("Detected source folder");
            countKeysInJsonRecordsFolder(inputFolder, outputFile);
        } else {
            logger.info("Detected source file");
            // Write the single-file result too, instead of discarding it.
            writeSchema(countKeysInJsonRecordsFile(inputFolder), outputFile);
        }
    }
}