/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.utils;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Dictionary for translating column values (string keys) to integer codes, and vice
 * versa.
 *
 * Two maps are maintained side by side: key to code and code to key. Codes handed
 * out by {@link #addKey(String)} start at 1 and increase by one for every new key.
 *
 * The class also has methods to read and write the dictionary from files. The file
 * format is one entry per line, "key code", separated by whitespace; files are
 * encoded as UTF-8.
 *
 * @author Maneesh Varshney
 *
 */
public class CodeDictionary
{
    /** Charset used for both reading and writing, so files round-trip on any JVM. */
    private static final String CHARSET = "UTF-8";

    private final Map<String, Integer> keyToCodeMap = new HashMap<String, Integer>();
    private final Map<Integer, String> codeToKeyMap = new HashMap<Integer, String>();

    // Next code handed out by addKey. Becomes negative once it overflows, which
    // addKey treats as "dictionary is full".
    private int nextCode = 1;

    /**
     * Looks up the code of a key.
     *
     * @param key
     *            the key to look up
     * @return the code mapped to the key, or -1 if the key is not in the dictionary
     */
    public int getCodeForKey(String key)
    {
        Integer code = keyToCodeMap.get(key);
        return code == null ? -1 : code;
    }

    /**
     * Looks up the key of a code.
     *
     * @param code
     *            the code to look up
     * @return the key mapped to the code, or null if the code is not in the dictionary
     */
    public String getValueForCode(int code)
    {
        return codeToKeyMap.get(code);
    }

    /**
     * Adds a key to the dictionary, assigning it the next free code. If the key is
     * already present, its existing code is returned and nothing changes.
     *
     * @param key
     *            the key to add
     * @return the code of the key
     * @throws RuntimeException
     *             if the code counter has overflowed and no new code can be assigned
     */
    public int addKey(String key)
    {
        // Test for presence of the key itself rather than "code > 0", so that keys
        // registered with a zero or negative code are not silently remapped.
        Integer existing = keyToCodeMap.get(key);
        if (existing != null)
        {
            return existing;
        }

        if (nextCode < 0)
        {
            throw new RuntimeException("CodeDictionary cannot store more data");
        }

        // assign a new code to this key
        int code = nextCode;
        nextCode++;
        keyToCodeMap.put(key, code);
        codeToKeyMap.put(code, key);
        return code;
    }

    /**
     * Returns the keys in the dictionary. The returned set is the key set of the
     * internal key-to-code map (not a copy), so it reflects later additions.
     *
     * @return the set of keys
     */
    public Set<String> keySet()
    {
        return keyToCodeMap.keySet();
    }

    /**
     * Registers a key with an explicit code, replacing any entry stored under the
     * same key or the same code. The counter used by {@link #addKey(String)} is moved
     * past the given code if needed.
     *
     * @param key
     *            the key to register
     * @param code
     *            the code to map the key to
     */
    public void addKeyCode(String key, int code)
    {
        keyToCodeMap.put(key, code);
        codeToKeyMap.put(code, key);
        if (code >= nextCode)
        {
            // For code == Integer.MAX_VALUE this wraps to a negative value, which
            // makes addKey refuse to hand out further codes.
            nextCode = (code + 1);
        }
    }

    /**
     * Reads dictionary from local filesystem. Entries are added to whatever the
     * dictionary already contains.
     *
     * @param filename
     *            path of the local file to read
     * @throws IOException
     *             if the file cannot be opened or read, or contains a line with
     *             fewer than two fields
     */
    public void read(String filename) throws IOException
    {
        readAndClose(new FileInputStream(filename));
    }

    /**
     * Reads dictionary from the HDFS filesystem. Entries are added to whatever the
     * dictionary already contains.
     *
     * @param fs
     *            the filesystem to open the path on
     * @param path
     *            the file to read
     * @throws IOException
     *             if the file cannot be opened or read, or contains a line with
     *             fewer than two fields
     */
    public void read(FileSystem fs, Path path) throws IOException
    {
        readAndClose(fs.open(path));
    }

    /**
     * Parses dictionary entries from the stream as UTF-8 text and closes the stream,
     * whether or not parsing succeeds.
     */
    private void readAndClose(InputStream in) throws IOException
    {
        try
        {
            read(new BufferedReader(new InputStreamReader(in, CHARSET)));
        }
        finally
        {
            in.close();
        }
    }

    /**
     * Parses "key code" lines from the reader and registers each entry. Blank lines
     * are skipped; fields after the second one are ignored. A code that is not a
     * valid integer results in a NumberFormatException from Integer.parseInt.
     */
    private void read(BufferedReader reader) throws IOException
    {
        String line;
        while ((line = reader.readLine()) != null)
        {
            String trimmed = line.trim();
            if (trimmed.isEmpty())
            {
                continue;
            }

            String[] keyval = trimmed.split("\\s+");
            if (keyval.length < 2)
            {
                throw new IOException("Malformed dictionary line (expected \"<key> <code>\"): "
                        + line);
            }

            // Delegate so that the next-code bookkeeping is identical to addKeyCode.
            addKeyCode(keyval[0], Integer.parseInt(keyval[1]));
        }
    }

    /**
     * Writes the dictionary to the given path as UTF-8 text, one "key code" line per
     * entry. If the path already exists, the existing file is first renamed with an
     * ".old" suffix (after deleting any previous ".old" file).
     *
     * NOTE: the results of the delete and rename calls are not checked.
     *
     * @param fs
     *            the filesystem to write to
     * @param path
     *            the file to write
     * @throws IOException
     *             if the file cannot be created or written
     */
    public void write(FileSystem fs, Path path) throws IOException
    {
        // if the path exists, rename the existing file with ".old" suffix
        if (fs.exists(path))
        {
            Path renamePath = new Path(path.toString() + ".old");
            fs.delete(renamePath, false);
            fs.rename(path, renamePath);
        }

        // Write data to file
        FSDataOutputStream ostream = fs.create(path);
        try
        {
            BufferedWriter writer =
                    new BufferedWriter(new OutputStreamWriter(ostream, CHARSET));
            for (Map.Entry<String, Integer> entry : keyToCodeMap.entrySet())
            {
                // Plain concatenation instead of String.format("%d"): the code must
                // be written with ASCII digits regardless of the default locale so
                // that Integer.parseInt can read it back.
                writer.write(entry.getKey() + " " + entry.getValue() + "\n");
            }
            writer.flush();
            writer.close();
        }
        finally
        {
            // Also reached when a write fails, so the stream is never left open.
            ostream.close();
        }
    }

    /**
     * @return the string form of the key-to-code map
     */
    @Override
    public String toString()
    {
        return keyToCodeMap.toString();
    }
}