/*
* avenir: Predictive analytic based on Hadoop Map Reduce
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.avenir.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.chombo.util.Utility;
/**
 * Writes an entity distance text file as a Hadoop map file and looks records up
 * from it. The key is the source entity ID. The value is a list of target entity
 * and distance pairs, stored flat as
 * {@code target1<delim>distance1<delim>target2<delim>distance2...}.
 *
 * A single instance holds at most one open map file reader; instances are not
 * synchronized.
 *
 * @author pranab
 *
 */
public class EntityDistanceMapFileAccessor {
    private final FileSystem fileSys;
    private final Configuration conf;
    private MapFile.Reader reader;
    private String delim;

    /**
     * Creates an accessor backed by a newly created default configuration.
     *
     * @throws IOException if the file system can not be obtained
     */
    public EntityDistanceMapFileAccessor() throws IOException {
        conf = new Configuration();
        fileSys = FileSystem.get(conf);
    }

    /**
     * Creates an accessor backed by the supplied configuration.
     *
     * @param conf configuration used to resolve path parameters and the file system
     * @throws IOException if the file system can not be obtained
     */
    public EntityDistanceMapFileAccessor(Configuration conf) throws IOException {
        this.conf = conf;
        fileSys = FileSystem.get(conf);
    }

    /**
     * Converts a delimited text file to a map file. Each input line is split at the
     * first occurrence of the delimiter: the part before it becomes the key and the
     * remainder becomes the value. Blank lines are skipped. If no input stream is
     * available for the input parameter nothing is written.
     *
     * Note: per the Hadoop MapFile documentation, keys must be appended in sorted
     * order, so the input file is expected to be sorted by source entity ID.
     *
     * @param inPutfilePathParam name of the configuration parameter holding the input text file path
     * @param outPutfilePathParam name of the configuration parameter holding the output map file path
     * @param delim field delimiter, treated as a literal string; also remembered for subsequent reads
     * @throws IOException on read or write failure or if a non blank line contains no delimiter
     * @throws IllegalArgumentException if the delimiter is null or empty
     */
    public void write(String inPutfilePathParam, String outPutfilePathParam, String delim) throws IOException {
        checkDelim(delim);
        InputStream fs = Utility.getFileStream(conf, inPutfilePathParam);
        if (null == fs) {
            return;
        }

        // NOTE(review): decodes with the platform default charset, as before - confirm the input encoding
        BufferedReader lineReader = new BufferedReader(new InputStreamReader(fs));
        MapFile.Writer writer = null;
        try {
            // same helper initReader uses, instead of passing a possibly null conf.get() result to Path
            String outputPath = Utility.assertStringConfigParam(conf, outPutfilePathParam,
                    "missing distance map file output path");
            Path outputFile = new Path(outputPath);
            Text txtKey = new Text();
            Text txtValue = new Text();
            writer = new MapFile.Writer(conf, fileSys, outputFile.toString(), Text.class, Text.class);

            String line = null;
            while ((line = lineReader.readLine()) != null) {
                if (line.isEmpty()) {
                    continue;
                }
                int pos = line.indexOf(delim);
                if (pos < 0) {
                    throw new IOException("delimiter '" + delim + "' not found in line: " + line);
                }
                txtKey.set(line.substring(0, pos));
                // skip the whole delimiter, which may be longer than one character
                txtValue.set(line.substring(pos + delim.length()));
                writer.append(txtKey, txtValue);
            }
            this.delim = delim;
        } finally {
            // release both ends even when reading or appending fails
            IOUtils.closeStream(writer);
            IOUtils.closeStream(lineReader);
        }
    }

    /**
     * Opens the map file for reading. Any reader opened earlier by this instance is
     * closed first. The delimiter used by {@link #read(String)} is the one remembered
     * from the last {@link #write(String, String, String)} call or supplied through
     * {@link #initReader(String, String)}.
     *
     * @param mapFileDirPathParam name of the configuration parameter holding the map file directory path
     * @throws IOException if the map file can not be opened
     */
    public void initReader(String mapFileDirPathParam) throws IOException {
        String dirPath = Utility.assertStringConfigParam(conf, mapFileDirPathParam, "missing distance map file directory");
        Path mapFiles = new Path(dirPath);
        IOUtils.closeStream(reader);
        reader = null;
        // assign the field; a local variable here would leave read() without a reader
        reader = new MapFile.Reader(fileSys, mapFiles.toString(), conf);
    }

    /**
     * Opens the map file for reading and sets the delimiter used to parse values,
     * for instances that did not write the map file themselves.
     *
     * @param mapFileDirPathParam name of the configuration parameter holding the map file directory path
     * @param delim field delimiter, treated as a literal string
     * @throws IOException if the map file can not be opened
     * @throws IllegalArgumentException if the delimiter is null or empty
     */
    public void initReader(String mapFileDirPathParam, String delim) throws IOException {
        checkDelim(delim);
        initReader(mapFileDirPathParam);
        this.delim = delim;
    }

    /**
     * Looks up the distances recorded for a source entity.
     *
     * @param key source entity ID
     * @return map of target entity ID to distance; empty if the key is not in the map file
     * @throws IOException on read failure or if the stored value is not a list of entity and distance pairs
     * @throws IllegalStateException if the reader has not been initialized or no delimiter is known
     */
    public Map<String, Double> read(String key) throws IOException {
        if (null == reader) {
            throw new IllegalStateException("map file reader not initialized, call initReader first");
        }
        if (null == delim) {
            throw new IllegalStateException("delimiter not set, call write or initReader with a delimiter first");
        }

        Map<String, Double> distanceMap = new HashMap<String, Double>();
        Text txtKey = new Text(key);
        Text txtValue = new Text();
        if (null == reader.get(txtKey, txtValue)) {
            // key absent from the map file
            return distanceMap;
        }

        String distances = txtValue.toString();
        if (distances.isEmpty()) {
            return distanceMap;
        }

        // quote the delimiter so it is matched literally, the same way write() locates it
        String[] items = distances.split(java.util.regex.Pattern.quote(delim));
        if (items.length % 2 != 0) {
            throw new IOException("malformed distance list for key " + key + ": " + distances);
        }
        // tokens alternate between target entity ID and distance
        for (int i = 0; i < items.length; i += 2) {
            distanceMap.put(items[i], Double.parseDouble(items[i + 1]));
        }
        return distanceMap;
    }

    /**
     * Closes the map file reader, if one is open.
     */
    public void closeReader() {
        IOUtils.closeStream(reader);
        reader = null;
    }

    /**
     * Rejects delimiters that can not split a line.
     */
    private static void checkDelim(String delim) {
        if (null == delim || delim.isEmpty()) {
            throw new IllegalArgumentException("delimiter must be a non empty string");
        }
    }
}