package org.iswc.iswc2012main.dev;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import com.csvreader.CsvReader;
import org.apache.log4j.Logger;
import sw4j.util.DataSmartMap;
public class ToolCsvLoader {
public static void main(String[] args){
test();
}
protected static void test(){
String csvFile = "local/logd/data/us-federal-agency-dbpedia.csv";
ToolCsvLoader loader = new ToolCsvLoader();
try {
int cnt = loader.loadCsvFile(csvFile);
System.out.println(cnt);
System.out.println(loader.m_data);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
HashMap<String,String> m_config = new HashMap<String,String>();
public static String CONFIG_COLUMN_KEY = "column_key";
HashMap<String,DataSmartMap> m_data= new HashMap<String,DataSmartMap>();
HashMap<String,String> m_metadata= new HashMap<String,String>();
public int loadCsvUrl(String szCsvUrl) throws IOException {
URL url = new URL(szCsvUrl);
getLogger().info("loading csv url ... "+ url.toString());
m_metadata.put("source", url.toString());
return loadCsv(new CsvReader(new InputStreamReader(url.openStream())));
}
public int loadCsvFile(String szCsvFilename) throws IOException {
File file_input = new File(szCsvFilename);
return loadCsvFile(file_input);
}
public int loadCsvFile(File file_input) throws IOException {
getLogger().info("loading csv file ... "+ file_input.getAbsolutePath());
return loadCsv(new CsvReader(new FileReader(file_input)));
}
public int loadCsvString(String szCsvData) throws IOException {
return loadCsv(new CsvReader(new StringReader(szCsvData)));
}
private Logger getLogger(){
return Logger.getLogger(this.getClass());
}
public int loadCsv(CsvReader csv) throws IOException {
int row_number=1;
////////////////////////////
//read header - we require all csv files have header
csv.readHeaders();
row_number++;
//an array of of hasmaps that have a number as a Key(coloum position) and Property as a Value
//each Hashmap for each input csv file in the dataset
String[] ary_prop = new String[csv.getHeaderCount()];
for(int i = 0;i<csv.getHeaderCount();i++)
{
String name = csv.getHeader(i).trim();
//save the property
ary_prop[i] =name;
}
////////////////////////////
//read data
int entry_number=0;
while ((csv.readRecord()))
{
row_number++;
//exit if the header count is mismatched
if (csv.getHeaderCount()<csv.getColumnCount()){
getLogger().fatal("too many columns: found (" +csv.getColumnCount()+") but the header row has ("+csv.getHeaderCount()+")");
System.out.println("at row_number "+ row_number + " with current record "+ csv.getCurrentRecord());
System.out.println("with value "+ csv.getRawRecord());
System.exit(-1);
}
if (csv.getHeaderCount()>csv.getColumnCount()){
getLogger().info("too few columns: found (" +csv.getColumnCount()+") but the header row has ("+csv.getHeaderCount()+")");
System.out.println("at row_number "+ row_number + " with current record "+ csv.getCurrentRecord());
System.out.println("with value "+ csv.getRawRecord());
}
//create entry id
String entryid= String.format("thing_%05d",entry_number);
entry_number++;
String szColumnKey = this.m_config.get(CONFIG_COLUMN_KEY);
if (null!=szColumnKey){
for(int i = 0;i<csv.getColumnCount();i++){
if (ary_prop[i].equals(szColumnKey))
entryid = csv.get(i).trim();
}
}
DataSmartMap row = new DataSmartMap();
//convert all data
for(int i = 0;i<csv.getColumnCount();i++)
{
row.put( ary_prop[i], csv.get(i));
}
m_data.put(entryid, row);
}
return m_data.size();
}
}