package org.fastcatsearch.datasource.reader;
import org.fastcatsearch.datasource.SourceModifier;
import org.fastcatsearch.datasource.reader.annotation.SourceReader;
import org.fastcatsearch.ir.common.IRException;
import org.fastcatsearch.ir.config.SingleSourceConfig;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Source reader that converts the rows of a comma separated text file into records.
 *
 * <p>The first line read from a file is taken as the header row. Header names are trimmed
 * and upper-cased, and every following line is split on {@code ','} and mapped to a
 * {@code Map} keyed by the (upper-cased) field name.
 *
 * <p>Which columns end up in a record is controlled by the {@code fieldList} configuration
 * value: a comma separated list of header names, or {@code *} for every header column.
 *
 * <p>Limitation: lines are split on every comma. Quoted fields that contain a comma, and
 * fields that span several lines, are not supported.
 *
 * Created by swsong on 2015. 6. 29..
 */
@SourceReader(name = "CSV_FILE")
public class CSVFileReader extends AbstractFileReader {

    /** Configuration value meaning "use every column found in the header". */
    private static final String ALL_FIELDS = "*";

    /** Column delimiter. Passed to {@link String#split(String)}, so it is treated as a regex. */
    private static final String DELIMITER = ",";

    /** Upper-cased field names requested by configuration; {@code null} means all header columns. */
    private List<String> fieldList;

    /** Record keys, in the same order as {@link #fieldIndexList}. */
    private List<String> fieldNameList;

    /** Column index for each entry of {@link #fieldNameList}; {@code -1} when the header has no such column. */
    private List<Integer> fieldIndexList;

    public CSVFileReader() {
    }

    /**
     * Creates a reader; all arguments are handed to the superclass constructor unchanged.
     */
    public CSVFileReader(String collectionId, File filePath, SingleSourceConfig singleSourceConfig, SourceModifier<Map<String, Object>> sourceModifier, String lastIndexTime)
            throws IRException {
        super(collectionId, filePath, singleSourceConfig, sourceModifier, lastIndexTime);
    }

    /**
     * Reads the {@code fieldList} configuration value and prepares the list of requested fields.
     *
     * <p>When the value is {@code *}, missing, blank, or contains no usable name,
     * {@link #fieldList} is left untouched (i.e. {@code null} on a fresh instance) and every
     * header column is used.
     *
     * @throws IRException propagated from {@code super.init()}
     */
    @Override
    public void init() throws IRException {
        super.init();
        String fieldNames = getConfigString("fieldList");
        if (fieldNames == null) {
            // No value configured: fall back to "all fields" instead of failing with an NPE.
            return;
        }
        String trimmed = fieldNames.trim();
        if (trimmed.isEmpty() || ALL_FIELDS.equals(trimmed)) {
            return;
        }
        List<String> selected = new ArrayList<String>();
        for (String field : trimmed.split(DELIMITER)) {
            String name = field.trim().toUpperCase();
            // Ignore blank tokens such as the middle one in "a,,b".
            if (!name.isEmpty()) {
                selected.add(name);
            }
        }
        if (!selected.isEmpty()) {
            fieldList = selected;
        }
    }

    /**
     * Reads lines until one can be converted into a record and returns that record.
     *
     * <p>A line that cannot be converted (for example because it has fewer columns than a
     * requested column index) is logged and skipped.
     *
     * @param reader reader positioned after the header line
     * @return field name to raw column text for the next parsable line; values are not trimmed
     * @throws IOException with message {@code "EOF"} when no more lines are available
     */
    @Override
    protected Map<String, Object> parse(BufferedReader reader) throws IRException, IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            try {
                // A negative limit keeps trailing empty columns, so "a,b," yields three
                // elements instead of two and the row is not rejected as too short.
                String[] els = line.split(DELIMITER, -1);
                Map<String, Object> record = new HashMap<String, Object>();
                for (int i = 0; i < fieldIndexList.size(); i++) {
                    int index = fieldIndexList.get(i);
                    // -1 marks a requested field that is absent from the header.
                    if (index != -1) {
                        record.put(fieldNameList.get(i), els[index]);
                    }
                }
                // Parsed successfully: hand the record back.
                return record;
            } catch (RuntimeException e) {
                // Malformed line: report it and continue with the next one.
                logger.error("parsing error : line= " + line, e);
            }
        }
        throw new IOException("EOF");
    }

    /**
     * Consumes the header line and builds the field name / column index lookup lists.
     *
     * <p>An empty input (no header line at all) results in no header columns, so the first
     * call to {@link #parse(BufferedReader)} reports {@code EOF}.
     *
     * @param reader reader positioned at the start of the file
     * @throws IOException if reading the header line fails
     */
    @Override
    protected void initReader(BufferedReader reader) throws IRException, IOException {
        String headerLine = reader.readLine();
        List<String> headerList = new ArrayList<String>();
        if (headerLine != null) {
            // A byte order mark decoded as U+FEFF is not removed by trim() and would
            // keep the first header from matching a configured field name.
            if (headerLine.startsWith("\uFEFF")) {
                headerLine = headerLine.substring(1);
            }
            for (String header : headerLine.split(DELIMITER)) {
                headerList.add(header.trim().toUpperCase());
            }
        }

        fieldNameList = new ArrayList<String>();
        fieldIndexList = new ArrayList<Integer>();
        if (fieldList != null) {
            // Specific fields were requested: look up each one in the header.
            for (String field : fieldList) {
                fieldNameList.add(field);
                // indexOf returns -1 when the header has no such column.
                fieldIndexList.add(headerList.indexOf(field));
            }
        } else {
            // No fields were requested: use every header column in file order.
            for (int i = 0; i < headerList.size(); i++) {
                fieldNameList.add(headerList.get(i));
                fieldIndexList.add(i);
            }
        }
    }

    /**
     * Registers the {@code fieldList} parameter in addition to the superclass parameters.
     */
    @Override
    protected void initParameters() {
        super.initParameters();
        // NOTE(review): the trailing "true, null" arguments presumably mean "required, no default" - confirm
        // against SourceReaderParameter.
        registerParameter(new SourceReaderParameter("fieldList", "Field List", "Comma separated fields to use. Use '*' for all fields."
                , SourceReaderParameter.TYPE_STRING_LONG, true, null));
    }
}