package org.aksw.sparqlify.csv;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Joiner;
/**
 * Command line helper that reads a delimited file through
 * {@code CsvMapperCliMain.getCsvIterator} and prints every row to standard
 * output as one tab-separated line, with each cell escaped by
 * {@link #escapeTsvField(String)}.
 *
 * The escaping scheme uses three two-character sequences: {@code \\} for a
 * backslash, {@code \t} for a tab and {@code \n} for a newline.
 */
public class Csv2Tsv {
    private static final Logger logger = LoggerFactory
            .getLogger(Csv2Tsv.class);

    /**
     * Manual round-trip self-check: unescapes a sample escaped string, escapes
     * the result again and verifies the original text is reproduced.
     *
     * Prints the three intermediate strings to standard output and then
     * terminates the JVM with exit code 0.
     *
     * @throws RuntimeException if the round trip does not reproduce the sample
     */
    public static void test() {
        String original = "this\\tis\\\\\\ta\\\\test\\nsecond line\\\\\\n and third";

        String unescaped = Csv2Tsv.unescapeTsvField(original);
        String escaped = Csv2Tsv.escapeTsvField(unescaped);

        System.out.println("-----");
        System.out.println(original);
        System.out.println("-----");
        System.out.println(unescaped);
        System.out.println("-----");
        System.out.println(escaped);
        System.out.println("-----");

        if(!original.equals(escaped)) {
            throw new RuntimeException("Fail");
        }

        System.exit(0);
    }

    /**
     * Entry point: converts the file named by the single argument and writes
     * the escaped, tab-joined rows to standard output.
     *
     * @param args exactly one element, the name of the input file
     * @throws FileNotFoundException declared for the file access performed by
     *         the row iterator
     * @throws RuntimeException if the number of arguments is not exactly one
     */
    public static void main(String[] args) throws FileNotFoundException {
        if(args.length != 1) {
            throw new RuntimeException("This tool takes exactly 1 argument, which is the filename of the CSV file to export to TSV.");
        }

        String filename = args[0];
        File file = new File(filename);

        Iterator<List<String>> it = CsvMapperCliMain.getCsvIterator(file, "\t");
        while(it.hasNext()) {
            List<String> line = it.next();

            // One escaped cell per input cell, so the final size is known up front.
            List<String> encoded = new ArrayList<String>(line.size());
            for(String cell : line) {
                encoded.add(escapeTsvField(cell));
            }

            String resultLine = Joiner.on("\t").join(encoded);
            System.out.println(resultLine);
        }
    }

    /**
     * Escapes backslashes, tabs and newlines so that the value can be stored
     * in a single TSV cell.
     *
     * Backslashes are doubled first; otherwise the backslashes introduced by
     * the tab and newline escapes would themselves be doubled.
     *
     * This should cleanly unescape values from bash:
     * sed -r 's|([^\\](\\\\)*)(\\n)|\1\n|g' | sed -r 's|([^\\](\\\\)*)(\\t)|\1\t|g' | sed -r 's|\\\\|\\|g'
     *
     * NOTE(review): that sed pipeline expects a non-backslash character in
     * front of every escape, so it presumably misses an escape at the very
     * start of a line or one directly following another escape - confirm
     * before relying on it.
     *
     * @param value the raw cell value, may be {@code null}
     * @return the escaped value, or {@code null} if {@code value} is {@code null}
     */
    public static String escapeTsvField(String value) {
        if(value == null) {
            return null;
        }

        String result = value.replace("\\", "\\\\");
        result = result.replace("\t", "\\t");
        result = result.replace("\n", "\\n");

        return result;
    }

    /**
     * Reverses {@link #escapeTsvField(String)}: decodes {@code \\} to a
     * backslash, {@code \t} to a tab and {@code \n} to a newline.
     *
     * The value is scanned once from left to right, so escapes are recognised
     * at any position, including the start of the value and immediately after
     * another escape. A backslash followed by any other character, and a lone
     * backslash at the end of the value, are copied through unchanged.
     *
     * @param value the escaped cell value, may be {@code null}
     * @return the decoded value, or {@code null} if {@code value} is {@code null}
     */
    public static String unescapeTsvField(String value) {
        if(value == null) {
            return null;
        }

        int length = value.length();
        StringBuilder result = new StringBuilder(length);

        for(int i = 0; i < length; ++i) {
            char c = value.charAt(i);

            // Ordinary characters and a trailing lone backslash are copied verbatim.
            if(c != '\\' || i + 1 >= length) {
                result.append(c);
                continue;
            }

            char next = value.charAt(i + 1);
            switch(next) {
            case '\\':
                result.append('\\');
                ++i; // consume the second character of the escape
                break;
            case 't':
                result.append('\t');
                ++i;
                break;
            case 'n':
                result.append('\n');
                ++i;
                break;
            default:
                // Not one of our escapes: keep the backslash; the following
                // character is handled by the next loop iteration.
                result.append(c);
                break;
            }
        }

        return result.toString();
    }
}