package convertors.gerard; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.PrintWriter; import java.util.ArrayList; import java.util.List; import org.molgenis.pheno.Individual; import org.molgenis.util.CsvFileReader; import org.molgenis.util.CsvFileWriter; import org.molgenis.util.CsvReader; import org.molgenis.util.Tuple; import org.molgenis.xgap.Marker; /** * Gerards format is as follows: * * rs604860343 19 217034 10000000000 rs805420609 19 218039 12001111002 etc, one * file per chromosome * * @author Morris Swertz * */ public class GerardParser { public static void main(String[] args) throws Exception { File inputDir = new File("D:/Data/athma/source"); File outputDir = new File("D:/Data/athma/xgap/"); writeMarkerFile(inputDir, outputDir); List<String> colNames = writeIndividualFile(inputDir, outputDir); writeMatrixFiles(inputDir, outputDir, colNames); } public static List<String> writeIndividualFile(File inputDir, File outputDir) throws Exception { final List<String> result = new ArrayList<String>(); for (File f : inputDir.listFiles()) { File outfile = new File(outputDir.getCanonicalFile() + "/individual.txt"); CsvReader reader = new CsvFileReader(f); CsvFileWriter writer = new CsvFileWriter(outfile); // each line is: // rs604860343 19 217034 10000000000 char[] chars = reader.colnames().get(3).toCharArray(); for (int i = 0; i < chars.length; i++) { Individual ind = new Individual(); ind.setName("Ind" + i); writer.writeRow(ind); result.add(ind.getName()); } writer.close(); return result; } return result; } public static void writeMarkerFile(final File inputDir, final File outputDir) throws Exception { File outfile = new File(outputDir.getCanonicalFile() + "/marker.txt"); final CsvFileWriter writer = new CsvFileWriter(outfile); for (File f : inputDir.listFiles()) { CsvReader reader = new CsvFileReader(f); for (Tuple tuple : reader) { // each line is: // rs604860343 19 217034 10000000000 Marker m = new Marker(); m.setName(tuple.getString(0)); m.setChromosome_Name(tuple.getString(1)); m.setBpStart(tuple.getLong(2)); writer.writeRow(m); // fixme: only write the columns that are not null!!! // can we not write the column headers at the end to enable // this? } } writer.close(); } public static void writeMatrixFiles(final File inputDir, final File outputDir, final List<String> colNames) throws Exception { for (File f : inputDir.listFiles()) { CsvReader reader = new CsvFileReader(f); File outfile = new File(outputDir.getCanonicalFile() + "/" + f.getName() + ".txt"); final PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(outfile))); int line_number = 1; for (Tuple tuple : reader) { // each line is: // rs604860343 19 217034 10000000000 char[] chars = tuple.getString(3).toCharArray(); // write header once if (line_number == 1) { for (int i = 0; i < chars.length; i++) { writer.print("\t"); writer.print(colNames.get(i)); } writer.println(); } // write values writer.print(tuple.getString(0)); for (int i = 0; i < chars.length; i++) writer.print("\t" + chars[i]); writer.println(); line_number++; } writer.close(); } } }