/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.dataimport.genes;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import org.erasmusmc.textMining.LVG.LVGNormaliser;
/**
 * One-off command-line tool that copies a pipe-delimited text file line by line,
 * inserting an extra column that holds the normalised form of the tab-separated
 * entries found in the tenth column (index {@value #TERM_COLUMN}).
 *
 * <p>Input path, output path and the normaliser cache path are hard-coded in
 * {@link #main(String[])}.
 */
public class NormJansFiles {

  /** Buffer size, in characters, used for both the reader and the writer. */
  private static final int BUFFER_SIZE = 1000000;

  /** Zero-based index of the column whose tab-separated entries are normalised. */
  private static final int TERM_COLUMN = 9;

  /**
   * Reads the hard-coded input file, transforms every line with
   * {@link #processLine(String, LVGNormaliser)} and writes the result to the
   * hard-coded output file, one output line per input line.
   *
   * <p>I/O failures are reported with a stack trace on standard error; they are not
   * rethrown.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String filename = "/home/schuemie/GeneList/geneprotein_OM.txt";
    String newfilename = "/home/schuemie/GeneList/geneprotein_OM_norm.txt";
    LVGNormaliser normaliser = new LVGNormaliser();
    normaliser.loadCacheBinary("/home/public/Peregrine/standardNormCache2006.bin");

    // try-with-resources closes (and therefore flushes) both streams on every exit
    // path, including an exception thrown half-way through the copy. Resources are
    // opened in declaration order, so the output file is only created once the input
    // file has been opened successfully.
    // The platform default charset is used for both streams, as before.
    try (BufferedReader reader =
            new BufferedReader(new InputStreamReader(new FileInputStream(filename)), BUFFER_SIZE);
        BufferedWriter writer =
            new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(newfilename)), BUFFER_SIZE)) {
      // readLine() returning null is the reliable end-of-stream signal; ready() only
      // says whether the next read might block and must not be used to detect EOF.
      String line;
      while ((line = reader.readLine()) != null) {
        writer.write(processLine(line, normaliser));
        writer.newLine();
      }
    } catch (FileNotFoundException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Builds the output line for one input line.
   *
   * <p>The line is split on {@code |}. When it has fewer than {@code TERM_COLUMN + 1}
   * columns it is returned unchanged. Otherwise every column is copied to the output
   * in order, and immediately before column {@code TERM_COLUMN} a new column is
   * inserted that contains the result of {@code normaliser.normalise(...)} for each
   * tab-separated entry of that column, joined by tabs again.
   *
   * <p>NOTE(review): {@link String#split(String)} discards trailing empty fields, so
   * trailing empty columns of a line that is rewritten are not reproduced in the
   * output. Left as is to keep the output format unchanged.
   *
   * @param string one line of the input file, without its line terminator; must not
   *     be {@code null}
   * @param normaliser normaliser applied to each tab-separated entry of the column
   * @return the line with the additional normalised column, or {@code string} itself
   *     when it has too few columns
   */
  private static String processLine(String string, LVGNormaliser normaliser) {
    String[] cols = string.split("[|]");
    if (cols.length <= TERM_COLUMN) {
      return string;
    }
    String[] lfs = cols[TERM_COLUMN].split("\t");
    StringBuilder newLine = new StringBuilder();
    for (int i = 0; i < cols.length; i++) {
      if (i != 0) {
        newLine.append("|");
      }
      if (i == TERM_COLUMN) {
        // Emit the normalised entries as their own column, then fall through so the
        // original column follows it untouched.
        for (int j = 0; j < lfs.length; j++) {
          if (j != 0) {
            newLine.append("\t");
          }
          newLine.append(normaliser.normalise(lfs[j]));
        }
        newLine.append("|");
      }
      newLine.append(cols[i]);
    }
    return newLine.toString();
  }
}