package tbx2rdf;

import tbx2rdf.types.TBX_Terminology;
import com.hp.hpl.jena.rdf.model.Model;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.xml.sax.SAXException;

/**
 * Main class for the TBX2RDF Converter.
 *
 * <p>KNOWN DESIGN ISSUE: this class currently mixes generic TBX2RDF concerns with
 * IATE-specific ones (see the default {@link #DATA_NAMESPACE}).
 *
 * <p>Entry point of the functionality: it parses the command line parameters and invokes
 * the conversion methods, making them available from the command line.
 *
 * <p>Example command lines (options may be given as {@code --key=value} or
 * {@code --key value}):
 * <pre>
 *   samples/iate.xml --output samples/iate.nt --big=true
 *   samples/CounterSample.xml --output=samples/CounterSample.nt
 * </pre>
 *
 * <p>It is advisable to start the Java Virtual Machine with {@code -Dfile.encoding=UTF-8},
 * because the standard (non-big) conversion reads the input with the platform default
 * character encoding.
 *
 * @author John McCrae - Universität Bielefeld
 * @author Victor Rodriguez - Universidad Politécnica de Madrid
 */
public class Main {

    /** Matches an option given as a single {@code --key=value} token. */
    private static final Pattern OPTION_WITH_VALUE =
            Pattern.compile("^--(output|mappings|datanamespace|big|lenient)=(.*?)$");

    /** Matches an option name given on its own; its value is then taken from the next token. */
    private static final Pattern OPTION_NAME_ONLY =
            Pattern.compile("^--(output|mappings|datanamespace|big|lenient)$");

    // Determines whether it will be a stream-parsing (if big=true) or a block conversion (big=false)
    static boolean big = false;
    // Establishes the file with the mappings
    static String mapping_file = "mappings.default";
    // Input file name to be read from
    static String input_file = "";
    // Output file name to be written to
    static String output_file = "";
    // If the output is to be shown in console
    static boolean bOutputInConsole = true;
    // Determines if the parsing is going to be lenient or strict
    public static boolean lenient = false;
    // The mappings to be used
    public static Mappings mappings;
    // The base namespace of the dataset
    public static String DATA_NAMESPACE = "http://tbx2rdf.lider-project.eu/data/iate/";

    /**
     * Command line entry point: configures logging, parses the arguments, loads the
     * mappings and runs the selected conversion.
     *
     * <p>The JVM exits with status -1 when the arguments are invalid or when the
     * conversion reports a failure, so that calling scripts can detect the error.
     *
     * @param args command line arguments; the first one is the input file
     * @throws ParserConfigurationException declared for callers; see the mappings reader
     * @throws IOException declared for callers; see the mappings reader
     * @throws SAXException declared for callers; see the mappings reader
     */
    public static void main(String[] args) throws ParserConfigurationException, IOException, SAXException {
        PropertyConfigurator.configure("log4j.properties");
        if (!parseParams(args)) {
            System.exit(-1);
        }

        // READ MAPPINGS
        System.err.println("Using mapping file: " + mapping_file + "\n");
        mappings = Mappings.readInMappings(mapping_file);

        final boolean converted = big ? convertBigFile() : convertSmallFile();
        if (!converted) {
            // The conversion methods already reported the cause on stderr.
            System.exit(-1);
        }
    }

    /**
     * Parses the command line parameters and stores them in the static fields of this class.
     *
     * <p>The first argument is the input file, which must exist. The remaining arguments are
     * options in the form {@code --key=value} or {@code --key value}, where key is one of
     * {@code output}, {@code mappings}, {@code datanamespace}, {@code big} or {@code lenient}.
     * Anything else is reported on stderr and ignored.
     *
     * @param args the command line arguments
     * @return {@code true} if the arguments are usable; {@code false} if no argument was
     *         given or the input file does not exist
     */
    public static boolean parseParams(String[] args) {
        if (args == null || args.length == 0) {
            printUsage();
            return false;
        }

        input_file = args[0]; // First argument, input file
        if (!new File(input_file).exists()) {
            System.err.println("The file " + input_file + " does not exist");
            return false;
        }
        output_file = defaultOutputFile(input_file);

        for (int i = 1; i < args.length; i++) {
            final String arg = args[i];
            final Matcher withValue = OPTION_WITH_VALUE.matcher(arg);
            if (withValue.matches()) {
                applyOption(withValue.group(1), withValue.group(2));
                continue;
            }
            final Matcher nameOnly = OPTION_NAME_ONLY.matcher(arg);
            final boolean nextIsValue = i + 1 < args.length && !args[i + 1].startsWith("--");
            if (nameOnly.matches() && nextIsValue) {
                // "--key value" form: consume the following token as the value.
                applyOption(nameOnly.group(1), args[++i]);
                continue;
            }
            System.err.println("Ignoring unrecognized argument (expected --<option>=<value>): " + arg);
        }
        return true;
    }

    /** Prints the command line usage on stderr. */
    private static void printUsage() {
        System.err.println("Usage: TBX2RDF_Converter <INPUT_FILE> (--output=<OUTPUT_FILE>)? "
                + "(--mappings=<MAPPING_FILE>)? (--big=true)? (--lenient=true)? "
                + "(--datanamespace=<DATA_NAMESPACE>)?");
        System.err.println("If no OUTPUT_FILE is provided, then <INPUT_FILE>s/.xml/.rdf/ will be assumed as output file.");
        System.err.println("If no MAPPING_FILE is provided, then mappings.default will be used.");
    }

    /**
     * Derives the default output file name from the input file name: a trailing
     * {@code .xml} or {@code .tbx} extension is replaced by {@code .rdf}; any other name
     * gets {@code .rdf} appended.
     */
    private static String defaultOutputFile(String inputFile) {
        // Anchored at the end so that ".xml"/".tbx" inside a directory name is left alone.
        String candidate = inputFile.replaceAll("\\.(xml|tbx)$", ".rdf");
        if (!candidate.endsWith(".rdf")) {
            candidate += ".rdf";
        }
        return candidate;
    }

    /** Stores the value of one recognized option in the matching static field. */
    private static void applyOption(String key, String value) {
        if (key.equals("output")) {
            output_file = value;
            bOutputInConsole = false;
            System.err.println("OUTPUT_FILE set to " + output_file + "\n");
        } else if (key.equals("mappings")) {
            mapping_file = value;
            System.err.println("MAPPING_FILE set to " + mapping_file + "\n");
        } else if (key.equals("datanamespace")) {
            DATA_NAMESPACE = value;
            System.err.println("DATA_NAMESPACE set to " + DATA_NAMESPACE + "\n");
        } else if (key.equals("big")) {
            // Only the literal "true" enables the mode; other values leave it untouched.
            if (value.equals("true")) {
                big = true;
                System.err.println("Processing large file");
            }
        } else if (key.equals("lenient")) {
            if (value.equals("true")) {
                lenient = true;
                System.err.println("Processing in lenient mode");
            }
        }
    }

    /** Reports a failed conversion on stderr, including the exception type. */
    private static void reportFailure(Exception e) {
        // e.toString() keeps the exception class, which getMessage() alone drops (or is null).
        System.err.println("Conversion failed: " + e);
    }

    /**
     * This is the conversion to be invoked for large files, which are processed as a stream.
     * The output is serialized while the conversion is being done.
     *
     * <p>Console output is always disabled here ({@code bOutputInConsole} is reset to
     * {@code false}); the result goes to {@code output_file} encoded as UTF-8, and to
     * standard output only if {@code output_file} is empty.
     *
     * @return {@code true} on success; {@code false} if the conversion threw an exception
     *         or the output stream reported a write error
     */
    public static boolean convertBigFile() {
        bOutputInConsole = false;
        System.err.println("Doing the conversion of a big file\n");

        // With console output forced off above, stdout is only used when no file name is set.
        final boolean toConsole = output_file.isEmpty();
        PrintStream out = null;
        try {
            TBX2RDF_Converter converter = new TBX2RDF_Converter();
            out = toConsole ? System.out : new PrintStream(output_file, "UTF-8");
            converter.convertAndSerializeLargeFile(input_file, out, mappings);
            // PrintStream never throws on write; checkError() flushes and exposes failures.
            if (out.checkError()) {
                System.err.println("Conversion failed: error while writing the output");
                return false;
            }
            return true;
        } catch (Exception e) {
            reportFailure(e);
            return false;
        } finally {
            if (out != null) {
                if (toConsole) {
                    out.flush(); // never close System.out
                } else {
                    out.close();
                }
            }
        }
    }

    /**
     * Standard conversion.
     * This is the conversion invoked from the web service.
     * The input file is read as a whole and kept in memory; the resulting model is written
     * to {@code output_file} as Turtle.
     *
     * @return {@code true} on success; {@code false} if any step threw an exception
     */
    public static boolean convertSmallFile() {
        System.err.println("Doing the standard conversion (not a big file)\n");

        // READ TBX XML
        System.err.println("Opening file " + input_file + "\n");
        try (BufferedReader reader = new BufferedReader(new FileReader(input_file))) {
            TBX2RDF_Converter converter = new TBX2RDF_Converter();
            TBX_Terminology terminology = converter.convert(reader, mappings);

            // WRITE
            System.err.println("Writting output to " + output_file + "\n");
            final Model model = terminology.getModel(Main.DATA_NAMESPACE);
            try (FileOutputStream out = new FileOutputStream(output_file)) {
                RDFDataMgr.write(out, model, Lang.TURTLE);
            }
            return true;
        } catch (Exception e) {
            reportFailure(e);
            return false;
        }
    }
}