/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * XML2NTriple.java * * Created on July 13, 2001, 10:06 PM */ package org.apache.jena.rdfxml.xmlinput; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.net.URL; import java.util.Locale ; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.xml.sax.ErrorHandler; import org.xml.sax.Locator; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; /** A command line interface into ARP. * Creates NTriple's or just error messages. * <pre> * java <class-path> com.hp.hpl.jena.arp.NTriple ( [ -[xstfurR]][ -b xmlBase -[eiw] NNN[,NNN...] ] [ file ] [ url ] )... * </pre> * <p> * All options, files and URLs can be intemingled in any order. * They are processed from left-to-right. * <dl> * file </dt><dd> Converts RDF/XML file into N-triples * </dd><dt> * url </dt><dd> Converts RDF/XML from URL into N-triples * </dd><dt> * -b uri </dt><dd> Sets XML Base to the absolute URI. * </dd><dt> * -r </dt><dd> Content is RDF (default, no embedding, rdf:RDF tag may be omitted). * </dd><dt> * -R </dt><dd> RDF embedded in XML document, search for obligatory rdf:RDF start element. * </dd><dt> * -t </dt><dd> No n-triple output, error checking only. * </dd><dt> * -x </dt><dd> Lax mode - warnings are suppressed. * </dd><dt> * -s </dt><dd> Strict mode - most warnings are errors. * </dd><dt> * -u </dt><dd> Allow unqualified attributes (defaults to warning). * </dd><dt> * -f </dt><dd> All errors are.error - report first one only. * </dd><dt> * -n </dt><dd> Show line numbers of each triple. * </dd><dt> * -b url </dt><dd> Sets XML Base to the absolute url. * </dd><dt> * -e NNN[,NNN...]</dt><dd> * Treats numbered warning conditions as errrors. * </dd><dt> * -w NNN[,NNN...]</dt><dd> * Treats numbered error conditions as warnings. * </dd><dt> * -i NNN[,NNN...] * </dt><dd> * Ignores numbered error/warning conditions. * </dl> */ public class NTriple implements ARPErrorNumbers { private static StringBuffer line = new StringBuffer(); private static ARP arp; private static String xmlBase = null; private static boolean numbers = false; /** Starts an RDF/XML to NTriple converter. * @param args The command-line arguments. */ static public void main(String args[]) { mainEh(args, null, null); } static StatementHandler andMeToo = null; /** Starts an RDF/XML to NTriple converter, * using an error handler, and an ARPHandler. * Statements get processed both by this class, * and by the passed in StatementHandler * @param args The command-line arguments. * @param eh Can be null. * @param ap Can be null. */ static public void mainEh(String args[], ErrorHandler eh, ARPEventHandler ap) { boolean doneOne = false; startMem = -1; andMeToo = ap; //SH sh = new SH(); int i; arp = new ARP(); ARPHandlers handlers = arp.getHandlers(); handlers.setStatementHandler(getSH(true)); // arp.getOptions().setEmbedding(true); if (ap != null) { handlers.setNamespaceHandler(ap); handlers.setExtendedHandler(ap); } if (eh != null) handlers.setErrorHandler(eh); for (i = 0; i < args.length - 1; i++) { if (args[i].startsWith("-")) { i += processOpts(args[i].substring(1), args[i + 1]); } else { doneOne = true; process(args[i]); } } if (args.length > 0) { if (args[i].startsWith("-")) { if (doneOne || processOpts(args[i].substring(1), "100") == 1) usage(); } else { doneOne = true; process(args[i]); } } if (!doneOne) { process(System.in, "http://example.org/stdin", "standard input"); } if ( startMem != -1) { rt.gc(); System.out.println(rt.totalMemory()-rt.freeMemory()-startMem); rt.gc(); System.out.println(rt.totalMemory()-rt.freeMemory()-startMem); rt.gc(); System.out.println(rt.totalMemory()-rt.freeMemory()-startMem); rt.gc(); System.out.println(rt.totalMemory()-rt.freeMemory()-startMem); } } /** * @param b false for quiet. */ private static StatementHandler getSH(boolean b) { StatementHandler rslt = b?(StatementHandler)new SH(System.out):new NoSH(); if (andMeToo!=null) rslt = new TwoSH(rslt,andMeToo); return rslt; } static private void lineNumber() { if (numbers) { Locator locator = arp.getLocator(); if (locator != null) print( "# " + locator.getSystemId() + ":" + locator.getLineNumber() + "(" + locator.getColumnNumber() + ")\n"); } } /* * Options: * -x Lax, Warnings suppressed * -s Strict, Warnings are errors * -f All errors are.error. * -u Suppress unqualified attribute warnings * -t Error checking only, no n-triple output * -b: set xml:base (same for all files?) * -e: convert numbered warnings to errors * -i: suppress numbered warnings * -w: convert numbered errors/suppressed warnings to warnings * -n: give line numbers * */ static void usage() { System.err.println( "java <class-path> " + NTriple.class.getName() + " ( [ -[xstfurR]][ -b xmlBase -[eiw] NNN[,NNN...] ] [ file ] [ url ] )... "); System.err.println( " All options, files and URLs can be intemingled in any order."); System.err.println(" They are processed from left-to-right."); System.err.println( " file Converts RDF/XML file into N-triples"); System.err.println( " url Converts RDF/XML from URL into N-triples"); System.err.println(" -b uri Sets XML Base to the absolute URI."); System.err.println( " -r Content is RDF (default, no embedding, rdf:RDF tag may be omitted)."); System.err.println( " -R RDF embedded in XML document, search for obligatory rdf:RDF start element."); System.err.println( " -t No n-triple output, error checking only."); System.err.println(" -x Lax mode - warnings are suppressed."); System.err.println( " -s Strict mode - most warnings are errors."); System.err.println(" -n Show line and column numbers."); System.err.println( " -u Allow unqualified attributes (defaults to warning)."); System.err.println( " -f All errors are.error - report first one only."); System.err.println(" -b url Sets XML Base to the absolute url."); System.err.println(" -e NNN[,NNN...]"); System.err.println( " Treats numbered warning conditions as errrors."); System.err.println(" -w NNN[,NNN...]"); System.err.println( " Treats numbered error conditions as warnings."); System.err.println(" -i NNN[,NNN...]"); System.err.println( " Ignores numbered error/warning conditions."); System.exit(1); } static final private Runtime rt = Runtime.getRuntime(); static private int startMem = -1; static private int processOpts(String opts, String nextArg) { boolean usedNext = false; ARPOptions options = arp.getOptions(); for (int i = 0; i < opts.length(); i++) { char opt = opts.charAt(i); if ("beiwD".indexOf(opt) != -1) { if (usedNext) usage(); usedNext = true; } switch (opt) { case 'D': final int nStatements = Integer.parseInt(nextArg); rt.gc(); rt.gc(); startMem = (int)(rt.totalMemory()-rt.freeMemory()); arp.getHandlers().setStatementHandler(new StatementHandler(){ int debugC = 0; @Override public void statement(AResource subj, AResource pred, AResource obj) { statement(null,null,(ALiteral)null); } @Override public void statement(AResource subj, AResource pred, ALiteral lit) { if (++debugC%100 == 0) { System.out.println("T: " + debugC); rt.gc(); System.out.println("M1: "+ (rt.totalMemory()-rt.freeMemory()-startMem)); rt.gc(); System.out.println("M2: " + (rt.totalMemory()-rt.freeMemory()-startMem)); } if ( debugC == 1 ){ rt.gc(); rt.gc(); startMem = (int)(rt.totalMemory()-rt.freeMemory()); } if (debugC == nStatements) { rt.gc(); System.err.println("Kill me now."); try { Thread.sleep(200000); } catch (Exception e){ // ignore } } } }); break; case 'x' : options.setLaxErrorMode(); break; case 's' : options.setStrictErrorMode(); break; case 't' : arp.getHandlers().setStatementHandler(getSH(false)); break; case 'r' : options.setEmbedding(false); break; case 'R' : options.setEmbedding(true); break; case 'n' : numbers = true; break; case 'E': arp.getHandlers().setErrorHandler(new ErrorHandler(){ @Override public void warning(SAXParseException exception) { /* ignore */ } @Override public void error(SAXParseException exception) { /* ignore */ } @Override public void fatalError(SAXParseException exception) { /* ignore */ } }); arp.setBadStatementHandler(new SH(System.err)); break; case 'b' : xmlBase = nextArg; break; case 'e' : setErrorMode(nextArg, EM_ERROR); break; case 'i' : setErrorMode(nextArg, EM_IGNORE); break; case 'w' : setErrorMode(nextArg, EM_WARNING); break; case 'f' : for (int j = 0; j < 400; j++) { if (options.setErrorMode(j, -1) == EM_ERROR) options.setErrorMode(j, EM_FATAL); } break; case 'u' : options.setErrorMode(WARN_UNQUALIFIED_ATTRIBUTE, EM_IGNORE); options.setErrorMode(WARN_UNQUALIFIED_RDF_ATTRIBUTE, EM_IGNORE); break; default : usage(); } } return usedNext ? 1 : 0; } static private void setErrorMode(String numbers, int mode) { int n[] = new int[3]; int j = 0; numbers += ","; for (int i = 0; i < numbers.length(); i++) { char c = numbers.charAt(i); switch (c) { case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : if (j == 3) usage(); n[j++] = c - '0'; break; case ' ' : case ';' : case ',' : if (i == 0) usage(); switch (j) { case 0 : break; case 3 : arp.getOptions().setErrorMode( n[0] * 100 + n[1] * 10 + n[2], mode); j = 0; break; default : usage(); } break; default : usage(); } } } @SuppressWarnings("resource") static private void process(String surl) { InputStream in = null ; URL url; String baseURL; try { File ff = new File(surl); in = new FileInputStream(ff); url = ff.toURI().toURL() ; baseURL = url.toExternalForm(); if (baseURL.startsWith("file:/") && !baseURL.startsWith("file://")) { baseURL = "file://" + baseURL.substring(5); } } catch (Exception ignore) { try { url = new URL(surl); in = url.openStream(); baseURL = url.toExternalForm(); } catch (Exception e) { System.err.println("ARP: Failed to open: " + surl); System.err.println( " " + ParseException.formatMessage(ignore)); System.err.println(" " + ParseException.formatMessage(e)); return; } } process(in, baseURL, surl); try { in.close() ; } catch (IOException ex) {} } static private void process(InputStream in, String xmlBasex, String surl) { String xmlBasey = xmlBase == null ? xmlBasex : xmlBase; try { arp.load(in, xmlBasey); } catch (IOException e) { System.err.println( "Error: " + surl + ": " + ParseException.formatMessage(e)); } catch (SAXParseException e) { // already reported. } catch (SAXException sax) { System.err.println( "Error: " + surl + ": " + ParseException.formatMessage(sax)); } } private static class TwoSH implements StatementHandler { final StatementHandler a, b; @Override public void statement(AResource subj, AResource pred, AResource obj) { a.statement(subj, pred, obj); b.statement(subj, pred, obj); } @Override public void statement(AResource subj, AResource pred, ALiteral lit) { a.statement(subj, pred, lit); b.statement(subj, pred, lit); } TwoSH(StatementHandler A, StatementHandler B) { a = A; b = B; } } private static class NoSH implements StatementHandler { // private int ix = 0; // private void userData(AResource n){ // if (n.isAnonymous()) { //// n.setUserData(new Integer(ix++)); // } // } @Override public void statement(AResource subj, AResource pred, AResource obj) { // userData(subj); // userData(pred); // userData(obj); } @Override public void statement(AResource subj, AResource pred, ALiteral lit) { // userData(subj); // userData(pred); } } private static class SH implements StatementHandler { PrintStream out; SH(PrintStream out){ this.out = out; } @Override public void statement(AResource subj, AResource pred, AResource obj) { lineNumber(); resource(subj); resource(pred); resource(obj); line.append('.'); out.println(line); line.setLength(0); } @Override public void statement(AResource subj, AResource pred, ALiteral lit) { // String lang = lit.getLang(); // String parseType = lit.getParseType(); lineNumber(); /* if (parseType != null) { System.out.print("# "); if (parseType != null) System.out.print("'" + parseType + "'"); System.out.println(); } */ resource(subj); resource(pred); literal(lit); line.append('.'); out.println(line); line.setLength(0); } } static private void print(String s) { line.append(s); } static private void resource(AResource r) { if (r.isAnonymous()) { print("_:j"); print(escapeNTriple(r.getAnonymousID())); print(" "); } else { print("<"); escapeURI(r.getURI()); print("> "); } } static private Pattern ntripleBnode=Pattern.compile("[a-zA-Y0-9]*"); /** * * Replace any non-legal char (or Z) with ZNN where NN are the hex codes * in UTF-8 * @param anonymousID Is something that corresponds to an XMLName * @return an ascii string that is legal NTriple */ public static String escapeNTriple(String anonymousID) { Matcher matcher = ntripleBnode.matcher(anonymousID); if (matcher.matches()) return anonymousID; matcher.reset(); StringBuilder rslt = new StringBuilder(); int lastNotMatched = 0; while (matcher.find()) { String unmatched = anonymousID.substring(lastNotMatched, matcher.start()); rslt.append(escapeUTF8(unmatched)); lastNotMatched = matcher.end(); rslt.append(matcher.group()); } rslt.append(escapeUTF8(anonymousID.substring(lastNotMatched))); return rslt.toString(); } private static StringBuffer escapeUTF8(String str) { StringBuffer rslt = new StringBuffer(); try { for (byte b : str.getBytes("utf-8")) { rslt.append("Z"); if ((0xff&b)<16) { rslt.append("0"); } rslt.append(Integer.toHexString(0xff&b)); } } catch (UnsupportedEncodingException e) { throw new Error(e); } return rslt; } static private void escape(String s) { int lg = s.length(); for (int i = 0; i < lg; i++) { char ch = s.charAt(i); switch (ch) { case '\\' : print("\\\\"); break; case '"' : print("\\\""); break; case '\n' : print("\\n"); break; case '\r' : print("\\r"); break; case '\t' : print("\\t"); break; default : if (ch >= 32 && ch <= 126) line.append(ch); else { print("\\u"); String hexstr = Integer.toHexString(ch).toUpperCase(Locale.ENGLISH); int pad = 4 - hexstr.length(); for (; pad > 0; pad--) print("0"); print(hexstr); } } } } static private boolean okURIChars[] = new boolean[128]; static { for (int i = 32; i < 127; i++) okURIChars[i] = true; okURIChars['<'] = false; okURIChars['>'] = false; okURIChars['\\'] = false; } static private void escapeURI(String s) { int lg = s.length(); for (int i = 0; i < lg; i++) { char ch = s.charAt(i); if (ch < okURIChars.length && okURIChars[ch]) { line.append(ch); } else { print("\\u"); String hexstr = Integer.toHexString(ch).toUpperCase(); int pad = 4 - hexstr.length(); for (; pad > 0; pad--) print("0"); print(hexstr); } } } static private void literal(ALiteral l) { //if (l.isWellFormedXML()) // System.out.print("xml"); line.append('"'); escape(l.toString()); line.append('"'); String lang = l.getLang(); if (lang != null && !lang.equals("")) { line.append('@'); print(lang); } String dt = l.getDatatypeURI(); if (dt != null && !dt.equals("")) { print("^^<"); escapeURI(dt); line.append('>'); } line.append(' '); } }