/* Copyright (c) Jython Developers */ package org.python.modules._csv; import org.python.core.ArgParser; import org.python.core.ClassDictInit; import org.python.core.Py; import org.python.core.PyBaseString; import org.python.core.PyDictionary; import org.python.core.PyException; import org.python.core.PyInteger; import org.python.core.PyObject; import org.python.core.PyString; import org.python.core.PyStringMap; /** * The Python _csv module. * * Provides the low-level underpinnings of a CSV reading/writing module. Users should not * use this module directly, but import the csv.py module instead. */ public class _csv implements ClassDictInit { public static PyString __doc__ = Py.newString( "CSV parsing and writing.\n" + "\n" + "This module provides classes that assist in the reading and writing\n" + "of Comma Separated Value (CSV) files, and implements the interface\n" + "described by PEP 305. Although many CSV files are simple to parse,\n" + "the format is not formally defined by a stable specification and\n" + "is subtle enough that parsing lines of a CSV file with something\n" + "like line.split(\",\") is bound to fail. The module supports three\n" + "basic APIs: reading, writing, and registration of dialects.\n" + "\n" + "\n" + "DIALECT REGISTRATION:\n" + "\n" + "Readers and writers support a dialect argument, which is a convenient\n" + "handle on a group of settings. When the dialect argument is a string,\n" + "it identifies one of the dialects previously registered with the module.\n" + "If it is a class or instance, the attributes of the argument are used as\n" + "the settings for the reader or writer:\n" + "\n" + " class excel:\n" + " delimiter = ','\n" + " quotechar = '\"'\n" + " escapechar = None\n" + " doublequote = True\n" + " skipinitialspace = False\n" + " lineterminator = '\r\n'\n" + " quoting = QUOTE_MINIMAL\n" + "\n" + "SETTINGS:\n" + "\n" + " * quotechar - specifies a one-character string to use as the \n" + " quoting character. It defaults to '\"'.\n" + " * delimiter - specifies a one-character string to use as the \n" + " field separator. It defaults to ','.\n" + " * skipinitialspace - specifies how to interpret whitespace which\n" + " immediately follows a delimiter. It defaults to False, which\n" + " means that whitespace immediately following a delimiter is part\n" + " of the following field.\n" + " * lineterminator - specifies the character sequence which should \n" + " terminate rows.\n" + " * quoting - controls when quotes should be generated by the writer.\n" + " It can take on any of the following module constants:\n" + "\n" + " csv.QUOTE_MINIMAL means only when required, for example, when a\n" + " field contains either the quotechar or the delimiter\n" + " csv.QUOTE_ALL means that quotes are always placed around fields.\n" + " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" + " fields which do not parse as integers or floating point\n" + " numbers.\n" + " csv.QUOTE_NONE means that quotes are never placed around fields.\n" + " * escapechar - specifies a one-character string used to escape \n" + " the delimiter when quoting is set to QUOTE_NONE.\n" + " * doublequote - controls the handling of quotes inside fields. When\n" + " True, two consecutive quotes are interpreted as one during read,\n" + " and when writing, each quote character embedded in the data is\n" + " written as two quotes\n" + "\n"); // XXX: should be per PySystemState /** Dialect registry. */ public static PyDictionary _dialects = new PyDictionary(); // XXX: should be per PySystemState /** Max parsed field size */ public static volatile int field_limit = 128 * 1024; /** _csv.Error exception. */ public static final PyObject Error = Py.makeClass("Error", Py.Exception, exceptionNamespace()); public static PyException Error(String message) { return new PyException(Error, message); } /** Module version. */ public static PyString __version__ = new PyString("1.0"); public static void classDictInit(PyObject dict) { dict.__setitem__("__name__", Py.newString("_csv")); dict.__setitem__("__doc__", __doc__); dict.__setitem__("Dialect", PyDialect.TYPE); dict.__setitem__("Error", Error); for (QuoteStyle style : QuoteStyle.values()) { dict.__setitem__(style.name(), Py.newInteger(style.ordinal())); } dict.__setitem__("classDictInit", null); dict.__setitem__("field_limit", null); } public static void register_dialect(PyObject[] args, String[] keywords) { int argc = args.length - keywords.length; if (argc > 2) { throw Py.TypeError("register_dialect() expected at most 2 arguments, got " + argc); } ArgParser ap = parseArgs("register_dialect", args, keywords); PyObject name = ap.getPyObject(0); PyObject dialect = ap.getPyObject(1, null); if (!(name instanceof PyBaseString)) { throw Py.TypeError("dialect name must be a string or unicode"); } _dialects.__setitem__(name, dialectFromKwargs(dialect, args, keywords)); return; } public static void unregister_dialect(PyObject name) { if (!_dialects.has_key(name)) { throw Error("unknown dialect"); } _dialects.__delitem__(name); } public static PyObject get_dialect(PyObject name) { return get_dialect_from_registry(name); } public static PyObject list_dialects() { return _dialects.keys(); } public static PyObject reader(PyObject[] args, String[] keywords) { ArgParser ap = parseArgs("reader", args, keywords); PyObject iterator = Py.iter(ap.getPyObject(0), "argument 1 must be an iterator"); PyObject dialect = ap.getPyObject(1, null); return new PyReader(iterator, dialectFromKwargs(dialect, args, keywords)); } public static PyObject writer(PyObject[] args, String[] keywords) { ArgParser ap = parseArgs("writer", args, keywords); PyObject outputFile = ap.getPyObject(0); PyObject dialect = ap.getPyObject(1, null); PyObject writeline = outputFile.__findattr__("write"); if (writeline == null || !writeline.isCallable()) { throw Py.TypeError("argument 1 must have a \"write\" method"); } return new PyWriter(writeline, dialectFromKwargs(dialect, args, keywords)); } public static PyInteger field_size_limit() { return Py.newInteger(field_limit); } public static PyInteger field_size_limit(PyObject new_limit) { if (!(new_limit instanceof PyInteger)) { throw Py.TypeError("limit must be an integer"); } int old_limit = field_limit; field_limit = new_limit.asInt(); return Py.newInteger(old_limit); } static PyObject get_dialect_from_registry(PyObject name) { PyObject dialect = _dialects.__finditem__(name); if (dialect == null) { throw Error("unknown dialect"); } return dialect; } /** * Return an ArgParser that ignores keyword args. */ private static ArgParser parseArgs(String funcName, PyObject[] args, String[] keywords) { // XXX: _weakref.ReferenceType has the same code if (keywords.length > 0) { int argc = args.length - keywords.length; PyObject[] justArgs = new PyObject[argc]; System.arraycopy(args, 0, justArgs, 0, argc); args = justArgs; } return new ArgParser(funcName, args, Py.NoKeywords, Py.NoKeywords); } /** * Return a Dialect instance created or updated from keyword arguments. */ private static PyDialect dialectFromKwargs(PyObject dialect, PyObject[] args, String[] keywords) { PyObject[] dialectArgs; int argc = args.length - keywords.length; // was a dialect keyword specified? boolean dialectKeyword = false; for (String keyword : keywords) { if (keyword.equals("dialect")) { dialectKeyword = true; } } if (dialect == null || dialectKeyword) { // dialect wasn't passed as a positional arg dialectArgs = new PyObject[keywords.length]; System.arraycopy(args, argc, dialectArgs, 0, keywords.length); } else { // have dialect -- pass it to dialect_new as a positional arg dialectArgs = new PyObject[1 + keywords.length]; dialectArgs[0] = dialect; System.arraycopy(args, argc, dialectArgs, 1, keywords.length); } return (PyDialect)PyDialect.TYPE.__call__(dialectArgs, keywords); } private static PyObject exceptionNamespace() { PyObject dict = new PyStringMap(); dict.__setitem__("__module__", new PyString("_csv")); return dict; } }