/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.configuration; import java.util.ArrayList; import java.util.Collection; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import javax.xml.bind.JAXBElement; import org.apache.metamodel.DataContext; import org.apache.metamodel.MetaModelHelper; import org.apache.metamodel.data.DataSet; import org.apache.metamodel.data.Row; import org.apache.metamodel.pojo.ArrayTableDataProvider; import org.apache.metamodel.pojo.TableDataProvider; import org.apache.metamodel.query.Query; import org.apache.metamodel.schema.Column; import org.apache.metamodel.schema.ColumnType; import org.apache.metamodel.schema.ColumnTypeImpl; import org.apache.metamodel.schema.Schema; import org.apache.metamodel.schema.Table; import org.apache.metamodel.util.CollectionUtils; import org.apache.metamodel.util.SimpleTableDef; import org.datacleaner.configuration.jaxb.AbstractDatastoreType; import org.datacleaner.configuration.jaxb.PojoDatastoreType; import org.datacleaner.configuration.jaxb.PojoTableType; import org.datacleaner.configuration.jaxb.PojoTableType.Columns; import org.datacleaner.configuration.jaxb.PojoTableType.Rows; import org.datacleaner.connection.Datastore; import org.datacleaner.connection.DatastoreConnection; import org.datacleaner.connection.PojoDatastore; import org.datacleaner.util.CollectionUtils2; import org.datacleaner.util.ReflectionUtils; import org.datacleaner.util.StringUtils; import org.datacleaner.util.convert.StringConverter; import org.datacleaner.util.xml.XmlUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** * Convenient utility class for reading and writing POJO datastores from and to * XML (JAXB) elements. */ public class JaxbPojoDatastoreAdaptor { private static final Logger logger = LoggerFactory.getLogger(JaxbPojoDatastoreAdaptor.class); private final StringConverter _converter; /** * @deprecated use the * {@link #JaxbPojoDatastoreAdaptor(DataCleanerConfiguration)} * constructor instead */ @Deprecated public JaxbPojoDatastoreAdaptor() { _converter = StringConverter.simpleInstance(); } public JaxbPojoDatastoreAdaptor(final DataCleanerConfiguration configuration) { _converter = new StringConverter(configuration); } public PojoDatastore read(final PojoDatastoreType pojoDatastore) { final String name = pojoDatastore.getName(); final String schemaName = (pojoDatastore.getSchemaName() == null ? name : pojoDatastore.getSchemaName()); final List<TableDataProvider<?>> tableDataProviders = new ArrayList<>(); final List<PojoTableType> tables = pojoDatastore.getTable(); for (final PojoTableType table : tables) { final String tableName = table.getName(); final List<Columns.Column> columns = table.getColumns().getColumn(); final int columnCount = columns.size(); final String[] columnNames = new String[columnCount]; final ColumnType[] columnTypes = new ColumnType[columnCount]; for (int i = 0; i < columnCount; i++) { final Columns.Column column = columns.get(i); columnNames[i] = column.getName(); columnTypes[i] = ColumnTypeImpl.valueOf(column.getType()); } final SimpleTableDef tableDef = new SimpleTableDef(tableName, columnNames, columnTypes); final Collection<Object[]> arrays = new ArrayList<>(); final Rows rowsType = table.getRows(); if (rowsType != null) { final List<Rows.Row> rows = rowsType.getRow(); for (final Rows.Row row : rows) { final List<Object> values = row.getV(); if (values.size() != columnCount) { throw new IllegalStateException( "Row value count is not equal to column count in datastore '" + name + "'. Expected " + columnCount + " values, found " + values.size() + " (table " + tableName + ", row no. " + arrays.size() + ")"); } final Object[] array = new Object[columnCount]; for (int i = 0; i < array.length; i++) { final Class<?> expectedClass = columnTypes[i].getJavaEquivalentClass(); final Object rawValue = values.get(i); final Object value = deserializeValue(rawValue, expectedClass); array[i] = value; } arrays.add(array); } } final TableDataProvider<?> tableDataProvider = new ArrayTableDataProvider(tableDef, arrays); tableDataProviders.add(tableDataProvider); } return new PojoDatastore(name, schemaName, tableDataProviders); } private Object deserializeValue(final Object value, final Class<?> expectedClass) { if (value == null) { return null; } if (value instanceof Node) { final Node node = (Node) value; logger.debug("Value is a DOM node: {}", node); return getNodeValue(node, expectedClass); } if (value instanceof JAXBElement) { final JAXBElement<?> element = (JAXBElement<?>) value; logger.debug("Value is a JAXBElement: {}", element); final Object jaxbValue = element.getValue(); return deserializeValue(jaxbValue, expectedClass); } if (value instanceof String) { final String str = (String) value; return _converter.deserialize(str, expectedClass); } else { throw new UnsupportedOperationException("Unknown value type: " + value); } } @SuppressWarnings("unchecked") private <T> T getNodeValue(final Node node, Class<T> expectedClass) { if (node.getNodeType() == Node.TEXT_NODE) { final String str = node.getNodeValue(); if (expectedClass == null) { // we will fall back to string class expectedClass = (Class<T>) String.class; } final Object result = _converter.deserialize(str, determineExpectedClass(node, expectedClass)); return (T) result; } // a top-level value final List<Node> childNodes = getChildNodes(node); if (childNodes.isEmpty()) { return null; } else if (childNodes.size() == 1 && childNodes.get(0).getNodeType() == Node.TEXT_NODE) { expectedClass = (Class<T>) determineExpectedClass(node, expectedClass); final Node child = childNodes.get(0); return getNodeValue(child, expectedClass); } if (expectedClass == null) { final Node firstChild = childNodes.get(0); if ("i".equals(firstChild.getNodeName())) { final List<Object> list = getNodeList(childNodes); return (T) list; } else if ("e".equals(firstChild.getNodeName())) { final Map<String, Object> map = getNodeMap(childNodes); return (T) map; } else { throw new UnsupportedOperationException( "Unexpected child nodes. First child: " + printNode(firstChild)); } } else if (ReflectionUtils.is(expectedClass, List.class)) { final List<Object> list = getNodeList(childNodes); return (T) list; } else if (ReflectionUtils.is(expectedClass, Map.class)) { final Map<String, Object> map = getNodeMap(childNodes); return (T) map; } else if (expectedClass.isArray()) { final List<Object> list = getNodeList(childNodes); final Class<?> componentType = expectedClass.getComponentType(); return (T) CollectionUtils2.toArray(list, componentType); } throw new UnsupportedOperationException("Not a value (v) node type: " + printNode(node)); } private Class<?> determineExpectedClass(final Node node, final Class<?> fallbackType) { final NamedNodeMap attributes = node.getAttributes(); if (attributes != null) { final Node attribute = attributes.getNamedItem("class"); if (attribute != null) { final String className = attribute.getTextContent(); if (!StringUtils.isNullOrEmpty(className)) { try { return Class.forName(className); } catch (final ClassNotFoundException e) { logger.error("Could not load class: " + className + ". Falling back to String type.", e); } } } } return fallbackType; } private List<Object> getNodeList(final List<Node> childNodes) { final List<Object> list = new ArrayList<>(); for (final Node childNode : childNodes) { final Object value = getNodeValue(childNode, null); list.add(value); } return list; } private List<Node> getChildNodes(final Node node) { final List<Node> list = new ArrayList<>(); final NodeList childNodes = node.getChildNodes(); for (int i = 0; i < childNodes.getLength(); i++) { final Node child = childNodes.item(i); switch (child.getNodeType()) { case Node.ELEMENT_NODE: list.add(child); // fallthru case Node.TEXT_NODE: final String text = child.getNodeValue(); if (!StringUtils.isNullOrEmpty(text)) { list.add(child); } break; default: // ignore } } return list; } private Map<String, Object> getNodeMap(final List<Node> entryNodes) { final Map<String, Object> map = new LinkedHashMap<>(); for (final Node entryNode : entryNodes) { final String entryNodeName = entryNode.getNodeName(); if (!"e".equals(entryNodeName)) { throw new UnsupportedOperationException( "Node passed as Map entry does not appear to be the right type: " + printNode(entryNode)); } String key = null; Object value = null; final List<Node> keyOrValueNodes = getChildNodes(entryNode); assert keyOrValueNodes.size() == 2; for (final Node keyOrValueNode : keyOrValueNodes) { final String keyOrValueNodeName = keyOrValueNode.getNodeName(); if ("k".equals(keyOrValueNodeName)) { key = getNodeValue(keyOrValueNode, String.class); } else if ("v".equals(keyOrValueNodeName)) { value = getNodeValue(keyOrValueNode, null); } } if (key == null) { throw new UnsupportedOperationException("Map key (k) node not set in entry: " + printNode(entryNode)); } map.put(key, value); } return map; } private String printNode(final Node node) { return XmlUtils.writeDocumentToString(node, false); } private org.datacleaner.configuration.jaxb.PojoTableType.Rows.Row createPojoRow(final Row row, final Document document) { final org.datacleaner.configuration.jaxb.PojoTableType.Rows.Row rowType = new org.datacleaner.configuration.jaxb.PojoTableType.Rows.Row(); final Object[] values = row.getValues(); for (final Object value : values) { final Element elem = document.createElement("v"); createPojoValue(value, elem, document, false); rowType.getV().add(elem); } return rowType; } private void createPojoValue(Object value, final Element elem, final Document document, final boolean explicitType) { if (value == null) { // return an empty element return; } if (value.getClass().isArray()) { final Class<?> componentType = value.getClass().getComponentType(); if (componentType.isPrimitive() || componentType == String.class) { // leave the array to be serialized using the string converter - // it // will take up much less space. } else { value = CollectionUtils.toList(value); } } if (value instanceof List) { final List<?> list = (List<?>) value; for (final Object item : list) { final Element itemElement = document.createElement("i"); createPojoValue(item, itemElement, document, true); elem.appendChild(itemElement); } return; } if (value instanceof Map) { final Map<?, ?> map = (Map<?, ?>) value; for (final Entry<?, ?> entry : map.entrySet()) { final Element keyElement = document.createElement("k"); createPojoValue(entry.getKey(), keyElement, document, true); final Element valueElement = document.createElement("v"); createPojoValue(entry.getValue(), valueElement, document, true); final Element entryElement = document.createElement("e"); entryElement.appendChild(keyElement); entryElement.appendChild(valueElement); elem.appendChild(entryElement); } return; } try { final String stringValue = _converter.serialize(value); elem.setTextContent(stringValue); if (explicitType) { elem.setAttribute("class", value.getClass().getName()); } } catch (final RuntimeException e) { logger.warn("Failed to serialize value: " + value + ". Returning null.", e); } return; } private org.datacleaner.configuration.jaxb.PojoTableType.Columns.Column createPojoColumn(final String name, final ColumnType type) { final org.datacleaner.configuration.jaxb.PojoTableType.Columns.Column columnType = new org.datacleaner.configuration.jaxb.PojoTableType.Columns.Column(); columnType.setName(name); columnType.setType(type.toString()); return columnType; } public PojoTableType createPojoTable(final DataContext dataContext, final Table table, final Column[] usedColumns, final int maxRows) { final PojoTableType tableType = new PojoTableType(); tableType.setName(table.getName()); // read columns final Columns columnsType = new Columns(); for (final Column column : usedColumns) { columnsType.getColumn().add(createPojoColumn(column.getName(), column.getType())); } tableType.setColumns(columnsType); if (maxRows > 0) { // read values final Query q = dataContext.query().from(table).select(usedColumns).toQuery(); q.setMaxRows(maxRows); final Document document = XmlUtils.createDocument(); final Rows rowsType = new Rows(); try (DataSet ds = dataContext.executeQuery(q)) { while (ds.next()) { final Row row = ds.getRow(); rowsType.getRow().add(createPojoRow(row, document)); } } tableType.setRows(rowsType); } return tableType; } public AbstractDatastoreType createPojoDatastore(final String datastoreName, final String schemaName, final Collection<PojoTableType> tables) { final PojoDatastoreType datastoreType = new PojoDatastoreType(); datastoreType.setName(datastoreName); datastoreType.setSchemaName(schemaName); datastoreType.getTable().addAll(tables); return datastoreType; } /** * Creates a serialized POJO copy of a datastore. * * @param datastore * the datastore to copy * @param columns * the columns to include, or null if all tables/columns should * be included. * @param maxRowsToQuery * the maximum number of records to query and include in the * datastore copy. Keep this number reasonably low, or else the * copy might cause out-of-memory issues (Both while reading and * writing). * @return */ public AbstractDatastoreType createPojoDatastore(final Datastore datastore, final Set<Column> columns, final int maxRowsToQuery) { final PojoDatastoreType datastoreType = new PojoDatastoreType(); datastoreType.setName(datastore.getName()); datastoreType.setDescription(datastore.getDescription()); try (DatastoreConnection con = datastore.openConnection()) { final DataContext dataContext = con.getDataContext(); final Schema schema; final Table[] tables; if (columns == null || columns.isEmpty()) { schema = dataContext.getDefaultSchema(); tables = schema.getTables(); } else { tables = MetaModelHelper.getTables(columns); // TODO: There's a possibility that tables span multiple // schemas, but we cannot currently support that in a // PojoDatastore, so we just pick the first and cross our // fingers. schema = tables[0].getSchema(); } datastoreType.setSchemaName(schema.getName()); for (final Table table : tables) { final Column[] usedColumns; if (columns == null || columns.isEmpty()) { usedColumns = table.getColumns(); } else { usedColumns = MetaModelHelper.getTableColumns(table, columns); } final PojoTableType tableType = createPojoTable(dataContext, table, usedColumns, maxRowsToQuery); datastoreType.getTable().add(tableType); } } return datastoreType; } }