/** * DataCleaner (community edition) * Copyright (C) 2014 Neopost - Customer Information Management * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.datacleaner.util; import java.beans.PropertyDescriptor; import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; import java.io.ObjectStreamClass; import java.io.ObjectStreamField; import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; import org.apache.metamodel.util.EqualsBuilder; import org.apache.metamodel.util.HasName; import org.apache.metamodel.util.LegacyDeserializationObjectInputStream; import org.datacleaner.api.AnalyzerResult; import org.datacleaner.api.AnalyzerResultReducer; import org.datacleaner.api.ComponentCategory; import org.datacleaner.api.HasAnalyzerResult; import org.datacleaner.api.InputColumn; import org.datacleaner.api.InputRow; import org.datacleaner.api.Metric; import org.datacleaner.api.Renderable; import org.datacleaner.connection.Datastore; import org.datacleaner.connection.SchemaNavigator; import org.datacleaner.descriptors.MetricDescriptor; import org.datacleaner.job.ComponentConfiguration; import org.datacleaner.job.ComponentJob; import org.datacleaner.job.ImmutableComponentConfiguration; import org.datacleaner.reference.TextFileDictionary; import org.datacleaner.reference.TextFileSynonymCatalog; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * {@link ObjectInputStream} implementation that is aware of changes such as * class or package renaming. This can be used to deserialize classes with * historic/legacy class names. * * Furthermore the deserialization mechanism is aware of multiple * {@link ClassLoader}s. This means that if the object being deserialized * pertains to a different {@link ClassLoader}, then this classloader can be * added using the {@link #addClassLoader(ClassLoader)} method. * * */ public class ChangeAwareObjectInputStream extends LegacyDeserializationObjectInputStream { private static final Logger logger = LoggerFactory.getLogger(ChangeAwareObjectInputStream.class); /** * Table mapping primitive type names to corresponding class objects. As * defined in {@link ObjectInputStream}. */ private static final Map<String, Class<?>> PRIMITIVE_CLASSES = new HashMap<>(8, 1.0F); /** * Since the change from eobjects.org MetaModel to Apache MetaModel, a lot * of interfaces (especially those that extend {@link HasName}) have * transparently changed their serialization IDs. */ private static final Set<String> INTERFACES_WITH_SERIAL_ID_CHANGES = new HashSet<>(); private static final Comparator<String> packageNameComparator = (o1, o2) -> { if (EqualsBuilder.equals(o1, o2)) { return 0; } // use length as the primary differentiator, to make sure long // packages are placed before short ones. int diff = o1.length() - o2.length(); if (diff == 0) { diff = o1.compareTo(o2); } return diff; }; static { PRIMITIVE_CLASSES.put("boolean", boolean.class); PRIMITIVE_CLASSES.put("byte", byte.class); PRIMITIVE_CLASSES.put("char", char.class); PRIMITIVE_CLASSES.put("short", short.class); PRIMITIVE_CLASSES.put("int", int.class); PRIMITIVE_CLASSES.put("long", long.class); PRIMITIVE_CLASSES.put("float", float.class); PRIMITIVE_CLASSES.put("double", double.class); PRIMITIVE_CLASSES.put("void", void.class); INTERFACES_WITH_SERIAL_ID_CHANGES.add(InputColumn.class.getName()); INTERFACES_WITH_SERIAL_ID_CHANGES.add(ComponentJob.class.getName()); INTERFACES_WITH_SERIAL_ID_CHANGES.add(Datastore.class.getName()); INTERFACES_WITH_SERIAL_ID_CHANGES.add(MetricDescriptor.class.getName()); INTERFACES_WITH_SERIAL_ID_CHANGES.add(PropertyDescriptor.class.getName()); INTERFACES_WITH_SERIAL_ID_CHANGES.add(ComponentCategory.class.getName()); INTERFACES_WITH_SERIAL_ID_CHANGES.add("org.datacleaner.beans.writers.WriteDataResult"); } private final List<ClassLoader> additionalClassLoaders; private final Map<String, String> renamedPackages; private final Map<String, String> renamedClasses; public ChangeAwareObjectInputStream(final InputStream in) throws IOException { super(in); renamedPackages = new TreeMap<>(packageNameComparator); renamedClasses = new HashMap<>(); additionalClassLoaders = new ArrayList<>(); // add analyzerbeans' own renamed classes addRenamedClass("org.datacleaner.reference.TextBasedDictionary", TextFileDictionary.class); addRenamedClass("org.datacleaner.reference.TextBasedSynonymCatalog", TextFileSynonymCatalog.class); // analyzer results moved as of ticket #843 addRenamedClass("org.datacleaner.result.PatternFinderResult", "org.datacleaner.beans.stringpattern.PatternFinderResult"); addRenamedClass("org.datacleaner.result.DateGapAnalyzerResult", "org.datacleaner.beans.dategap.DateGapAnalyzerResult"); addRenamedClass("org.datacleaner.util.TimeInterval", "org.datacleaner.beans.dategap.TimeInterval"); addRenamedClass("org.datacleaner.result.StringAnalyzerResult", "org.datacleaner.beans.StringAnalyzerResult"); addRenamedClass("org.datacleaner.result.NumberAnalyzerResult", "org.datacleaner.beans.NumberAnalyzerResult"); addRenamedClass("org.datacleaner.result.BooleanAnalyzerResult", "org.datacleaner.beans.BooleanAnalyzerResult"); addRenamedClass("org.datacleaner.result.DateAndTimeAnalyzerResult", "org.datacleaner.beans.DateAndTimeAnalyzerResult"); // analyzer results Scala migrations compat-classes as of ticket #1268 addRenamedClass("org.datacleaner.visualization.DensityAnalyzerResult", "org.datacleaner.visualization.ScalaDensityAnalyzerResult"); addRenamedClass("org.datacleaner.visualization.ScatterAnalyzerResult", "org.datacleaner.visualization.ScalaScatterAnalyzerResult"); addRenamedClass("org.datacleaner.visualization.StackedAreaAnalyzerResult", "org.datacleaner.visualization.ScalaStackedAreaAnalyzerResult"); // analyzer results moved as of ticket #993 addRenamedClass("org.datacleaner.result.ValueDistributionGroupResult", "org.datacleaner.beans.valuedist.SingleValueDistributionResult"); addRenamedClass("org.datacleaner.result.ValueDistributionResult", "org.datacleaner.beans.valuedist.GroupedValueDistributionResult"); addRenamedClass("org.datacleaner.beans.valuedist.ValueDistributionGroupResult", "org.datacleaner.beans.valuedist.SingleValueDistributionResult"); addRenamedClass("org.datacleaner.beans.valuedist.ValueDistributionResult", "org.datacleaner.beans.valuedist.GroupedValueDistributionResult"); addRenamedClass("org.datacleaner.beans.valuedist.ValueCount", "org.datacleaner.result.SingleValueFrequency"); addRenamedClass("org.datacleaner.result.ValueCount", "org.datacleaner.result.SingleValueFrequency"); addRenamedClass("org.datacleaner.beans.valuedist.ValueCountList", "org.datacleaner.result.ValueCountList"); addRenamedClass("org.datacleaner.beans.valuedist.ValueCountListImpl", "org.datacleaner.result.ValueCountListImpl"); // duplicate detection analyzer changed final String duplicateDetectionClassName = "com.hi.hiqmr.packaging.datacleaner.deduplication.DuplicateDetectionAnalyzer"; addRenamedClass("com.hi.contacts.datacleaner.DuplicateDetectionAnalyzer", duplicateDetectionClassName); addRenamedClass("com.hi.hiqmr.datacleaner.deduplication.Identify7DeduplicationAnalyzer", duplicateDetectionClassName); addRenamedClass("com.hi.hiqmr.datacleaner.deduplication.DuplicateDetectionAnalyzer", duplicateDetectionClassName); addRenamedClass("com.hi.hiqmr.deduplication.DuplicateDetectionAnalyzer", duplicateDetectionClassName); addRenamedPackage("com.hi.contacts.security", "com.hi.common.client.security"); // Classes moved in DC 4.0 addRenamedClass("org.datacleaner.data.InputRow", InputRow.class); addRenamedClass("org.datacleaner.data.InputColumn", InputColumn.class); addRenamedClass("org.datacleaner.result.Metric", Metric.class); addRenamedClass("org.datacleaner.job.BeanConfiguration", ComponentConfiguration.class); addRenamedClass("org.datacleaner.job.ImmutableBeanConfiguration", ImmutableComponentConfiguration.class); addRenamedClass("org.datacleaner.descriptors.AnnotationBasedAnalyzerBeanDescriptor", "org.datacleaner.descriptors.AnnotationBasedAnalyzerComponentDescriptor"); addRenamedClass("org.datacleaner.descriptors.AnnotationBasedTransformerBeanDescriptor", "org.datacleaner.descriptors.AnnotationBasedTransformerComponentDescriptor"); addRenamedClass("org.datacleaner.descriptors.AnnotationBasedFilterBeanDescriptor", "org.datacleaner.descriptors.AnnotationBasedFilterComponentDescriptor"); addRenamedClass("org.datacleaner.descriptors.AbstractHasAnalyzerResultBeanDescriptor", "org.datacleaner.descriptors.AbstractHasAnalyzerResultComponentDescriptor"); addRenamedClass("org.datacleaner.descriptors.AbstractBeanDescriptor", "org.datacleaner.descriptors.AbstractComponentDescriptor"); addRenamedClass("org.eobjects.analyzer.result.AnalyzerResult", AnalyzerResult.class); addRenamedClass("org.eobjects.analyzer.result.AnalyzerResultReducer", AnalyzerResultReducer.class); addRenamedClass("org.eobjects.analyzer.result.HasAnalyzerResult", HasAnalyzerResult.class); addRenamedClass("org.eobjects.analyzer.result.Metric", Metric.class); addRenamedClass("org.eobjects.analyzer.result.renderer.Renderable", Renderable.class); addRenamedClass("org.eobjects.analyzer.util.SchemaNavigator", SchemaNavigator.class); // General namespace change as of DC 4.0 addRenamedPackage("org.eobjects.datacleaner", "org.datacleaner"); addRenamedPackage("org.eobjects.analyzer", "org.datacleaner"); addRenamedPackage("org.datacleaner.beans.api", "org.datacleaner.api"); addRenamedPackage("org.datacleaner.beans.categories", "org.datacleaner.components.categories"); // Change from eobjects.org MetaModel to Apache MetaModel addRenamedPackage("org.eobjects.metamodel", "org.apache.metamodel"); // DataCleaner output writers package changed addRenamedPackage("org.datacleaner.output.beans", "org.datacleaner.extension.output"); // Monitor removal from OSS addRenamedPackage("org.datacleaner.monitor", "com.hi.datacleaner.monitor"); } public void addClassLoader(final ClassLoader classLoader) { additionalClassLoaders.add(classLoader); } public void addRenamedPackage(final String originalPackageName, final String newPackageName) { renamedPackages.put(originalPackageName, newPackageName); } public void addRenamedClass(final String originalClassName, final Class<?> newClass) { addRenamedClass(originalClassName, newClass.getName()); } public void addRenamedClass(final String originalClassName, final String newClassName) { renamedClasses.put(originalClassName, newClassName); } @Override protected ObjectStreamClass readClassDescriptor() throws IOException, ClassNotFoundException { final ObjectStreamClass resultClassDescriptor = super.readClassDescriptor(); final String originalClassName = resultClassDescriptor.getName(); final String className = getClassNameRenamed(originalClassName); if (className != originalClassName) { return getClassDescriptor(className, false, resultClassDescriptor); } if (INTERFACES_WITH_SERIAL_ID_CHANGES.contains(originalClassName)) { return ObjectStreamClass.lookup(resolveClass(originalClassName, false)); } return resultClassDescriptor; } private ObjectStreamClass getClassDescriptor(final String className, final boolean checkRenames, final ObjectStreamClass originalClassDescriptor) throws ClassNotFoundException { if (originalClassDescriptor == null) { logger.warn("Original ClassDescriptor resolved to null for '{}'", className); } final Class<?> newClass = resolveClass(className, checkRenames); final ObjectStreamClass newClassDescriptor = ObjectStreamClass.lookupAny(newClass); if (newClassDescriptor == null) { logger.warn("New ClassDescriptor resolved to null for {}", newClass); } final String[] newFieldNames = getFieldNames(newClassDescriptor); final String[] originalFieldNames = getFieldNames(originalClassDescriptor); if (!EqualsBuilder.equals(originalFieldNames, newFieldNames)) { logger.warn("Field names of original and new class ({}) does not correspond!", className); // try to hack our way out of it by changing the value of the "name" // field in the ORIGINAL descriptor try { final Field field = ObjectStreamClass.class.getDeclaredField("name"); assert field != null; assert field.getType() == String.class; field.setAccessible(true); field.set(originalClassDescriptor, className); return originalClassDescriptor; } catch (final Exception e) { logger.error("Unsuccesful attempt at changing the name of the original class descriptor"); if (e instanceof RuntimeException) { throw (RuntimeException) e; } throw new IllegalStateException(e); } } return newClassDescriptor; } @Override protected Class<?> resolveClass(final ObjectStreamClass desc) throws IOException, ClassNotFoundException { final String className = desc.getName(); if (className.startsWith("org.eobjects.metamodel") || className.startsWith("[Lorg.eobjects.metamodel")) { return super.resolveClass(desc); } return resolveClass(className, true); } private Class<?> resolveClass(final String classNameParameter, final boolean checkRenames) throws ClassNotFoundException { logger.debug("Resolving class '{}'", classNameParameter); final String className; if (checkRenames) { className = getClassNameRenamed(classNameParameter); } else { className = classNameParameter; } try { return Class.forName(className); } catch (final ClassNotFoundException e) { final Class<?> primitiveClass = PRIMITIVE_CLASSES.get(className); if (primitiveClass != null) { return primitiveClass; } logger.info("Class '{}' was not resolved in main class loader.", className); final List<Exception> exceptions = new ArrayList<>(additionalClassLoaders.size()); for (final ClassLoader classLoader : additionalClassLoaders) { try { return Class.forName(className, true, classLoader); } catch (final ClassNotFoundException minorException) { logger.info("Class '{}' was not resolved in additional class loader '{}'", className, classLoader); exceptions.add(minorException); } } logger.warn("Could not resolve class of name '{}'", className); // if we reach this stage, all classloaders have failed, log their // issues int i = 1; for (final Exception exception : exceptions) { final int numExceptions = exceptions.size(); logger.error("Exception " + i + " of " + numExceptions, exception); i++; } throw e; } } private String getClassNameRenamed(final String className) { return getClassNameRenamed(className, true); } private String getClassNameRenamed(final String className, final boolean includeRenamedPackages) { // handle array definitions if (className.startsWith("[L")) { final String classNameWithoutArrayDef = className.substring(2, className.length() - 1); return "[L" + getClassNameRenamed(classNameWithoutArrayDef) + ";"; } // handle direct entries for renamed class final String directlyRenamedClassName = renamedClasses.get(className); if (directlyRenamedClassName != null) { logger.info("Class '{}' was encountered. Returning new class name: '{}'", className, directlyRenamedClassName); return directlyRenamedClassName; } if (includeRenamedPackages) { // handle renamed packages final Set<Entry<String, String>> entrySet = renamedPackages.entrySet(); for (final Entry<String, String> entry : entrySet) { final String legacyPackage = entry.getKey(); if (className.startsWith(legacyPackage)) { final String renamedClassName = className.replaceFirst(legacyPackage, entry.getValue()); logger.info("Class '{}' was encountered. Adapting to new class name: '{}'", className, renamedClassName); return getClassNameRenamed(renamedClassName, includeRenamedPackages); } } } // ok no rename happened return className; } private String[] getFieldNames(final ObjectStreamClass classDescriptor) { if (classDescriptor == null) { return new String[0]; } final ObjectStreamField[] fields = classDescriptor.getFields(); final String[] fieldNames = new String[fields.length]; for (int i = 0; i < fieldNames.length; i++) { fieldNames[i] = fields[i].getName(); } return fieldNames; } }