/*
 * #!
 * Ontopia Engine
 * #-
 * Copyright (C) 2001 - 2013 The Ontopia Project
 * #-
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * !#
 */

package net.ontopia.topicmaps.utils;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import net.ontopia.infoset.core.LocatorIF;
import net.ontopia.topicmaps.core.AssociationIF;
import net.ontopia.topicmaps.core.AssociationRoleIF;
import net.ontopia.topicmaps.core.TopicNameIF;
import net.ontopia.topicmaps.core.OccurrenceIF;
import net.ontopia.topicmaps.core.TMObjectIF;
import net.ontopia.topicmaps.core.TopicIF;
import net.ontopia.topicmaps.core.TopicMapIF;
import net.ontopia.topicmaps.core.VariantNameIF;
import net.ontopia.topicmaps.core.index.ClassInstanceIndexIF;
import net.ontopia.utils.CollectionUtils;
import net.ontopia.topicmaps.query.impl.utils.Prefetcher;

/**
 * PUBLIC: A helper class that can remove duplicate objects from a topic map.
 *
 * <p>Throughout this class two objects are treated as duplicates when
 * {@code KeyGenerator} produces the same key for both. The first object
 * seen for a key is kept, and later ones are merged into it with
 * {@code MergeUtils.mergeInto}.
 *
 * @since 1.2
 */
public class DuplicateSuppressionUtils {

  /**
   * PUBLIC: Remove all duplicates in the entire topic map.
   *
   * <p>Topics are processed in batches of 50, with each batch prefetched
   * before its names and occurrences are cleaned up. Associations are
   * then processed one association type at a time, followed by the
   * associations the class-instance index returns for a null type.
   *
   * @param topicmap the topic map to clean up
   */
  public static void removeDuplicates(TopicMapIF topicmap) {
    // Pass 1: duplicate topic characteristics, batch by batch.
    final int batchSize = 50;
    Iterator<TopicIF> topics = topicmap.getTopics().iterator();
    while (topics.hasNext()) {
      List<TopicIF> batch = CollectionUtils.nextBatch(topics, batchSize);
      prefetchTopics(topicmap, batch);
      for (TopicIF topic : batch) {
        removeDuplicates(topic);
      }
    }

    // Pass 2: duplicate associations, one association type at a time.
    // The type collection is copied before iterating over it.
    ClassInstanceIndexIF cindex = (ClassInstanceIndexIF)
        topicmap.getIndex("net.ontopia.topicmaps.core.index.ClassInstanceIndexIF");
    List<TopicIF> assocTypes = new ArrayList<TopicIF>(cindex.getAssociationTypes());
    for (TopicIF assocType : assocTypes) {
      Collection<AssociationIF> typed = cindex.getAssociations(assocType);
      if (!typed.isEmpty()) {
        removeDuplicateAssociations(typed);
      }
    }

    // Pass 3: duplicate untyped associations.
    Collection<AssociationIF> untyped = cindex.getAssociations(null);
    if (!untyped.isEmpty()) {
      removeDuplicateAssociations(untyped);
    }
  }

  /**
   * Prefetches the fields read while de-duplicating a batch of topics:
   * the topics' names and occurrences, then the scopes of those names
   * and occurrences, and finally the variants of the names.
   */
  private static void prefetchTopics(TopicMapIF topicmap, Collection<TopicIF> batch) {
    // TopicIF.basenames
    Prefetcher.prefetch(topicmap, batch,
                        Prefetcher.TopicIF,
                        Prefetcher.TopicIF_names, false);
    // TopicIF.occurrences
    Prefetcher.prefetch(topicmap, batch,
                        Prefetcher.TopicIF,
                        Prefetcher.TopicIF_occurrences, false);

    // Gather every name and occurrence in the batch for the second round.
    List<TopicNameIF> allNames = new ArrayList<TopicNameIF>();
    List<OccurrenceIF> allOccurrences = new ArrayList<OccurrenceIF>();
    for (TopicIF topic : batch) {
      allNames.addAll(topic.getTopicNames());
      allOccurrences.addAll(topic.getOccurrences());
    }

    // TopicNameIF.scope
    Prefetcher.prefetch(topicmap, allNames,
                        Prefetcher.TopicNameIF,
                        Prefetcher.TopicNameIF_scope, false);
    // OccurrenceIF.scope
    Prefetcher.prefetch(topicmap, allOccurrences,
                        Prefetcher.OccurrenceIF,
                        Prefetcher.OccurrenceIF_scope, false);
    // TopicNameIF.variants
    Prefetcher.prefetch(topicmap, allNames,
                        Prefetcher.TopicNameIF,
                        Prefetcher.TopicNameIF_variants, false);
  }

  /**
   * Prefetches the type, roles and scope of a batch of associations.
   */
  private static void prefetchAssociations(TopicMapIF topicmap, Collection<AssociationIF> batch) {
    // AssociationIF.type (need this as the associations themselves
    // haven't been fully loaded)
    Prefetcher.prefetch(topicmap, batch,
                        Prefetcher.AssociationIF,
                        Prefetcher.AssociationIF_type, false);
    // AssociationIF.roles
    Prefetcher.prefetch(topicmap, batch,
                        Prefetcher.AssociationIF,
                        Prefetcher.AssociationIF_roles, false);
    // AssociationIF.scope
    Prefetcher.prefetch(topicmap, batch,
                        Prefetcher.AssociationIF,
                        Prefetcher.AssociationIF_scope, false);
  }

  /**
   * PUBLIC: Remove all duplicated characteristics of the given topic,
   * except for duplicate associations the topic may participate in.
   *
   * @param topic the topic whose names and occurrences are cleaned up
   */
  public static void removeDuplicates(TopicIF topic) {
    // Duplicate names (and, per name, duplicate variants).
    removeDuplicateTopicNames(topic.getTopicNames());

    // Duplicate occurrences.
    removeDuplicateOccurrences(topic.getOccurrences());

    // Association roles are deliberately left alone here; they are
    // handled by removing duplicate associations globally.
  }

  /**
   * INTERNAL: do not call this method.
   *
   * <p>Merges topic names that share a key into the first name seen for
   * that key, then removes duplicate variants from the surviving name.
   * A copy of the collection is iterated.
   *
   * @param basenames the topic names to examine
   */
  public static void removeDuplicateTopicNames(Collection<TopicNameIF> basenames) {
    Map<String, TopicNameIF> firstByKey = new HashMap<String, TopicNameIF>();
    for (TopicNameIF current : new ArrayList<TopicNameIF>(basenames)) {
      String key = KeyGenerator.makeTopicNameKey(current);
      TopicNameIF survivor = firstByKey.get(key);
      if (survivor == null) {
        // First name with this key: it becomes the one that is kept.
        firstByKey.put(key, current);
        survivor = current;
      } else if (survivor != current) {
        MergeUtils.mergeInto(survivor, current);
      }
      // Variant clean-up always runs on the surviving name, so that
      // variants gained through a merge are checked as well.
      removeDuplicates(survivor);
    }
  }

  /**
   * INTERNAL: do not call this method.
   *
   * <p>Merges occurrences that share a key into the first occurrence
   * seen for that key. A copy of the collection is iterated.
   *
   * @param occurs the occurrences to examine
   */
  public static void removeDuplicateOccurrences(Collection<OccurrenceIF> occurs) {
    Map<String, OccurrenceIF> firstByKey = new HashMap<String, OccurrenceIF>();
    for (OccurrenceIF current : new ArrayList<OccurrenceIF>(occurs)) {
      String key = KeyGenerator.makeOccurrenceKey(current);
      OccurrenceIF survivor = firstByKey.get(key);
      if (survivor == null) {
        firstByKey.put(key, current);
      } else if (survivor != current) {
        MergeUtils.mergeInto(survivor, current);
      }
    }
  }

  /**
   * INTERNAL: do not call this method.
   *
   * <p>Removes duplicates among the given associations. The
   * associations are handled in batches of 50; each batch is prefetched
   * first. For every association, duplicate roles are removed before
   * its key is generated, and an association whose key has already been
   * seen is merged into the first association with that key.
   *
   * @param assocs the associations to examine; an empty collection is a
   *               no-op
   */
  public static void removeDuplicateAssociations(Collection<AssociationIF> assocs) {
    if (assocs.isEmpty()) {
      return;
    }

    Map<String, AssociationIF> firstByKey = new HashMap<String, AssociationIF>();
    final int batchSize = 50;

    // The topic map is taken from the first association in the collection.
    AssociationIF first = CollectionUtils.getFirst(assocs);
    TopicMapIF topicmap = first.getTopicMap();

    Iterator<AssociationIF> remaining = new ArrayList<AssociationIF>(assocs).iterator();
    while (remaining.hasNext()) {
      // Prefetch the next batch of associations.
      List<AssociationIF> batch = CollectionUtils.nextBatch(remaining, batchSize);
      prefetchAssociations(topicmap, batch);

      // Produce keys and detect duplicates.
      for (AssociationIF current : batch) {
        removeDuplicates(current);

        String key = KeyGenerator.makeAssociationKey(current);
        AssociationIF survivor = firstByKey.get(key);
        if (survivor == null) {
          firstByKey.put(key, current);
        } else if (survivor != current) {
          MergeUtils.mergeInto(survivor, current);
        }
      }
    }
  }

  /**
   * PUBLIC: Remove all duplicate variant names of the given topic name.
   *
   * @param basename the topic name whose variants are cleaned up
   */
  public static void removeDuplicates(TopicNameIF basename) {
    Map<String, VariantNameIF> firstByKey = new HashMap<String, VariantNameIF>();
    for (VariantNameIF current : new ArrayList<VariantNameIF>(basename.getVariants())) {
      String key = KeyGenerator.makeVariantKey(current);
      VariantNameIF survivor = firstByKey.get(key);
      if (survivor == null) {
        firstByKey.put(key, current);
      } else {
        MergeUtils.mergeInto(survivor, current);
      }
    }
  }

  /**
   * PUBLIC: Remove all duplicate association roles of the association.
   *
   * @param assoc the association whose roles are cleaned up
   */
  public static void removeDuplicates(AssociationIF assoc) {
    Map<String, AssociationRoleIF> firstByKey = new HashMap<String, AssociationRoleIF>();
    for (AssociationRoleIF current : new ArrayList<AssociationRoleIF>(assoc.getRoles())) {
      String key = KeyGenerator.makeAssociationRoleKey(current);
      AssociationRoleIF survivor = firstByKey.get(key);
      if (survivor == null) {
        firstByKey.put(key, current);
      } else {
        MergeUtils.mergeInto(survivor, current);
      }
    }
  }

  /**
   * PUBLIC: Removes all duplicate associations of this topic.
   *
   * <p>The associations are found through the roles played by the
   * topic. When an association has the same key as one recorded
   * earlier, its item identifiers are moved to the earlier association
   * and it is removed.
   *
   * @param topic the topic whose associations are examined
   * @return a map from each association that was kept to the set of
   *         associations removed as duplicates of it (the set is empty
   *         when no duplicates were found)
   * @since 2.1
   */
  public static Map<AssociationIF, Set<AssociationIF>> removeDuplicateAssociations(TopicIF topic) {
    Map<String, AssociationIF> keptByKey = new HashMap<String, AssociationIF>();
    Map<AssociationIF, Set<AssociationIF>> resultMap =
        new HashMap<AssociationIF, Set<AssociationIF>>();

    for (AssociationRoleIF role : new ArrayList<AssociationRoleIF>(topic.getRoles())) {
      AssociationIF assoc = role.getAssociation();
      if (assoc == null) {
        continue;
      }

      String key = KeyGenerator.makeAssociationKey(assoc);
      AssociationIF existing = keptByKey.get(key);

      // For associations where the same topic plays more than one
      // role, the associations are the same, and this is not a duplicate.
      if (assoc.equals(existing)) {
        continue;
      }

      if (existing == null) {
        // First association with this key.
        keptByKey.put(key, assoc);
        resultMap.put(assoc, new HashSet<AssociationIF>());
      } else if (existing.getTopicMap() == null) {
        // The recorded association no longer reports a topic map
        // (presumably it has been removed in the meantime); the current
        // one takes its place and inherits its duplicate set.
        keptByKey.put(key, assoc);
        resultMap.put(assoc, resultMap.remove(existing));
      } else {
        // Genuine duplicate: keep its item identifiers on the survivor,
        // remove it, and record the removal.
        copySourceLocators(existing, assoc);
        assoc.remove();
        resultMap.get(existing).add(assoc);
      }
    }
    return resultMap;
  }

  // --- Internal helper methods

  /**
   * Moves every item identifier from {@code source} to {@code target}.
   * Each locator is removed from the source before it is added to the
   * target.
   */
  private static void copySourceLocators(TMObjectIF target, TMObjectIF source) {
    Collection<LocatorIF> srclocs = source.getItemIdentifiers();
    if (srclocs.isEmpty()) {
      return;
    }

    // Iterate over a snapshot, since the source's identifiers are
    // modified inside the loop.
    List<LocatorIF> snapshot = new ArrayList<LocatorIF>(srclocs);
    for (LocatorIF loc : snapshot) {
      source.removeItemIdentifier(loc);
      target.addItemIdentifier(loc);
    }
  }
}