/* * Copyright 2003-2010 Tufts University Licensed under the * Educational Community License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.osedu.org/licenses/ECL-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS IS" * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing * permissions and limitations under the License. */ /* * MergeMapFactory.java * * @created 2012-07-17 * @author Scott Fraize * @author dhelle01 (original LWMergeMap) */ package tufts.vue; import java.util.*; import java.io.StringWriter; import java.io.PrintWriter; import edu.tufts.vue.compare.*; import edu.tufts.vue.style.*; import tufts.vue.LWComponent.Flag; import tufts.vue.LWComponent.ChildKind; import edu.tufts.vue.metadata.VueMetadataElement; /** * A parameterized factory for constructing merged LWMaps. Each factory can only be used to create * a single LWMap. The constructed LWMap will contain LWComponent clientData of type * MergeMapFactory.class, which for the remainder of the runtime can be tested to see if the map is * a merge-map, or queried to view the generating params. * * Note that the "base map", if it is active (and not excluded) is always the first map we process, * which means any nodes found there will be the first encountered with a given merge key, and thus * will be the nodes that are duplicated to the final merge map. Thus, the base-map also serves as * a kind of priority template map. * */ public class MergeMapFactory { private static final org.apache.log4j.Logger Log = org.apache.log4j.Logger.getLogger(MergeMapFactory.class); public static final int THRESHOLD_DEFAULT = 20; private int nodeThresholdSliderValue = THRESHOLD_DEFAULT; private int linkThresholdSliderValue = THRESHOLD_DEFAULT; // these two bools represent 3 possible design configs -- would be better as single case tags: // all/similarity/difference /** default true -- include all the non-primary (base) maps */ private boolean includeSecondaryMaps = true; /** default false -- filter out nodes (keys) that exist on the base map */ private boolean excludeNodesOnBaseMap = false; public final LWMap baseMap; public final boolean baseMapInactive; public final LWMap outputMap; /** Actual maps used to generate the most recent merge. **/ public final List<LWMap> mapList; /** to declare inactive status of individual maps -- if empty or shorter, status defaults to active */ public final List<Boolean> activeStatus; public final List<LWMap> activeMaps; private List<Double> nodeIntervalBoundaries; private List<Double> linkIntervalBoundaries; /** keysMerged will be used during construction to track what's already been merged */ private final Map<Object,LWComponent> keysMerged = new HashMap(); /** if we will be excluding all keys found on the base map from the output, this will be filled */ private Set baseMapKeys; private boolean alreadyUsed = false; private static int CreationCount = 0; public MergeMapFactory(LWMap baseMap, List<LWMap> mapList, List<Boolean> actives) { this.baseMap = baseMap; this.mapList = new ArrayList(mapList); this.activeStatus = new ArrayList(actives); this.outputMap = new LWMap(getNextTitle()); this.activeMaps = computeActiveMaps(); boolean bma = false; for (LWMap m : activeMaps) if (m == baseMap) { bma = true; break; } this.baseMapInactive = !bma; } public LWMap createAsVoteMerge() { return createMergeMap(true); } public LWMap createAsWeightMerge() { return createMergeMap(false); } private LWMap createMergeMap(boolean asVote) { if (alreadyUsed) throw new Error(getClass() + ": already used"); alreadyUsed = true; if (excludeNodesOnBaseMap && !includeSecondaryMaps) throw new Error("merge-map would have no contents in this configuration"); if (asVote) createAggregateAndFillMap(VoteAggregate.class); else createAggregateAndFillMap(WeightAggregate.class); annotateMap(asVote); // Make sure to layout the new map 1st time so, for instance, any node-icons will be properly displayed: outputMap.layoutAndValidateNewMap(); // Someday, VUE.displayMap should check for special clientData that // indicates if the map has had an initializing layout, and automatically // do one if a layout key isn't found. Log.info("created: " + outputMap + "; " + outputMap.getChild(0)); baseMapKeys = null; // gc keysMerged.clear(); // gc for (LWMap m : activeMaps) m.setClientData(LinksCache.class, null); // gc // we only allow one creation per factory in case anyone ever wants // to pull the params out of this client data later. outputMap.putClientData(this); return outputMap; } public void setFilterOnBaseMap(boolean doFilter) { includeSecondaryMaps = !doFilter; } public void setExcludeNodesFromBaseMap(boolean doExclude) { excludeNodesOnBaseMap = doExclude; } public void setNodeThresholdSliderValue(int value) { nodeThresholdSliderValue = value; } public int getNodeThresholdSliderValue() { return nodeThresholdSliderValue; } public void setLinkThresholdSliderValue(int value) { linkThresholdSliderValue = value; } public int getLinkThresholdSliderValue() { return linkThresholdSliderValue; } public boolean isBaseMapActive() { return !baseMapInactive; } public void setNodeIntervalBoundaries(List<Double> nib) { if (DEBUG.MERGE) { Log.debug("nodeIntervals:"); tufts.Util.dump(nib); } nodeIntervalBoundaries = new ArrayList(nib); } public void setLinkIntervalBoundaries(List<Double> lib) { if (DEBUG.MERGE) { Log.debug("linkIntervals:"); tufts.Util.dump(lib); } linkIntervalBoundaries = new ArrayList(lib); } public List<LWMap> getMapList() { return mapList; } private static String getNextTitle() { return "Merge Map " + (++CreationCount); } /** @return the list of currently active maps (the maps list adjusted by activeStatus) * Note that the baseMap is not special in this regard: in can be inactive and not in this list.*/ private List<LWMap> computeActiveMaps() { if (activeStatus == null || activeStatus.size() == 0) { // no status list specified, thus, all maps are active: return mapList; } final List<LWMap> maps = getMapList(); final List<LWMap> actives = new ArrayList(maps.size()); final Iterator<Boolean> statusIter = activeStatus.iterator(); for (LWMap map : maps) { if (statusIter.hasNext()) { if (statusIter.next().booleanValue()) actives.add(map); else /* leave this map out */; } else { actives.add(map); } } return actives; } private void createAggregateAndFillMap(final Class clazz) { // Only one of these two will be set to non-null final VoteAggregate voteAggregate; final WeightAggregate weightAggregate; final ConnectivityMatrixList cms = new ConnectivityMatrixList(); //----------------------------------------------------------------------------- // Create a connectivity matrix for each active map to be fed to the Aggregate //----------------------------------------------------------------------------- if (excludeNodesOnBaseMap) { // generate the key set that will be used to exclude any keys // that were found on the base map. this.baseMapKeys = hashMergeKeys(baseMap); } for (LWMap map : activeMaps) { // if (map != getBaseMap()) // TODO: check -- really add baseMap matrix if ignoring baseMap? // old comment had commented out check to skip baseMap... cms.add(new ConnectivityMatrix(map)); } //----------------------------------------------------------------------------- // Create the desired aggregate //----------------------------------------------------------------------------- if (clazz == VoteAggregate.class) { double nodeThresh = (double) getNodeThresholdSliderValue() / 100.0; double linkThresh = (double) getLinkThresholdSliderValue() / 100.0; weightAggregate = null; voteAggregate = VoteAggregate.create(cms, nodeThresh, linkThresh); } else { weightAggregate = WeightAggregate.create(cms); voteAggregate = null; } //----------------------------------------------------------------------------- // The base-map is the first map we process, which means any nodes found there with a given // key will be the nodes that are duplicated to the final merge map. Thus, the base-map // also serves as a kind of priority template map. if (excludeNodesOnBaseMap || baseMapInactive) { if (DEBUG.MERGE) Log.debug("excluding base-map nodes from merge; active=" + !baseMapInactive); } else mergeInNodes(baseMap, voteAggregate); if (includeSecondaryMaps) { // When we do NOT do this, it means we will look at the votes or weights from the // aggregate merge-key analysis of all maps, but only ever take nodes from the base map // -- nodes and links from all other maps will be ignored except for the presence of // their merge keys in the matrix. for (LWMap map : activeMaps) { if (map == baseMap) { Log.info(" loop; skipping baseMap " + baseMap); } else { mergeInNodes(map, voteAggregate); } } } // Ah: note that CHILDREN OF A MERGED NODE CAN BE ANYTHING... WE ONLY WANT THE TOP-LEVEL-CHILDREN! //final Collection<LWComponent> allMergedNodes = outputMap.getAllDescendents(ChildKind.PROPER); // Fetch all the top level children of the single default layer in the merge-map. Note // we're about to add links, so be sure to pass down and iterate a copy of the live // list. final Collection<LWComponent> allMergedNodes = new ArrayList(outputMap.getChild(0).getChildren()); for (LWComponent c : allMergedNodes) if (isMergeSkipped(c)) Log.warn("unexpected content in merge results: " + c); Log.info("pre-link content: " + tufts.Util.tags(allMergedNodes)); if (voteAggregate != null) installLinksForVotes(allMergedNodes, cms, voteAggregate); else installLinksAndStylesForWeights(allMergedNodes, cms, weightAggregate); } private static boolean isMergeSkipped(LWComponent c) { if (c == null || c.hasFlag(Flag.ICON)) return true; final Class cc = c.getClass(); // to NOT allow LWPortal, which is a subclass of LWNode return ! (cc == LWNode.class || cc == LWImage.class); } private void mergeInNodes(final LWMap sourceMap, final VoteAggregate voteAggregate) { Log.info("mergeInNodes; adding map " + sourceMap + (sourceMap == baseMap ? " (BASE-MAP)" : "") + "; voter=" + voteAggregate); final boolean isVoting = (voteAggregate != null); for (LWComponent srcNode : sourceMap.getAllDescendents(ChildKind.PROPER)) { if (isMergeSkipped(srcNode)) continue; final Object mergeKey = getMergeKey(srcNode); if (mergeKey == null) continue; final LWComponent alreadyMerged = keysMerged.get(mergeKey); if (alreadyMerged != null) { annotateNodeSource(sourceMap, srcNode, alreadyMerged); continue; } //----------------------------------------------------------------------------- // weightAggregate was ignored here //----------------------------------------------------------------------------- // Note: UI is misleading: vote "style" does't just refer to coloring -- it // also refers to what goes in the map. //----------------------------------------------------------------------------- if (isVoting && !voteAggregate.isNodeVotedIn(mergeKey)) continue; if (excludeNodesOnBaseMap && baseMapKeys.contains(mergeKey)) ; // skip this srcNode else copyInNode(sourceMap, srcNode, mergeKey); } } /** duplicate the source, annotate with it's source, and copy it to the * output map, also recording the mergeKey in the keysMerged map */ private void copyInNode(LWMap sourceMap, LWComponent sourceNode, Object mergeKey) { final LWComponent node = sourceNode.duplicate(); annotateNodeSource(sourceMap, sourceNode, node); keysMerged.put(mergeKey, node); outputMap.addChild(node); } /** takes an already duplicated link, annotates it, and adds it to the map */ private void copyInLink(ConnectivityMatrixList cms, Object headKey, Object tailKey, LWLink link) { annotateLinkSources(cms, headKey, tailKey, link); outputMap.addChild(link); } private void installLinksForVotes( final Collection<LWComponent> allMergedNodes, final ConnectivityMatrixList cms, final VoteAggregate voteAggregate) { // This often used to create a seprate link for each direction: I presume that // was a bug and it's been fixed. for (LWComponent head : allMergedNodes) { final Object headKey = getMergeKey(head); for (LWComponent tail : allMergedNodes) { if (head != tail) { final Object tailKey = getMergeKey(tail); if (voteAggregate.testAndConsumeOppositeLinkVote(headKey, tailKey)) { // This link relationship appeared on enough maps to make the vote final LWLink link = new LWLink(head, tail); link.setArrowState(LWLink.ARROW_NONE); // default is tail copyInLink(cms, headKey, tailKey, link); } } } } } private void installLinksAndStylesForWeights( final Collection<LWComponent> allMergedNodes, final ConnectivityMatrixList cms, final WeightAggregate weightAggregate) { final List<Style> nodeStyles = new ArrayList<Style>(); final List<Style> linkStyles = new ArrayList<Style>(); for(int si=0;si<5;si++) nodeStyles.add(StyleMap.getStyle("node.w" + (si +1))); for(int lsi=0;lsi<5;lsi++) linkStyles.add(StyleMap.getStyle("link.w" + (lsi +1))); for (LWComponent node : allMergedNodes) { // In using our new Resource-but-default-to-Label merge, it would still might be nice // if later label hits would hit the resource label as well. They'd probably // have to map to the same index, however, which would make ConnectivityMatrix // much more complicated, as well as the code that uses it. final double score = weightAggregate.getPercentFound(node); final Style style = nodeStyles.get(getIntervalForNode(score)-1); final String styleColor = style.getAttribute("font-color"); node.setFillColor(Style.hexToColor(style.getAttribute("background"))); if (styleColor != null) node.setTextColor(Style.hexToColor(styleColor)); } for (LWComponent head : allMergedNodes) { final Object headKey = getMergeKey(head); for (LWComponent tail : allMergedNodes) { if (head == tail) continue; final Object tailKey = getMergeKey(tail); // weight: the number of maps that reflect this connection. Note that multiple // connections between the two node keys on the SAME map to NOT increase the // weight. final int weightAlpha = weightAggregate.getConnection(headKey, tailKey); if (weightAlpha <= 0) continue; double score = 100 * weightAlpha / weightAggregate.getCount(); // [DAN] are either of these ever happenning? If so, why? [SMF: if merge on 0 maps...] if (score > 100) score = 100; else if (score < 0) score = 0; final Style linkStyle = linkStyles.get(getIntervalForLink(score)-1); final LWLink link = new LWLink(head, tail); // If the revese (omega) connection exists, zero it out in aggregate so we don't // add another link for it. final int weightOmega = weightAggregate.getConnection(tailKey, headKey); if (weightOmega > 0 && includeSecondaryMaps) { // todo: flag check looks wrong: what's base-map filter have to do with this? link.setArrowState(LWLink.ARROW_BOTH); weightAggregate.setConnection(tailKey, headKey, 0); } link.setStrokeColor(Style.hexToColor(linkStyle.getAttribute("background"))); link.setStrokeWidth(weightAlpha); // weight can never be > number of maps merged copyInLink(cms, headKey, tailKey, link); } } } private static Set hashMergeKeys(LWMap map) { final Set hashedKeys = new HashSet(); for (LWComponent c : map.getAllDescendents(ChildKind.PROPER)) { if (!isMergeSkipped(c)) { final Object key = getMergeKey(c); if (key != null) hashedKeys.add(key); } } return hashedKeys; } private int getIntervalForNode(double score) { int count = 0; for (Double d : nodeIntervalBoundaries) { if (score < d.doubleValue()) return count; count++; } return 0; } private int getIntervalForLink(double score) { int count = 0; for (Double d : linkIntervalBoundaries) { if (score < d.doubleValue()) return count; count++; } return 0; } private static Object getMergeKey(LWComponent c) { return edu.tufts.vue.compare.Util.getMergeProperty(c); } /* for LWComponent.putclientData */ private static final class Counter { int count = 0; } // Note that since clientData is runtime *instance* information, and not copied over on // duplication, we don't have to worry about cleaning these up / having them propagate. private void annotateNodeSource(LWMap sourceMap, LWComponent sourceNode, LWComponent newMergeNode) { annotate(new StringBuilder(annotationPartForMap(sourceMap)), sourceNode, newMergeNode); } private final StringBuilder _cachedBuilder = new StringBuilder(); private void annotate(StringBuilder mapPart, LWComponent source, LWComponent target) { final StringBuilder anno = _cachedBuilder; anno.setLength(0); // a problem with putting id in a "natural" place such as after the map name is then we // can't search on "map-name/node-name" as ID is in way, as in "map-name/12345/node-name". // this string is designed based on the ease of search options it provides e.g., // "i/some-map" will mean all images from that map, "some-map/bob" means any node whose // name begins with "bob" from some-map. final String id = source.getID(); anno.append(id); anno.append('/').append(source.getComponentTypeLabel().charAt(0)); anno.append('/').append(mapPart); anno.append('/').append(source.getDisplayLabel()); // todo: truncate putMergeAnnotation(target, anno.toString()); if (target instanceof LWImage || DEBUG.TEST) { // images have no rollovers to show the merge data // We could store a list of all the actual source nodes in the client data, and annotate // at the end. That could even allow us to design a fancier "merge" that somehow lets us // combine the properties of the merged nodes, instead of the random "duplicate the 1st // found" method we have now. // FYI: the first time this method sees this component during a merge, it happens to // be the actual duplicate of source (unless it's a link), and it wont have any // counter set yet. Each time we see it after that, source is another node with the // same merge-key, possibly from a different source map. (BTW, the choice of which // merge-key matching node to use as the actual duplication source is somewhat random: // the merge process simply uses the first it comes across in the order we merge the // maps.) Counter counter = target.getClientData(Counter.class); if (counter == null) counter = target.putClientData(new Counter()); counter.count++; final String notesAnno = String.format("[in:%d:%s]", counter.count, anno.toString()); if (target.hasNotes()) { target.setNotes(target.getNotes() + (counter.count > 1 ? "\n" : "\n----\n") + notesAnno); } else target.setNotes(notesAnno); } } private static String annotationPartForMap(LWMap m) { final String name = m.getDisplayLabel(); if (name.endsWith(".vue")) return name.substring(0, name.length()-4); else return name; } /** cache for list of LWLink's used as a clientData key */ private static final class LinksCache extends ArrayList<LWLink> { static List<LWLink> getLinksList(LWMap map) { final List<LWLink> list = map.getClientData(LinksCache.class); return list == null ? new LinksCache(map) : list; } /**/private LinksCache(LWMap map) { // getDescendentsOfType actually generates a filtering Iterator on the whole list, not // a list of the actual types desired. for (LWLink link : map.getDescendentsOfType(LWLink.class)) add(link); map.putClientData(this); } } // todo: above generic "ListCache" w/parameterized type could be handy /* Todo: this could be slow when merging large similar maps. It iterates every link in each and * every input map that has a connection bewteen the two input keys (there was an LWLink between * those two keys). Possible changes: the call context could provide some of the source info, * and/or the matrix could store a list of links at the [head][tail] / [tail][head] connection * sites. Or, we could do this once per map and create a new kind of matrix to hold it. Note * that this works differently than node annotation because we do not copy actual links: we * create new ones based on the presence of a link between merge-key nodes on the source * maps. */ private void annotateLinkSources(final ConnectivityMatrixList sources, final Object key1, final Object key2, final LWLink newLink) { // final StringBuilder anno = new StringBuilder("#source:"); final StringBuilder annoBuf = new StringBuilder(""); final int tagPrefix = annoBuf.length(); int matchCount = 0; for (ConnectivityMatrix matrix : sources) { if (matrix.getConnection(key1, key2) <= 0) continue; final LWMap map = matrix.getMap(); annoBuf.setLength(tagPrefix); annoBuf.append(annotationPartForMap(map)); final int mapPrefix = annoBuf.length(); for (final LWLink link : LinksCache.getLinksList(map)) { final LWComponent head = link.getHead(); // note: affected by pruning final LWComponent tail = link.getTail(); // note: affected by pruning if (head == null || tail == null) { // currently shouldn't be happening -- connectivity matrix only counts // links with both, but do nothing, just in case } else { final Object headMP = Util.getMergeProperty(head); final Object tailMP = Util.getMergeProperty(tail); final boolean matches = (headMP.equals(key2) && tailMP.equals(key1)) || (headMP.equals(key1) && tailMP.equals(key2)) ; // we ignore directionality on purpose if (matches) { annoBuf.setLength(mapPrefix); annotate(annoBuf, link, newLink); matchCount++; } } } } if (matchCount > 1) newLink.setStrokeWidth(matchCount); else if (matchCount <= 0) Log.warn("no sources for: " + newLink); } private static void putMergeAnnotation(final LWComponent mergeComponent, final String annotation) { // Note that this meta-data does NOT show up in the "Keywords" InspectorPane tab -- it's // just used in rollovers (would be better to show up somehow, uneditable, in Keywords, // however). This data will also appear in the RDFIndex, and this can be searched on, tho // the user won't have a place to see where the hit came from except the rollover. // final VueMetadataElement vme = new VueMetadataElement(); // // vme.setType(VueMetadataElement.OTHER); // impl has always ended up ignoring this // vme.setObject(annotation); // VME IMPL HAS ALWAYS OVERWRITTEN TYPE TO TAG WHEN DOING THIS // mergeComponent.getMetadataList().getMetadata().add(vme); mergeComponent.getMetadataList().getMetadata() .add(VueMetadataElement.createSourceTag(annotation)); } private void annotateMap(boolean asVote) { final StringWriter buf = new StringWriter(64); final PrintWriter p = new PrintWriter(buf); p.println("Merge map: " + new Date()); p.println("Merge property: " + edu.tufts.vue.compare.Util.getMergeProperty()); p.println("Merge style: by " + (asVote ? "vote" : "weight")); if (asVote) { p.printf("Node vote threshold: %d%%\n", nodeThresholdSliderValue); p.printf("Link vote threshold: %d%%\n", nodeThresholdSliderValue); } // could include weight intervals../ p.println("Sources: " + activeMaps.size()); p.println("Source: " + baseMap.getDisplayLabel() + " (base)"); for (LWMap map : activeMaps) { if (map != baseMap) p.println("Source: " + map.getDisplayLabel()); } outputMap.setNotes(buf.toString()); } // [DAN] old method for recording source nodes code currently does not compile // (its commented out below) - needs adjustment to // LWComponent from LWNode (for both LWImage and LWNode) // a relatively minor fix and probably // also needs to be stored in special metadata of a new type... // (since otherwise this info always appears in the notes and possibly // out of sight) // **new system is to use VueMetadataElement.OTHER (see below)** // public static final boolean RECORD_SOURCE_NODES = false; // edu.tufts.vue.metadata.VueMetadataElement vme = new edu.tufts.vue.metadata.VueMetadataElement(); // vme.setType(edu.tufts.vue.metadata.VueMetadataElement.OTHER); // vme.setObject("source: " + node.getMap().getLabel() + "," + sourceLabel); // c.getMetadataList().getMetadata().add(vme); // // todo: this should become only the default initialization method // // for interval boundaries -- in fact: implementing 4/1/2008 // private void setIntervalBoundaries() // { // nodeIntervalBoundaries = new ArrayList<Double>(); // for(int vai = 0;vai<6;vai++) { // double va = 20*vai + 0.5; // nodeIntervalBoundaries.add(new Double(va)); // } // linkIntervalBoundaries = new ArrayList<Double>(); // for(int vai = 0;vai<6;vai++) { // double va = 20*vai + 0.5; // linkIntervalBoundaries.add(new Double(va)); // } // } public String toString() { return getClass().getSimpleName() + "=" + outputMap.getLabel(); } }