/* * Copyright 2003-2010 Tufts University Licensed under the * Educational Community License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.osedu.org/licenses/ECL-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS IS" * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing * permissions and limitations under the License. */ package tufts.vue.ds; import tufts.Util; import tufts.vue.DEBUG; import tufts.vue.MetaMap; import tufts.vue.LWComponent; import java.util.*; /** * Mainly a collection of static methods for searching data-sets (Schemas) * and finding relationships among row-nodes and value-nodes. Some * methods return instances of a Relation which say something about * how the data was related. * * @version $Revision: 1.8 $ / $Date: 2010-02-03 19:13:16 $ / $Author: mike $ * @author Scott Fraize */ public final class Relation { private static final org.apache.log4j.Logger Log = org.apache.log4j.Logger.getLogger(Relation.class); // relationship types public static final Object USER = "USER"; // user association public static final Object AUTOMATIC = "AUTO"; public static final Object JOIN = "JOIN"; /** essentially an intra-schema "join" */ public static final Object COUNT = "COUNT"; static final String ALL_VALUES = "*all-values*"; private static final String NO_VALUE = "<no-value>"; private static final boolean CROSS_SCHEMA = true; final Object type; final String key, value; final boolean isCrossSchema; int count = 1; private Relation(Object type, String k, String v, boolean crossSchema) { this.type = type; this.key = k; this.value = v; this.isCrossSchema = crossSchema; } private Relation(Object type, String k, String v) { this(type, k, v, false); } int getCount() { return count; } boolean isCrossSchema() { return isCrossSchema; } // private void setCrossSchema() { // isCrossSchema = true; // } String getDescription() { String s; if (type == JOIN || type == COUNT) { // value is empty, key is description s = String.format("%s: %s", type, key); } else { s = String.format("%s=%s", key, value); } if (count > 1) { s += String.format(" x %d", count); } return s; } // private Relation(Object type, String description) { // this(Relation.JOIN, true, description, NO_VALUE); // if (type != JOIN) // throw new Error("must be a join relation if only a description is provided"); // } @Override public String toString() { return String.format("Relation[%s %s=%s #%d%s]", type, key, value, count, isCrossSchema ? " X" : ""); // if (type == JOIN) { // // value is empty, key is description // return String.format("Relation[%-10s %s]", type, key); // } else { // return String.format("Relation[%-10s %s=%s]", type, key, value); // } // //return String.format("Relation[%-10s %s=%s %s]", type, key, value, isForward ? "->" : "<-"); } //======================================================================================== // Can we do this: all the findMatching / getMatching association using code below // is converted to generic non-schema based code, that just works on lists of // MetaMaps -- then we could use it with both the list of DataRow's, as well as lists // of LWComonent meta-data? The getField checks returning EMPTY_LIST couldn't be done // tho -- that would be a nice optimization to keep. Or could we really keep a Schema // around which represents all the data on the map? *adding* to that schema would be // easy, but *removing* data from it would be a problem -- schema's don't work that way. //======================================================================================== /** an interface for any key-value data map. may eventually be removed, but * useful for making the search routines work with anything that can provide * these basic functions or easily delegate to something that can */ // todo: consider renaming to DataRow, and renaming the Schema.DataRow impl & making it private public interface Scannable { public boolean hasEntry(String key, CharSequence value); public String getString(String key); public Collection<String> getValues(String key); public Schema getSchema(); } // /** @return null if none found, or the VALUE of what was the relation was based on otherwise */ // static String getCrossSchemaRelation // (final Field field, // final MetaMap rowData, // final String fieldValue) // { // String relation = null; // int count = -1; // for (String joinedValue : getCrossSchemaJoinedValues(field, rowData, fieldValue)) { // count++; // if (count > 0) { // Log.debug("IGNORING JOINED VALUE #" + count + " FOR LINK CREATION: " + joinedValue, new Throwable("HERE")); // continue; // } // //final String relation = String.format("%s=\"%s\"\n%s=\"%s\"", // //indexKey, indexValue, // //extractKey, extractValue); // relation = String.format("matched joined value \"%s\"", joinedValue); // } // return relation; // } /** @return the first Relation found if any, null otherwise */ static Relation getCrossSchemaRelation (final Field field, final MetaMap rowData, final String fieldValue) { Relation relation = null; int count = -1; for (Relation join : getCrossSchemaJoinedValues(field, rowData, fieldValue)) { count++; if (count > 0) { Log.debug("IGNORING JOINED VALUE #" + count + " FOR LINK CREATION: " + join.value, new Throwable("HERE")); continue; } relation = join; } return relation; } // static Collection<String> getCrossSchemaJoinedValues // (final Field field, // final MetaMap rowData, // final String fieldValue) // return extract values that match, unless this is ALL_FIELD_VALUES // { // return getCrossSchemaJoinedValues(field, Collections.singletonList(rowData), fieldValue); // } //---------------------------------------------------------------------------------------- // The first VUE3 Grant case, Rockwell Kent: // // The most complicated part is joining the Mediums. We could just get away with // using row nodes for mediums, and our existing code would find relationships pretty // well, but the problem is that there are 42 paintings (records), with an eventual // enumerated set of only 12 medium's, and we'd like to be able to place just THOSE // TWELVE on the map, w/out having to add a new record for every painting, so we // could cluster the paintings by medium if we wish. //---------------------------------------------------------------------------------------- /** * currently used in these ways: * 1 - if fieldValue is ALL_VALUES, use rowData to find all relating field values * 2 - if fieldValue is specific, we return only Relations found with that fieldValue * 3 - called repeatly across rows to find rows that have a matching fieldValue */ static Collection<Relation> getCrossSchemaJoinedValues (final Field field, final MetaMap rowData, //final Collection<MetaMap> rowData, final String fieldValue) // return extract values that match, unless this is ALL_FIELD_VALUES { if (rowData == null) { //Util.printStackTrace("NULL ROW DATA AGAINST FIELD: " + field); return Collections.EMPTY_LIST; } //----------------------------------------------------------------------------- // // WE IMPLEMENT JOIN's HERE: E.g., if we drag Rockwell-Mediums.medium onto a // Rockwell-Paintings.<row-node> and Mediums has been joined to Paintings via // their key Field "titles" (tho they don't have to have the same name as per // Associations), then we search for all Mediums rows with a title that matches // the drop-target row-node "titles" (doesn't have to be same name), and those // will be the value nodes. // We could implement this elsewhere if we replace the above getValues call // with something like Field.getMatchingValues(<row-data>), which will // work like Schema.getMatchingValues (or put it right in the schema code). // //----------------------------------------------------------------------------- final Schema fieldSchema = field.getSchema(); final Schema rowSchema = rowData.getSchema(); //final Set<String> valuesSeen = new HashSet(); // to detect dups //final List<Relation> results = new ArrayList(); final Map<String,Relation> results = new HashMap(); if (DEBUG.SCHEMA || DEBUG.WORK) { Log.debug("getCrossSchemaJoinedValues:" + "\n\tfieldSchema: " + fieldSchema + "\n\t rowSchema: " + rowSchema + "\n\t rowData: " + rowData + "\n\t field: " + quoteKV(field, fieldValue) ); } if (fieldSchema == rowSchema) { // Log.warn("JOIN: same schema, no joins possible for " + quoteKey(field) + "<=>" + rowData, // new Throwable("HERE")); //Log.debug("JOIN: warning: same schema, no joins possible for " + field + "<=>" + rowData); //Util.printStackTrace("same schema: " + fieldSchema); //---------------------------------------------------------------------------------------- // TODO: procuring these params this way is a total hack -- need uniform method of // pulling this info from a MetaMap //---------------------------------------------------------------------------------------- final String fieldName = rowData.getString(LWComponent.EnumeratedValueKey); if (fieldName == null) { if (DEBUG.SCHEMA || DEBUG.WORK) Log.debug("JOIN: same schema, no joins possible for " + quoteKey(field) + "<=>" + rowData); //new Throwable("HERE")); return Collections.EMPTY_LIST; } final Field indexKey = rowSchema.getField(fieldName); final String indexValue = rowData.getString(fieldName); //---------------------------------------------------------------------------------------- Log.debug("INTRA-SCHEMA JOIN: indexing " + quoteKV(indexKey, indexValue)); runJoin(fieldSchema, indexKey, indexValue, field, fieldValue, results, Relation.COUNT, false); return results.values(); //return Collections.EMPTY_LIST; } int i = -1; //for (Association join : Association.getBetweens(fieldSchema, rowSchema)) { for (Association join : Association.getJoins(rowSchema, field)) { i++; //----------------------------------------------------------------------------- // THE CASE WE'RE MISSING IS THIS: (to handle IN getCrossSchemaRelation, // which should become a more generic getRelations) -- if the SEARCH field // (e.g., medium) is DIFFERENT than the field found in the Association, only // THEN do we need to attempt a join, otherwise, we can use a straight // Association. Actually, not 100% sure where to put that -- we may want to // check for this in callers before we even get here, and/or go back to // having Association.getJoins(schema1, schema1, FIELD), which will only // return betweens that do NOT match the given field. // // [ABOVE IS NOW HANDLED VIA Association.getJoins, which implements the exclusion] // // We may need to revisit everywhere we check Associations in low-level // code and pull that up to higher level, or at least replace them // with a smarter association check that automatically handles the join // cases (in which case, joins might even auto-cascade across data-sets, // tho that could be dangerous, and recording the relationship would // become a recursive process). //----------------------------------------------------------------------------- if (DEBUG.Enabled) Log.debug("JOIN #" + i + ": " + join); // This works for the Rockwell-Mediums case, tho only for initial node // creation of course -- NEED TO GENERALIZE final Field indexKey = join.getFieldForSchema(fieldSchema); final String indexValue = rowData.getString(join.getKeyForSchema(rowSchema)); // todo: multi-values runJoin(fieldSchema, indexKey, indexValue, field, fieldValue, results, Relation.JOIN, true); // insert process-join here? (that can be re-usable for INTRA-schema "joins") } return results.size() > 0 ? results.values() : Collections.EMPTY_LIST; } /** @return passed in results for convenience */ // call this something like "getUniqueRelatedValues" // this doesn't care if the searchKey is from the indexSchema or not (originally, this // was from code that would only work if the searchKey was from a DIFFERENT Schema) private static void runJoin (final Schema indexSchema, final Field indexKey, final String indexValue, final Field searchKey, final String searchValue, final Map<String,Relation> results, final Object joinType, final boolean isCrossSchema) { if (DEBUG.Enabled) { Log.debug("RUNJOIN " + quoteKV(indexKey, indexValue) + " in " + indexSchema + " for " + quoteKV(searchKey, searchValue)); } final Collection<DataRow> matchingRows = indexSchema.getMatchingRows(indexKey, indexValue); if (DEBUG.Enabled) Log.debug("JOIN: found 1st pass rows: " + Util.tags(matchingRows)); for (DataRow row : matchingRows) { // todo: use Schema.searchData? final Collection<String> joinedValues = row.getValues(searchKey); if (DEBUG.Enabled && joinedValues.size() > 1) Log.debug("JOIN: extracted " + Util.tags(joinedValues)); for (String extractValue : joinedValues) { // most commonly, we will only iterate over one extractValue // (one value per key) //------------------------------------------------------------------ // ** THE EXCLUSION CASE ** if (searchValue == ALL_VALUES) ; // proceed -- we're not filtering, accumulate all values else if (searchValue.equals(extractValue)) ; // proceed -- we've matched a value we're looking for else continue; // stop: doesn't match what we're looking for //------------------------------------------------------------------ if (DEBUG.Enabled) { String debug = String.format("%s=%s; %s=%s", indexKey, Util.tags(indexValue), searchKey, Util.tags(extractValue)); Log.debug(Util.TERM_YELLOW + "JOIN: relation " + debug); // if we keep the code that is just looking to see if a relation // exists at all (e.g., the link finding code), we can abort runJoin // entirely as soon as the first one is found. } Relation r = results.get(extractValue); if (r == null) { final String relation = String.format("%s=\"%s\"; %s=\"%s\"", indexKey, indexValue, searchKey, extractValue); r = new Relation(joinType, relation, extractValue, isCrossSchema); Log.debug("JOIN: added " + r); results.put(extractValue, r); } else { r.count++; } } } } // if (DEBUG.Enabled) { // Log.debug("JOIN #" + i // + ": indexKey=" + quoteKey(indexKey) // + "; indexValue=" + quoteVal(indexValue)); // } // final Collection<DataRow> matchingRows = fieldSchema.getMatchingRows(indexKey, indexValue); // if (DEBUG.Enabled) Log.debug("JOIN: found rows: " + Util.tags(matchingRows)); // final Field extractKey = field; // for (DataRow row : matchingRows) { // // todo: use Schema.searchData? // final Collection<String> joinedValues = row.getValues(extractKey); // if (DEBUG.Enabled) Log.debug("JOIN: extracted " + Util.tags(joinedValues)); // for (String extractValue : joinedValues) { // //------------------------------------------------------------------ // // ** THE EXCLUSION CASE ** // if (fieldValue == ALL_VALUES) // ; // proceed -- we're not filtering, accumulate all values // else if (fieldValue.equals(extractValue)) // ; // proceed -- we've matched a value we're looking for // else // continue; // stop: doesn't match what we're looking for // //------------------------------------------------------------------ // if (DEBUG.Enabled) { // String debug = String.format("%s=%s; %s=%s", // indexKey, Util.tags(indexValue), // extractKey, Util.tags(extractValue)); // Log.debug(Util.TERM_YELLOW + "JOIN: FOUND RELATION " + debug); // } // if (valuesSeen.add(extractValue)) { // final String relation = String.format("%s=\"%s\"; %s=\"%s\"", // indexKey, indexValue, // extractKey, extractValue); // Relation r = new Relation(Relation.JOIN, relation, extractValue); // results.add(r); // } // } // } //---------------------------------------------------------------------------------------- // todo: all these methods take DataRows, which are really a wrapper of a MetaMap -- // should chage all this stuff to work from MetaMaps (which actually have the // darn schema in them now anyway!) And then we could also use some of the util // functions / link-creating funtions based on LWComponent to use MetaMap's as well. // TODO: need to move isDataValueNode / isDataRowNode to MetaMap //---------------------------------------------------------------------------------------- /** * @param schema - the schema to search through * @param filterNode - the data-node (either a data-row-node or a data-value-node), * to use as a basis for searching the schema for related rows */ static Collection<DataRow> findRelatedRows(Schema schema, LWComponent filterNode) { if (DEBUG.Enabled) { Log.debug("\n\nfindRelatedRows:" + "\n\t schema: " + schema + "\n\t node: " + filterNode + "\n\t data: " + filterNode.getRawData() ); } if (filterNode.isDataValueNode()) { if (DEBUG.Enabled) Log.debug("findRelatedRows: VALUE node: " + filterNode); //----------------------------------------------------------------------------- // TODO: Support "merged" or compound value nodes. A node that either has multiple // values, or multiple keys and values. This will complicate this code, tho // maybe it will actually all become more similar in some respects as it // will all behave like a row-based search? Actually, all we need to do is // get all (non-internal) key/values from the filterNode, and accumulate the // matching rows here, or impl that in getMatchingRows. // // Compound nodes would make the HIERARCHY use case much easier. E.g., // in ClubZora test set, drop "Role" onto "Latin America", where "Latin America" // is surrounded by it's row nodes, and we simply create compound value // nodes like this: // Compound 1: Region="Latin America", Role="user" // Compound 2: Region="Latin America", Role="mentor" // Compound 3: Region="Latin America", Role="admin" // etc... // // And then we'd cluster these compounds around Latin America, with extra // distance for the 2nd tier clustering, and then re-cluster the row nodes // around each of the compound nodes. The relationship from each compound // node to it's set of row nodes would naturally be found by the search // routines once their modified to handle compound matching, which shouldn't // be too hard. In this particular use case, the links from "Latin America" // to all the row nodes may become pretty messy/noisy tho -- need helper // actions for cleaning those out. // // Or, could hack this at the map level by MakeCluster on a parent node // w/no links does a joined cluster from all data-value children. // OR even just by putting them in a group. Cluster nodes could be made // smart to prefer clustering for each of the grouped relations // in the "pie-slice" closest / most facing the appropriate node, tho // that would only really work with 2 nodes when stacked vertically, // more nodes than that would work best laid out in a row (as nodes are // usually wider than they are tall, especially value nodes). // ----------------------------------------------------------------------------- final Field filterField = filterNode.getDataValueField(); if (filterField == null) throw new NullPointerException("node has no field: " + filterNode); final String filterValue = filterNode.getDataValue(filterField.getName()); return schema.getMatchingRows(filterField, filterValue); } else if (filterNode.isDataRowNode()) { if (DEBUG.Enabled) Log.debug("findRelatedRows: ROW node: " + filterNode); //Log.debug("FINDING ROWS MATCHING KEY FIELD OF FILTERING ROW NODE: " + filterNode); final Schema filterSchema = filterNode.getDataSchema(); if (filterSchema == schema) throw new Error("can't do row-node filter from same schema"); return schema.getMatchingRows(filterNode.getRawData()); } else { throw new Error("unhandled filter case: " + filterNode); } } /** search the given Scannable's for the given Field=fieldValue, using association's, and add matches to results * This essentially does an "A.K.A" with the Field based on the user associations */ //--------------------------------------------------------------------------------------------------- // TODO: currently only being used with schema rows as searchSet -- meant to also use with // LWComponent's to normalize data-search code. // --------------------------------------------------------------------------------------------------- static void searchDataWithField (final Field fieldKey, final String fieldValue, final Collection<? extends Scannable> searchSet, final Collection results) { final String fieldName = fieldKey.getName(); if (DEBUG.Enabled) { Log.debug(String.format("searchDataWithField: %s=%s", quoteKey(fieldKey), Util.tags(fieldValue))); // Log.debug(String.format("searchDataWithField: %s='%s'\n\tsearchSet: %s", // fieldKey, fieldValue, Util.tags(searchSet))); // //Util.dump(Association.lookup(field)); } searchData(fieldName, fieldValue, searchSet, results); for (Association a : Association.getAliases(fieldKey)) { if (a.isEnabled()) { final String relatedField = a.getPairedField(fieldKey).getName(); if (fieldName.equals(relatedField)) continue; // already searched above searchData(relatedField, fieldValue, searchSet, results); } } } /** * Uses an entire row of data to do AKA searches looking for relationships * betweens rows from DIFFERENT schema's. * * @param rowKey - a data "row" - a bag of related key/value pairs * @param searchSet - a bag of rows to search for association based relationships * @param searchSchema - Schema for the rows in the searchSet, DIFFERENT from rowKey's schema * @param results - rows found in the searchSet that have a relationship to rowKey will be added here * * For example, in VUE, this would eventually be called after dropping "all rows" in * the DataTree from Schema-InTree, onto a node on the map from Schema-OnMap, and searching * all rows in Schema-InTree for nodes to add to the map that are related to the row from * Schema-OnMap. * */ //--------------------------------------------------------------------------------------------------- // TODO: currently only being used with schema rows as searchSet -- meant to also use with // LWComponent's to normalize data-search code. // --------------------------------------------------------------------------------------------------- static void searchDataWithRow (final Scannable rowKey, // e.g., a MetaMap, from a DIFFERENT schema than the search-set final Collection<? extends Scannable> searchSet, final Schema searchSchema, // may be null if the searchSet contains more than one Schema final Collection results) { if (searchSchema == null) throw new UnsupportedOperationException("schema is null; variable searchSet schema's not implemented"); // [edit] look auto-joins e.g., if there are ANY join between Faculty & Pubs (e.g., // Name=Author), then these to schemas are in fact "joined", and can filter // based on that. // // So dropping Pubs onto a Faculty can find the joins (just the first joined? // priority to key fields?) uses Faculty.Name to search through all Pubs for // matching Pubs.Author's, and pulls those records. // // Note that in this case, Faculty.Name happens to be a key field, but // Pubs.Author is NOT a key field. (Pubs.Title is the key field there) final Schema keySchema = rowKey.getSchema(); if (DEBUG.Enabled) { Log.debug("searchDataWithRow: " + "\n rowKey: " + rowKey + "\n searchSet: " + Util.tags(searchSet) + "\nsearchSchema: " + searchSchema ); } if (keySchema == searchSchema) throw new Error("can only search a schema with a row from another schema: " + keySchema); for (Association a : Association.getBetweens(searchSchema, keySchema)) { if (DEBUG.Enabled) Log.debug("searchDataWithRow: scanning for " + a); final String localKey = a.getKeyForSchema(searchSchema); final String remoteKey = a.getKeyForSchema(keySchema); // instead of data.getString, we really needs to search ALL the values for that key??? // (e.g., multiple category values) searchData(localKey, rowKey.getString(remoteKey), // TODO: handle multiple values searchSet, results); } for (Scannable s : searchSet) { if (isAutoRelated(rowKey, s)) results.add(s); } } /** search the given Scannable's for the given key=value, and add matches to results */ private static void searchData (final String key, final String value, final Collection<? extends Scannable> searchSet, final Collection results) { if (DEBUG.Enabled) Log.debug("searchData: " + quoteKV(key, value) + " in " + Util.tags(searchSet)); for (Scannable row : searchSet) { if (row.hasEntry(key, value)) results.add(row); } } private static Relation tryAutoRelate(Scannable row1, Scannable row2) { //----------------------------------------------------------------------------- // [note: logic was initially from makeCrossSchemaRowNodeLinks) // First, check for matching key field names, even if there isn't an explicit // association between the two. This code checks for the presence of the key // field from one schema ANYWHERE else in the paired node -- not just as the key // field. Essentially an automatic open-ended association based on key fields. // ----------------------------------------------------------------------------- String relatedValue; //------------------------------------------------------- final Schema s1 = row1.getSchema(); final String keyField1 = s1.getKeyFieldName(); relatedValue = relatedBy(keyField1, row1, row2); if (relatedValue != null) return new Relation(AUTOMATIC, keyField1, relatedValue); //------------------------------------------------------- final Schema s2 = row2.getSchema(); final String keyField2 = s2.getKeyFieldName(); relatedValue = relatedBy(keyField2, row1, row2); if (relatedValue != null) return new Relation(AUTOMATIC, keyField2, relatedValue); //------------------------------------------------------- // TODO: do open-ended relating based in any key fields that happen to have the same name // for performance, in the cases where we know the schemas up front, we can inspect // them for shared names and then just only ever look for those shared names. // Actually, could just compute & store that in every schema -- a map in each schema // by all other schemas of all field names shared by those other schemas. return null; } /** note: duplicate core logic of tryAutoRelate, trimmed for performance (it doesn't return a new Relation) */ private static boolean isAutoRelated(Scannable row1, Scannable row2) { final Schema s1 = row1.getSchema(); if (relatedBy(s1.getKeyFieldName(), row1, row2) != null) return true; final Schema s2 = row2.getSchema(); if (relatedBy(s2.getKeyFieldName(), row1, row2) != null) return true; return false; } /** @return true if the two rows (from the same schema) are the "same" -- the have the same key field value */ public static boolean isSameRow(final Scannable row1, final Scannable row2) { if (DEBUG.Enabled) { if (row1.getSchema() != row2.getSchema()) { //Log.debug("testing same row for different schemas"); return false; } } return relatedBy(row1.getSchema().getKeyFieldName(), row1, row2) != null; } public static boolean isSameRow(final LWComponent c1, final LWComponent c2) { if (c1 == null || c2 == null) return false; return isSameRow(c1.getRawData(), c2.getRawData()); } /** @return true if the two rows in the given schema are the "same" -- the have the same key field value */ public static boolean isSameRow(final Schema schema, final Scannable row1, final Scannable row2) { if (DEBUG.Enabled) { if (row1.getSchema() != row2.getSchema()) throw new Error("different schemas"); if (row1.getSchema() != schema) throw new Error("schema mis-match"); } return relatedBy(schema.getKeyFieldName(), row1, row2) != null; } /** @return the value that was found to match between the two * If more than one, we return the first for now. E.g., each row may have multiple * "category" values, and more than one might match. [todo: currently only checks first value!] */ private static String relatedBy (final String key, final Scannable row1, final Scannable row2) { //---------------------------------------------------------------------------------------- // NOTE: this uses hasEntry instead of fetching & comparing // values, which will automatically check ALL values for the given key //---------------------------------------------------------------------------------------- final String row1_value = row1.getString(key); // TODO: handle multiple values //Log.debug(String.format("relatedBy: %s", key)); //Log.debug(String.format("relatedBy0 %s='%s' in %s", key, row1_value, Util.tags(row2))); if (row2.hasEntry(key, row1_value)) { Log.debug(String.format("relatedBy: found %s='%s'", key, row1_value)); return row1_value; } // The semantic reverse of the above case. THE REASON WE DO TWO TESTS is only // for the case of multple values (e.g., 10 different category values). There // was some test case I forget where I wanted this -- pretty sure it was a news // feed example, probably at some time when we attempted to auto-relate on all // fields in a row. However, BUG: even doing this isn't enough: E.g., if key // was "category", we'd need to iterate through ALL the values for "category" // found in row1 (not just the first), and check it against ALL the values for // "category" found in row2. final String row2_value = row2.getString(key); // TODO: handle multiple values //Log.debug(String.format("relatedBy1 %s='%s' in %s", key, row2_value, Util.tags(row1))); if (row1.hasEntry(key, row2_value)) { Util.printStackTrace("relatedBy: returning on 2nd value: " + key + "=" + row2_value + "\n\trow1: " + row1 + "\n\trow2: " + row2 ); return row2_value; } return null; } // todo: if schema is flat (e.g., non-xml), we can do a much simpler/faster test */ /** if the two rows are related by the given keys, return the value they're related by, null otherwise */ private static String relatedByAKA_multiValues (final String key1, final String key2, final Scannable row1, final Scannable row2) { int i; // TODO: case independence for values? i = 0; for (String row1value : row1.getValues(key1)) { if (row2.hasEntry(key2, row1value)) { Log.debug("relatedByAKA: 1st pass found match at row1 value #" + i + " " + quoteKV(key2, row1value)); return row1value; } i++; } i = 0; for (String row2value : row2.getValues(key2)) { if (row1.hasEntry(key1, row2value)) { Log.debug("relatedByAKA: 2nd pass found match at row1 value #" + i + " " + quoteKV(key1, row2value)); return row2value; } i++; } return null; } /** @return a Relation between the two rows if any can be found, null otherwise * The rows must be from different schemas. */ public static Relation getRelation(Scannable row1, Scannable row2) { final Schema schema1 = row1.getSchema(); final Schema schema2 = row2.getSchema(); if (schema1 == schema2) throw new Error("same schema: " + schema1); if (DEBUG.SCHEMA && DEBUG.META) Log.debug("getRelation;\n\trow1=" + row1 + "\n\trow2=" + row2); for (Association a : Association.getAll()) { if (a.isEnabled() && a.isBetween(schema1, schema2)) ; // go ahead and apply this association else continue; final String key1 = a.getKeyForSchema(schema1); final String key2 = a.getKeyForSchema(schema2); //Log.debug("getRelation: applying key1=" + key1 + "; key2=" + key2 + " from " + a); // todo: if schema is flat (e.g., non-xml), we can do a much simpler/faster test String relatedValue = relatedByAKA_multiValues(key1, key2, row1, row2); if (relatedValue != null) { final String alsoKnownAsKey = String.format("%s=%s", key1, key2); // TWO keys equal this value return new Relation(USER, alsoKnownAsKey, relatedValue, CROSS_SCHEMA); } } return tryAutoRelate(row1, row2); // will return null if none found } // Old single-value-only relation code: // String relatedValue = row1.getString(key1); // // TODO: handle multiple values -- use row.hasEntry like relatedBy // if (relatedValue != null && relatedValue.equals(row2.getString(key2))) { // TODO: case independence? // final String alsoKnownAsKey = String.format("%s=%s", key1, key2); // TWO keys equals this value // return new Relation(RELATION_JOIN, // alsoKnownAsKey, // relatedValue); // } /** debug */ static String quoteKey(String s) { return Util.TERM_GREEN + s + Util.TERM_CLEAR; } /** debug */ static String quoteKey(Field f) { return Util.TERM_GREEN + f + Util.TERM_CLEAR; } /** debug */ static String quoteVal(String s) { return Util.quote(s, Util.TERM_RED); } /** debug */ static String quoteKV(String k, String v) { return quoteKey(k) + "=" + quoteVal(v); } /** debug */ static String quoteKV(Field f, String v) { return quoteKey(f) + "=" + quoteVal(v); } }