/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.lens.cube.parse; import static java.util.Calendar.DAY_OF_MONTH; import static java.util.Calendar.HOUR_OF_DAY; import static org.apache.lens.cube.metadata.DateFactory.*; import static org.apache.lens.cube.metadata.UpdatePeriod.*; import static org.testng.Assert.*; import java.util.*; import org.apache.lens.cube.metadata.*; import org.apache.lens.cube.metadata.ExprColumn.ExprSpec; import org.apache.lens.cube.metadata.ReferencedDimAttribute.ChainRefCol; import org.apache.lens.cube.metadata.timeline.EndsAndHolesPartitionTimeline; import org.apache.lens.cube.metadata.timeline.PartitionTimeline; import org.apache.lens.cube.metadata.timeline.StoreAllPartitionTimeline; import org.apache.lens.server.api.LensConfConstants; import org.apache.lens.server.api.error.LensException; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import 
org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ParseException; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.mapred.TextInputFormat; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import lombok.Getter; import lombok.extern.slf4j.Slf4j; /* * Here is the cube test setup * * Cube : testCube * * Fact storage and Updates: * testFact : {C1, C2, C3, C4} -> {Minutely, hourly, daily, monthly, quarterly, yearly} * testFact2 : {C1} -> {Hourly} * testFactMonthly : {C2} -> {Monthly} * summary1,summary2,summary3 - {C1, C2} -> {daily, hourly, minutely} * cheapFact: {C99} -> {Minutely, hourly, daily, monthly, quarterly, yearly} * C2 has multiple dated partitions * C99 is not to be used as supported storage in testcases * * CityTable : C1 - SNAPSHOT and C2 - NO snapshot * * Cube : Basecube * Derived cubes : der1, der2,der3 * * Fact storage and Updates: * testFact1_BASE : {C1, C2, C3, C4} -> {Minutely, hourly, daily, monthly, quarterly, yearly} * testFact2_BASE : {C1, C2, C3, C4} -> {Minutely, hourly, daily, monthly, quarterly, yearly} * testFact1_RAW_BASE : {C1} -> {hourly} * testFact2_RAW_BASE : {C1} -> {hourly} */ @SuppressWarnings("deprecation") @Slf4j public class CubeTestSetup { private Set<CubeMeasure> cubeMeasures; private Set<CubeDimAttribute> cubeDimensions; public static final String TEST_CUBE_NAME = "testCube"; public static final String DERIVED_CUBE_NAME = "derivedCube"; public static final String BASE_CUBE_NAME = "baseCube"; public static final String DERIVED_CUBE_NAME1 = "der1"; public static final String DERIVED_CUBE_NAME2 = "der2"; public static final String DERIVED_CUBE_NAME3 = "der3"; private static String c0 = "C0"; private static String c1 = "C1"; private static String c2 = "C2"; private static String c3 = "C3"; private static String c4 = "C4"; private static String c5 = 
"C5"; private static String c99 = "C99"; private static Map<String, String> factValidityProperties = Maps.newHashMap(); @Getter private static Map<String, List<UpdatePeriod>> storageToUpdatePeriodMap = new LinkedHashMap<>(); static { factValidityProperties.put(MetastoreConstants.FACT_RELATIVE_START_TIME, "now.year - 90 days"); } public static String getDateUptoHours(Date dt) { return HOURLY.format(dt); } interface StoragePartitionProvider { Map<String, String> providePartitionsForStorage(String storage); } public static String getExpectedUnionQuery(String cubeName, List<String> storages, StoragePartitionProvider provider, String outerSelectPart, String outerWhere, String outerPostWhere, String innerQuerySelectPart, String innerJoin, String innerWhere, String innerPostWhere) { if (!innerQuerySelectPart.trim().toLowerCase().endsWith("from")) { innerQuerySelectPart += " from "; } StringBuilder sb = new StringBuilder(); sb.append(outerSelectPart); if (!outerSelectPart.trim().toLowerCase().endsWith("from")) { sb.append(" from "); } sb.append(" ("); String sep = ""; for (String storage : storages) { sb.append(sep).append(getExpectedQuery(cubeName, innerQuerySelectPart + " ", innerJoin, innerWhere, innerPostWhere, null, provider.providePartitionsForStorage(storage))); sep = " UNION ALL "; } return sb.append(") ").append(cubeName).append(" ").append(outerWhere == null ? "" : outerWhere) .append(" ").append(outerPostWhere == null ? 
"" : outerPostWhere).toString(); } public static String getExpectedUnionQuery(String cubeName, List<String> storages, StoragePartitionProvider provider, String outerSelectPart, String outerWhere, String outerPostWhere, String innerQuerySelectPart, String innerWhere, String innerPostWhere) { return getExpectedUnionQuery(cubeName, storages, provider, outerSelectPart, outerWhere, outerPostWhere, innerQuerySelectPart, null, innerWhere, innerPostWhere); } public static String getExpectedQuery(String cubeName, String selExpr, String whereExpr, String postWhereExpr, Map<String, String> storageTableToWhereClause) { return getExpectedQuery(cubeName, selExpr, whereExpr, postWhereExpr, storageTableToWhereClause, null); } public static String getExpectedQuery(String cubeName, String selExpr, String whereExpr, String postWhereExpr, Map<String, String> storageTableToWhereClause, List<String> notLatestConditions) { StringBuilder expected = new StringBuilder(); for (Map.Entry<String, String> entry : storageTableToWhereClause.entrySet()) { String storageTable = entry.getKey(); expected.append(selExpr).append(storageTable).append(" ").append(cubeName).append(" WHERE ").append("("); if (notLatestConditions != null) { for (String cond : notLatestConditions) { expected.append(cond).append(" AND "); } } if (whereExpr != null) { expected.append(whereExpr).append(" AND "); } expected.append(entry.getValue()).append(")"); if (postWhereExpr != null) { expected.append(" ").append(postWhereExpr); } } return expected.toString(); } public static String getExpectedQuery(String cubeName, String selExpr, String whereExpr, String postWhereExpr, String rangeWhere, String storageTable) { return getExpectedQuery(cubeName, selExpr, whereExpr, postWhereExpr, rangeWhere, storageTable, null); } public static String getExpectedQuery(String cubeName, String selExpr, String whereExpr, String postWhereExpr, String rangeWhere, String storageTable, List<String> notLatestConditions) { StringBuilder expected = 
new StringBuilder() .append(selExpr).append(getDbName()).append(storageTable).append(" ").append(cubeName) .append(" WHERE ").append("("); if (notLatestConditions != null) { for (String cond : notLatestConditions) { expected.append(cond).append(" AND "); } } if (whereExpr != null) { expected.append(whereExpr).append(" AND "); } expected.append(rangeWhere).append(")"); if (postWhereExpr != null) { expected.append(postWhereExpr); } return expected.toString(); } public static String getExpectedQuery(String cubeName, String selExpr, String joinExpr, String whereExpr, String postWhereExpr, List<String> joinWhereConds, Map<String, String> storageTableToWhereClause) { return getExpectedQuery(cubeName, selExpr, joinExpr, whereExpr, postWhereExpr, joinWhereConds, storageTableToWhereClause, null); } public static String getExpectedQuery(String cubeName, String selExpr, String joinExpr, String whereExpr, String postWhereExpr, List<String> joinWhereConds, Map<String, String> storageTableToWhereClause, List<String> notLatestConditions) { StringBuilder expected = new StringBuilder(); int numTabs = storageTableToWhereClause.size(); assertEquals(1, numTabs); for (Map.Entry<String, String> entry : storageTableToWhereClause.entrySet()) { String storageTable = entry.getKey(); expected.append(selExpr).append(storageTable).append(" ").append(cubeName); if (joinExpr != null) { expected.append(joinExpr); } expected.append(" WHERE ").append("("); if (notLatestConditions != null) { for (String cond : notLatestConditions) { expected.append(cond).append(" AND "); } } if (whereExpr != null) { expected.append(whereExpr).append(" AND "); } expected.append(entry.getValue()); if (joinWhereConds != null) { for (String joinEntry : joinWhereConds) { expected.append(" AND ").append(joinEntry); } } expected.append(")"); if (postWhereExpr != null) { expected.append(postWhereExpr); } } return expected.toString(); } public static Map<String, String> getWhereForDailyAndHourly2days(String cubeName, 
String... storageTables) {
    return getWhereForDailyAndHourly2daysWithTimeDim(cubeName, "dt", storageTables);
  }

  /**
   * Returns the prefix to put in front of storage table names.
   *
   * @return the current session database followed by {@code "."}, or an empty string when the database is
   *         "default" (case-insensitive) or blank
   */
  public static String getDbName() {
    String database = SessionState.get().getCurrentDatabase();
    if (!"default".equalsIgnoreCase(database) && StringUtils.isNotBlank(database)) {
      return database + ".";
    }
    return "";
  }

  /** Same as the five-argument overload for the range {@code TWODAYS_BACK} to {@code NOW}. */
  public static Map<String, String> getWhereForDailyAndHourly2daysWithTimeDim(String cubeName,
    String timedDimension, String... storageTables) {
    return getWhereForDailyAndHourly2daysWithTimeDim(cubeName, timedDimension, TWODAYS_BACK, NOW, storageTables);
  }

  /**
   * Builds the storage-table to where-clause map for a range covered by daily and hourly partitions.
   * <p>
   * When {@code storageToUpdatePeriodMap} is empty, a single entry keyed by the comma-joined table names is
   * returned. Otherwise one entry per table is produced from the clauses of that table's update periods.
   *
   * @param cubeName       cube alias used in the where clause
   * @param timedDimension partition column name
   * @param from           range start
   * @param to             range end
   * @param storageTables  storage table names without database prefix
   * @return insertion-ordered map of storage table string to where clause
   */
  public static Map<String, String> getWhereForDailyAndHourly2daysWithTimeDim(String cubeName,
    String timedDimension, Date from, Date to, String... storageTables) {
    Map<String, String> storageTableToWhereClause = new LinkedHashMap<>();
    if (storageToUpdatePeriodMap.isEmpty()) {
      String whereClause = getWhereForDailyAndHourly2daysWithTimeDim(cubeName, timedDimension, from, to);
      storageTableToWhereClause.put(getStorageTableString(storageTables), whereClause);
    } else {
      // The per-update-period clauses depend only on the arguments, so compute them once instead of once
      // per (table, update period) pair.
      Map<String, String> whereByUpdatePeriod =
        getWhereForDailyAndHourly2daysWithTimeDimUnionQuery(cubeName, timedDimension, from, to);
      for (String tbl : storageTables) {
        // NOTE(review): every update period writes to the same key, so the last period of a table wins.
        // Kept as in the original; confirm this is intended.
        for (UpdatePeriod updatePeriod : storageToUpdatePeriodMap.get(tbl)) {
          String whereClause = whereByUpdatePeriod.get(updatePeriod.getName());
          storageTableToWhereClause.put(getStorageTableString(tbl), whereClause);
        }
      }
    }
    return storageTableToWhereClause;
  }

  /**
   * Joins the given table names with {@code ","}, prefixing each with {@link #getDbName()} when that prefix
   * is not blank.
   */
  private static String getStorageTableString(String... storageTables) {
    String dbName = getDbName();
    if (!StringUtils.isBlank(dbName)) {
      List<String> tbls = new ArrayList<>();
      for (String tbl : storageTables) {
        tbls.add(dbName + tbl);
      }
      return StringUtils.join(tbls, ",");
    }
    return StringUtils.join(storageTables, ",");
  }

  /**
   * Builds one where clause over the sorted union of the hourly and daily partitions of a range.
   * Unless {@code isZerothHour()} holds, hourly partitions cover {@code from} up to the next day boundary
   * and the start of {@code to}'s day up to {@code to}'s hour; daily partitions cover the days in between.
   *
   * @param cubeName       cube alias used in the where clause
   * @param timedDimension partition column name
   * @param from           range start
   * @param to             range end
   * @return the clause produced by {@code StorageUtil.getWherePartClause}
   */
  public static String getWhereForDailyAndHourly2daysWithTimeDim(String cubeName, String timedDimension,
    Date from, Date to) {
    Set<String> hourlyparts = new HashSet<>();
    Set<String> dailyparts = new HashSet<>();
    Date dayStart;
    if (!isZerothHour()) {
      addParts(hourlyparts, HOURLY, from, DateUtil.getCeilDate(from, DAILY));
      addParts(hourlyparts, HOURLY, DateUtil.getFloorDate(to, DAILY), DateUtil.getFloorDate(to, HOURLY));
      dayStart = DateUtil.getCeilDate(from, DAILY);
    } else {
      dayStart = from;
    }
    addParts(dailyparts, DAILY, dayStart, DateUtil.getFloorDate(to, DAILY));
    List<String> parts = new ArrayList<>();
    parts.addAll(hourlyparts);
    parts.addAll(dailyparts);
    Collections.sort(parts);
    return StorageUtil.getWherePartClause(timedDimension, cubeName, parts);
  }

  /**
   * Like the four-argument {@code getWhereForDailyAndHourly2daysWithTimeDim}, but returns separate clauses
   * for the daily and hourly partitions.
   *
   * @return map with the keys {@code "DAILY"} and {@code "HOURLY"}
   */
  public static Map<String, String> getWhereForDailyAndHourly2daysWithTimeDimUnionQuery(String cubeName,
    String timedDimension, Date from, Date to) {
    Map<String, String> updatePeriodToWhereMap = new HashMap<String, String>();
    List<String> hourlyparts = new ArrayList<String>();
    List<String> dailyparts = new ArrayList<String>();
    Date dayStart;
    if (!isZerothHour()) {
      addParts(hourlyparts, HOURLY, from, DateUtil.getCeilDate(from, DAILY));
      addParts(hourlyparts, HOURLY, DateUtil.getFloorDate(to, DAILY), DateUtil.getFloorDate(to, HOURLY));
      dayStart = DateUtil.getCeilDate(from, DAILY);
    } else {
      dayStart = from;
    }
    addParts(dailyparts, DAILY, dayStart, DateUtil.getFloorDate(to, DAILY));
    updatePeriodToWhereMap.put("DAILY", StorageUtil.getWherePartClause(timedDimension, cubeName, dailyparts));
    updatePeriodToWhereMap.put("HOURLY", StorageUtil.getWherePartClause(timedDimension, cubeName, hourlyparts));
    return updatePeriodToWhereMap;
  }

  // storageTables[0] is hourly
storageTables[1] is daily // storageTables[2] is monthly public static Map<String, String> getWhereForMonthlyDailyAndHourly2months(String... storageTables) { Map<String, String> storageTableToWhereClause = new LinkedHashMap<String, String>(); List<String> hourlyparts = new ArrayList<String>(); List<String> dailyparts = new ArrayList<String>(); List<String> monthlyparts = new ArrayList<String>(); Date dayStart = TWO_MONTHS_BACK; Date monthStart = TWO_MONTHS_BACK; if (!isZerothHour()) { addParts(hourlyparts, HOURLY, TWO_MONTHS_BACK, DateUtil.getCeilDate(TWO_MONTHS_BACK, DAILY)); addParts(hourlyparts, HOURLY, DateUtil.getFloorDate(NOW, DAILY), DateUtil.getFloorDate(NOW, HOURLY)); dayStart = DateUtil.getCeilDate(TWO_MONTHS_BACK, DAILY); monthStart = DateUtil.getCeilDate(TWO_MONTHS_BACK, MONTHLY); } Calendar cal = new GregorianCalendar(); cal.setTime(dayStart); if (cal.get(DAY_OF_MONTH) != 1) { addParts(dailyparts, DAILY, dayStart, DateUtil.getCeilDate(TWO_MONTHS_BACK, MONTHLY)); monthStart = DateUtil.getCeilDate(TWO_MONTHS_BACK, MONTHLY); } addParts(dailyparts, DAILY, DateUtil.getFloorDate(NOW, MONTHLY), DateUtil.getFloorDate(NOW, DAILY)); addParts(monthlyparts, MONTHLY, monthStart, DateUtil.getFloorDate(NOW, MONTHLY)); List<String> parts = new ArrayList<String>(); parts.addAll(dailyparts); parts.addAll(hourlyparts); parts.addAll(monthlyparts); StringBuilder tables = new StringBuilder(); if (storageTables.length > 1) { if (!hourlyparts.isEmpty()) { tables.append(getDbName()); tables.append(storageTables[0]); tables.append(","); } tables.append(getDbName()); tables.append(storageTables[2]); if (!dailyparts.isEmpty()) { tables.append(","); tables.append(getDbName()); tables.append(storageTables[1]); } } else { tables.append(getDbName()); tables.append(storageTables[0]); } Collections.sort(parts); storageTableToWhereClause.put(tables.toString(), StorageUtil.getWherePartClause("dt", TEST_CUBE_NAME, parts)); return storageTableToWhereClause; } public static Map<String, 
String> getWhereForMonthlyDailyAndHourly2monthsUnionQuery(String storageTable) { Map<String, List<String>> updatePeriodToPart = new LinkedHashMap<String, List<String>>(); List<String> hourlyparts = new ArrayList<String>(); List<String> dailyparts = new ArrayList<String>(); List<String> monthlyparts = new ArrayList<String>(); Date dayStart = TWO_MONTHS_BACK; Date monthStart = TWO_MONTHS_BACK; if (!isZerothHour()) { addParts(hourlyparts, HOURLY, TWO_MONTHS_BACK, DateUtil.getCeilDate(TWO_MONTHS_BACK, DAILY)); addParts(hourlyparts, HOURLY, DateUtil.getFloorDate(NOW, DAILY), DateUtil.getFloorDate(NOW, HOURLY)); dayStart = DateUtil.getCeilDate(TWO_MONTHS_BACK, DAILY); monthStart = DateUtil.getCeilDate(TWO_MONTHS_BACK, MONTHLY); } Calendar cal = new GregorianCalendar(); cal.setTime(dayStart); if (cal.get(DAY_OF_MONTH) != 1) { addParts(dailyparts, DAILY, dayStart, DateUtil.getCeilDate(TWO_MONTHS_BACK, MONTHLY)); monthStart = DateUtil.getCeilDate(TWO_MONTHS_BACK, MONTHLY); } addParts(dailyparts, DAILY, DateUtil.getFloorDate(NOW, MONTHLY), DateUtil.getFloorDate(NOW, DAILY)); addParts(monthlyparts, MONTHLY, monthStart, DateUtil.getFloorDate(NOW, MONTHLY)); updatePeriodToPart.put("HOURLY", hourlyparts); updatePeriodToPart.put("DAILY", dailyparts); updatePeriodToPart.put("MONTHLY", monthlyparts); List<String> unionParts = new ArrayList<String>(); for (Map.Entry<String, List<UpdatePeriod>> entry : storageToUpdatePeriodMap.entrySet()) { String table = entry.getKey(); for (UpdatePeriod updatePeriod : entry.getValue()) { String uperiod = updatePeriod.getName(); if (table.equals(storageTable) && updatePeriodToPart.containsKey(uperiod)) { unionParts.addAll(updatePeriodToPart.get(uperiod)); Collections.sort(unionParts); } } } HashMap<String, String> tabWhere = new LinkedHashMap<String, String>(); tabWhere.put(getStorageTableString(storageTable), StorageUtil.getWherePartClause("dt", TEST_CUBE_NAME, unionParts)); return tabWhere; } public static Map<String, String> 
getWhereForMonthly2months(String monthlyTable) { Map<String, String> storageTableToWhereClause = new LinkedHashMap<String, String>(); List<String> parts = new ArrayList<String>(); addParts(parts, MONTHLY, TWO_MONTHS_BACK, DateUtil.getFloorDate(NOW, MONTHLY)); storageTableToWhereClause.put(getDbName() + monthlyTable, StorageUtil.getWherePartClause("dt", TEST_CUBE_NAME, parts)); return storageTableToWhereClause; } public static Map<String, String> getWhereForDays(String dailyTable, Date startDay, Date endDay) { Map<String, String> storageTableToWhereClause = new LinkedHashMap<>(); List<String> parts = new ArrayList<>(); addParts(parts, DAILY, startDay, DateUtil.getFloorDate(endDay, DAILY)); storageTableToWhereClause.put(getDbName() + dailyTable, StorageUtil.getWherePartClause("dt", TEST_CUBE_NAME, parts)); return storageTableToWhereClause; } public static Map<String, String> getWhereForHourly2days(String hourlyTable) { return getWhereForHourly2days(TEST_CUBE_NAME, hourlyTable); } public static Map<String, String> getWhereForHourly2days(String alias, String hourlyTable) { Map<String, String> storageTableToWhereClause = new LinkedHashMap<String, String>(); List<String> parts = new ArrayList<String>(); addParts(parts, HOURLY, TWODAYS_BACK, DateUtil.getFloorDate(NOW, HOURLY)); storageTableToWhereClause.put(getDbName() + hourlyTable, StorageUtil.getWherePartClause("dt", alias, parts)); return storageTableToWhereClause; } public static void addParts(Collection<String> partitions, UpdatePeriod updatePeriod, Date from, Date to) { Calendar cal = Calendar.getInstance(); cal.setTime(from); Date dt = cal.getTime(); while (dt.before(to)) { String part = updatePeriod.format(dt); cal.add(updatePeriod.calendarField(), 1); partitions.add(part); dt = cal.getTime(); } } public static String getExpectedQuery(String dimName, String selExpr, String postWhereExpr, String storageTable, boolean hasPart) { return getExpectedQuery(dimName, selExpr, null, null, postWhereExpr, storageTable, 
hasPart); } public static String getExpectedQuery(String dimName, String selExpr, String joinExpr, String whereExpr, String postWhereExpr, String storageTable, boolean hasPart) { StringBuilder expected = new StringBuilder(); String partWhere = null; if (hasPart) { partWhere = StorageUtil.getWherePartClause("dt", dimName, StorageConstants.getPartitionsForLatest()); } expected.append(selExpr); expected.append(getDbName() + storageTable); expected.append(" "); expected.append(dimName); if (joinExpr != null) { expected.append(joinExpr); } if (whereExpr != null || hasPart) { expected.append(" WHERE "); expected.append("("); if (whereExpr != null) { expected.append(whereExpr); if (partWhere != null) { expected.append(" AND "); } } if (partWhere != null) { expected.append(partWhere); } expected.append(")"); } if (postWhereExpr != null) { expected.append(postWhereExpr); } return expected.toString(); } Set<ExprColumn> exprs; private void createCube(CubeMetastoreClient client) throws HiveException, ParseException, LensException { cubeMeasures = new HashSet<CubeMeasure>(); Map<String, String> tags = new HashMap<>(); tags.put(MetastoreConstants.MEASURE_DATACOMPLETENESS_TAG, "tag1"); cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr1", "int", "first measure"), null, null, null, null, null, null, null, null, null, tags)); cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr2", "float", "second measure"), "Measure2", null, "SUM", "RS")); cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr21", "float", "second measure"), "Measure22", null, "SUM", "RS")); cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr22", "float", "second measure"), "Measure22", null, "SUM", "RS")); cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr3", "double", "third measure"), "Measure3", null, "MAX", null)); cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr4", "bigint", "fourth measure"), "Measure4", null, "COUNT", null)); cubeMeasures.add(new ColumnMeasure(new 
FieldSchema("msr9", "bigint", "ninth measure"), null, null, null, null, null, null, null, null, null, tags)); cubeMeasures.add(new ColumnMeasure(new FieldSchema("noAggrMsr", "bigint", "measure without a default aggregate"), "No aggregateMsr", null, null, null)); cubeMeasures.add(new ColumnMeasure(new FieldSchema("newmeasure", "bigint", "measure available from now"), "New measure", null, null, null, NOW, null, 100.0)); cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr15", "int", "fifteenth measure"), "Measure15", null, "SUM", "RS")); cubeDimensions = new HashSet<CubeDimAttribute>(); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("d_time", "timestamp", "d time"))); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("processing_time", "timestamp", "processing time"))); List<CubeDimAttribute> locationHierarchy = new ArrayList<CubeDimAttribute>(); locationHierarchy.add(new BaseDimAttribute(new FieldSchema("zipcode", "int", "zip"))); locationHierarchy.add(new BaseDimAttribute(new FieldSchema("cityid", "int", "city"))); locationHierarchy.add(new BaseDimAttribute(new FieldSchema("stateid", "int", "state"))); locationHierarchy.add(new BaseDimAttribute(new FieldSchema("countryid", "int", "country"))); List<String> regions = Arrays.asList("APAC", "EMEA", "USA"); locationHierarchy.add(new BaseDimAttribute(new FieldSchema("regionname", "string", "region"), "regionname", null, null, null, null, regions)); cubeDimensions.add(new HierarchicalDimAttribute("location", "Location hierarchy", locationHierarchy)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("dim1", "string", "basedim"))); // Added for ambiguity test cubeDimensions.add(new BaseDimAttribute(new FieldSchema("ambigdim1", "string", "used in testColumnAmbiguity"))); cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("dim2", "int", "ref dim"), "dim2 refer", "dim2chain", "id", null, null, 0.0)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("cdim2", "int", "ref dim"), "Dim2 
refer", NOW, null, null)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("urdimid", "int", "ref dim"), "urdim refer", null, null, 10.0)); cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("unreachableName", "string", ""), "urdim name", "unreachableDim_chain", "name", null, null, 10.0)); // denormalized reference cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("dim2big1", "bigint", "ref dim"), "dim2 refer", "dim2chain", "bigid1", null, null, 0.0)); cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("dim2big2", "bigint", "ref dim"), "dim2 refer", "dim2chain", "bigid2", null, null, 0.0)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("dim2bignew", "bigint", "ref dim"), "Dim2 refer", NOW, null, null)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("test_time_dim_hour_id", "int", "ref dim"), "Timedim reference", null, null, null)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("test_time_dim_day_id", "int", "ref dim"), "Timedim reference", null, null, null)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("test_time_dim_hour_id2", "int", "ref dim"))); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("test_time_dim_day_id2", "int", "ref dim"))); cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("testDim3id", "string", "direct id to testdim3"), "dim3 refer", "dim3chain", "id", null, null, 0.0)); cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("cityname", "string", "city name"), "city name", "cubecity", "name", null, null, 0.0)); cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("statename_cube", "string", "state name"), "state name", "cubestate", "name", null, null, 0.0)); List<ChainRefCol> references = new ArrayList<>(); references.add(new ChainRefCol("timedatechain1", "full_date")); references.add(new ChainRefCol("timehourchain1", "full_hour")); cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("test_time_dim", "date", "ref 
dim"), "Timedim full date", references, null, null, null, null)); List<ChainRefCol> chainRefs = new ArrayList<>(); chainRefs.add(new ChainRefCol("timehourchain2", "full_hour")); chainRefs.add(new ChainRefCol("timedatechain2", "full_date")); cubeDimensions.add(new ReferencedDimAttribute(new FieldSchema("test_time_dim2", "date", "chained dim"), "Timedim full date", chainRefs, null, null, null, null)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("cityid1", "int", "id to city"), "City1", null, null, null)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("cityid2", "int", "id to city"), "City2", null, null, null)); cubeDimensions.add(new BaseDimAttribute(new FieldSchema("concatedcitystate", "string", "citystate"), "CityState", null, null, null)); Map<String, JoinChain> joinChains = new HashMap<>(); addCubeChains(joinChains, TEST_CUBE_NAME); exprs = new HashSet<ExprColumn>(); exprs.add(new ExprColumn(new FieldSchema("avgmsr", "double", "avg measure"), "Avg Msr", "avg(msr1 + msr2)")); exprs.add(new ExprColumn(new FieldSchema("singlecolmsr2expr", "double", "measure2"), "Msr2", "msr2)")); exprs.add(new ExprColumn(new FieldSchema("singlecolmsr2qualifiedexpr", "double", "testcube.measure2"), "Msr2", "testcube.msr2")); exprs.add(new ExprColumn(new FieldSchema("singlecoldim1expr", "string", "dim1"), "dim1", "dim1)")); exprs.add(new ExprColumn(new FieldSchema("singlecoldim1qualifiedexpr", "string", "testcube.dim1"), "dim1", "testcube.dim1")); exprs.add(new ExprColumn(new FieldSchema("singlecolchainid", "string", "dim3chain.id"), "dim3chainid", "dim3chain.id)")); exprs.add(new ExprColumn(new FieldSchema("singlecolchainrefexpr", "string", "testcube.testDim3id"), "dim3chainid", "testcube.testDim3id")); exprs.add(new ExprColumn(new FieldSchema("singlecolchainfield", "string", "cubecity.name"), "cubecityname", "cubecity.name")); exprs.add(new ExprColumn(new FieldSchema("summsrs", "double", "sum measures"), "Sum Msrs", "(1000 + sum(msr1) + sum(msr2))/100")); 
exprs.add(new ExprColumn(new FieldSchema("msr5", "double", "materialized in some facts"), "Fifth Msr", "msr2 + msr3")); exprs.add(new ExprColumn(new FieldSchema("msr8", "double", "measure expression"), "Sixth Msr", "msr2 + msr3")); exprs.add(new ExprColumn(new FieldSchema("msr7", "double", "measure expression"), "Seventh Msr", "case when sum(msr2) = 0 then 0 else sum(case when cityid='x' then msr21 else msr22 end)/sum(msr2) end")); exprs.add(new ExprColumn(new FieldSchema("equalsums", "double", "sums are equals"), "equalsums", new ExprSpec("msr3 + msr4", null, null), new ExprSpec("(msr3 + msr2)/100", null, null))); exprs.add(new ExprColumn(new FieldSchema("roundedmsr1", "double", "rounded measure1"), "Rounded msr1", "round(msr1/1000)")); exprs.add(new ExprColumn(new FieldSchema("roundedmsr2", "double", "rounded measure2"), "Rounded msr2", "round(msr2/1000)")); exprs.add(new ExprColumn(new FieldSchema("flooredmsr12", "double", "floored measure12"), "Floored msr12", "floor(msr12)")); exprs.add(new ExprColumn(new FieldSchema("nestedexpr", "double", "nested expr"), "Nested expr", new ExprSpec("avg(roundedmsr2)", null, null), new ExprSpec("avg(equalsums)", null, null), new ExprSpec("case when substrexpr = 'xyz' then avg(msr5) when substrexpr = 'abc' then avg(msr4)/100 end", null, null))); exprs.add(new ExprColumn(new FieldSchema("msr2expr", "double", "nested expr"), "Nested expr", new ExprSpec("case when cityStateName = 'xyz' then msr2 else 0 end", null, null))); exprs.add(new ExprColumn(new FieldSchema("nestedExprWithTimes", "double", "nested expr"), "Nested expr", new ExprSpec("avg(roundedmsr2)", null, null), new ExprSpec("avg(equalsums)", null, null), new ExprSpec("case when substrexpr = 'xyz' then avg(msr5) when substrexpr = 'abc' then avg(msr4)/100 end", NOW, null), new ExprSpec("avg(newmeasure)", null, null))); exprs.add(new ExprColumn(new FieldSchema("msr6", "bigint", "sixth measure"), "Measure6", "sum(msr2) + max(msr3)/ count(msr4)")); exprs.add(new 
ExprColumn(new FieldSchema("booleancut", "boolean", "a boolean expression"), "Boolean cut", "(dim1 != 'x' AND dim2 != 10)")); exprs.add(new ExprColumn(new FieldSchema("substrexpr", "string", "a sub-string expression"), "Substr expr", new ExprSpec("substr(dim1, 3))", null, null), new ExprSpec("substr(ascii(dim2chain.name), 3)", null, null))); exprs.add(new ExprColumn(new FieldSchema("substrexprdim2", "string", "a sub-string expression"), "Substr expr", new ExprSpec("substr(dim2, 3))", null, null), new ExprSpec("substr(ascii(dim2chain.name), 3)", null, null))); exprs.add(new ExprColumn(new FieldSchema("indiasubstr", "boolean", "nested sub string expression"), "Nested expr", "substrexpr = 'INDIA'")); exprs.add(new ExprColumn(new FieldSchema("refexpr", "string", "expression which facts and dimensions"), "Expr with cube and dim fields", "concat(dim1, \":\", citydim.name)")); exprs.add(new ExprColumn(new FieldSchema("nocolexpr", "string", "expression which non existing colun"), "No col expr", "myfun(nonexist)")); exprs.add(new ExprColumn(new FieldSchema("newexpr", "string", "expression which non existing colun"), "new measure expr", "myfun(newmeasure)")); exprs.add(new ExprColumn(new FieldSchema("cityAndState", "String", "city and state together"), "City and State", new ExprSpec("concat(cityname, \":\", statename_cube)", null, null), new ExprSpec("substr(concatedcitystate, 10)", null, null))); exprs.add(new ExprColumn(new FieldSchema("cityAndStateNew", "String", "city and state together"), "City and State", new ExprSpec("concat(cityname, \":\", statename_cube)", null, TWO_MONTHS_BACK), new ExprSpec("substr(concatedcitystate, 10)", null, null))); exprs.add(new ExprColumn(new FieldSchema("cityStateName", "String", "city state"), "City State", "concat('CityState:', cubecity.statename)")); exprs.add(new ExprColumn(new FieldSchema("isIndia", "String", "is indian city/state"), "Is Indian City/state", "cubecity.name == 'DELHI' OR cubestate.name == 'KARNATAKA' OR cubestate.name 
== 'MAHARASHTRA'")); exprs.add(new ExprColumn(new FieldSchema("cubeStateName", "String", "statename from cubestate"), "CubeState Name", "substr(cubestate.name, 5)")); exprs.add(new ExprColumn(new FieldSchema("substrdim2big1", "String", "substr of dim2big1"), "dim2big1 substr", "substr(dim2big1, 5)")); exprs.add(new ExprColumn(new FieldSchema("asciicity", "String", "ascii cityname"), "ascii cityname substr", "ascii(cityname)")); exprs.add(new ExprColumn(new FieldSchema("countofdistinctcityid", "int", "Count of Distinct CityId"), "Count of Distinct CityId Expr", "count(distinct(cityid))")); exprs.add(new ExprColumn(new FieldSchema("notnullcityid", "int", "Not null cityid"), "Not null cityid Expr", "case when cityid is null then 0 else cityid end")); Map<String, String> cubeProperties = new HashMap<String, String>(); cubeProperties.put(MetastoreUtil.getCubeTimedDimensionListKey(TEST_CUBE_NAME), "d_time,pt,it,et,test_time_dim,test_time_dim2"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "test_time_dim", "ttd"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "test_time_dim2", "ttd2"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "d_time", "dt"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "it", "it"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "et", "et"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "pt", "pt"); cubeProperties.put(MetastoreConstants.TIMEDIM_RELATION + "d_time", "test_time_dim+[-10 days,10 days]"); client.createCube(TEST_CUBE_NAME, cubeMeasures, cubeDimensions, exprs, Sets.newHashSet(joinChains.values()), cubeProperties); Set<String> measures = new HashSet<String>(); measures.add("msr1"); measures.add("msr2"); measures.add("msr3"); measures.add("msr9"); Set<String> dimensions = new HashSet<String>(); dimensions.add("dim1"); dimensions.add("dim2"); dimensions.add("dim2big1"); dimensions.add("dim2big2"); 
dimensions.add("dim2bignew"); // Try creating derived cube with non existant dim/measures try{ client.createDerivedCube(TEST_CUBE_NAME, DERIVED_CUBE_NAME, Sets.newHashSet("random_measure"), Sets.newHashSet("random_dim_attribute"), new HashMap<String, String>(), 5L); } catch(LensException e) { assertTrue(e.getMessage().contains("random_measure")); assertTrue(e.getMessage().contains("random_dim_attribute")); assertTrue(e.getMessage().contains("not present")); } client.createDerivedCube(TEST_CUBE_NAME, DERIVED_CUBE_NAME, measures, dimensions, new HashMap<String, String>(), 5L); } private void addCubeChains(Map<String, JoinChain> joinChains, final String cubeName) { joinChains.put("timehourchain1", new JoinChain("timehourchain1", "time chain", "time dim thru hour dim") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "test_time_dim_hour_id")); add(new TableReference("hourdim", "id")); } }); } }); joinChains.put("timedatechain1", new JoinChain("timedatechain1", "time chain", "time dim thru date dim") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "test_time_dim_day_id")); add(new TableReference("daydim", "id")); } }); } }); joinChains.put("timehourchain2", new JoinChain("timehourchain2", "time chain", "time dim thru hour dim") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "test_time_dim_hour_id2")); add(new TableReference("hourdim", "id")); } }); } }); joinChains.put("timedatechain2", new JoinChain("timedatechain2", "time chain", "time dim thru date dim") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "test_time_dim_day_id2")); add(new TableReference("daydim", "id")); } }); } }); joinChains.put("cubeCity", new JoinChain("cubeCity", "cube-city", "city thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "cityid")); add(new TableReference("citydim", "id")); } }); addPath(new 
ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2")); add(new TableReference("testdim2", "id")); add(new TableReference("testdim2", "cityid")); add(new TableReference("citydim", "id")); } }); } }); joinChains.put("cubeCity1", new JoinChain("cubeCity1", "cube-city", "city thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "cityid1")); add(new TableReference("citydim", "id")); } }); } }); joinChains.put("cubeCity2", new JoinChain("cubeCity2", "cube-city", "city thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "cityid2")); add(new TableReference("citydim", "id")); } }); } }); joinChains.put("cubeState", new JoinChain("cubeState", "cube-state", "state thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "stateid")); add(new TableReference("statedim", "id")); } }); } }); joinChains.put("cubeZip", new JoinChain("cubeZip", "cube-zip", "Zipcode thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "zipcode")); add(new TableReference("zipdim", "code")); } }); } }); joinChains.put("cubeCountry", new JoinChain("cubeCountry", "cube-country", "country thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "countryid")); add(new TableReference("countrydim", "id")); } }); } }); joinChains.put("dim2chain", new JoinChain("dim2chain", "cube-testdim2", "testdim2 thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2")); add(new TableReference("testdim2", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2big1")); add(new TableReference("testdim2", "bigid1")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2big2")); add(new TableReference("testdim2", "bigid2")); } }); addPath(new ArrayList<TableReference>() { { add(new 
TableReference(cubeName, "dim2bignew")); add(new TableReference("testdim2", "bigidnew")); } }); } }); joinChains.put("dim3chain", new JoinChain("dim3chain", "cube-testdim3", "cyclicdim thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2")); add(new TableReference("testdim2", "id")); add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2big1")); add(new TableReference("testdim2", "bigid1")); add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2big2")); add(new TableReference("testdim2", "bigid2")); add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2bignew")); add(new TableReference("testdim2", "bigidnew")); add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "testdim3id")); add(new TableReference("testdim3", "id")); } }); } }); joinChains.put("dim4chain", new JoinChain("dim4chain", "cube-testdim3", "cyclicdim thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2")); add(new TableReference("testdim2", "id")); add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); add(new TableReference("testdim3", "testdim4id")); add(new TableReference("testdim4", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2big1")); add(new TableReference("testdim2", "bigid1")); add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); add(new TableReference("testdim3", 
"testdim4id")); add(new TableReference("testdim4", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2big2")); add(new TableReference("testdim2", "bigid2")); add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); add(new TableReference("testdim3", "testdim4id")); add(new TableReference("testdim4", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "dim2bignew")); add(new TableReference("testdim2", "bigidnew")); add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); add(new TableReference("testdim3", "testdim4id")); add(new TableReference("testdim4", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "testdim3id")); add(new TableReference("testdim3", "id")); add(new TableReference("testdim3", "testdim4id")); add(new TableReference("testdim4", "id")); } }); } }); joinChains.put("cdimChain", new JoinChain("cdimChain", "cube-cyclicdim", "cyclicdim thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "cdim2")); add(new TableReference("cycledim1", "id")); } }); } }); joinChains.put("unreachableDim_chain", new JoinChain("unreachableDim_chain", "cube-unreachableDim", "unreachableDim thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "urdimid")); add(new TableReference("unreachableDim", "id")); } }); } }); joinChains.put("cubeCountry", new JoinChain("cubeCountry", "cube-country", "country thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference(cubeName, "countryid")); add(new TableReference("countrydim", "id")); } }); } }); } private void createBaseAndDerivedCubes(CubeMetastoreClient client) throws HiveException, ParseException, LensException { Set<CubeMeasure> cubeMeasures2 = new HashSet<>(cubeMeasures); Set<CubeDimAttribute> cubeDimensions2 = new 
HashSet<>(cubeDimensions); cubeMeasures2.add(new ColumnMeasure(new FieldSchema("msr11", "int", "first measure"))); cubeMeasures2.add(new ColumnMeasure(new FieldSchema("msr12", "float", "second measure"), "Measure2", null, "SUM", "RS")); cubeMeasures2.add(new ColumnMeasure(new FieldSchema("msr13", "double", "third measure"), "Measure3", null, "MAX", null)); cubeMeasures2.add(new ColumnMeasure(new FieldSchema("msr14", "bigint", "fourth measure"), "Measure4", null, "COUNT", null)); cubeMeasures2.add(new ColumnMeasure(new FieldSchema("directMsr", "bigint", "fifth measure"), "Direct Measure", null, "SUM", null)); cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("dim11", "string", "basedim"))); cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("dim12", "int", "ref dim"), "Dim2 refer", "dim2chain", "id", null, null, null)); // used as key in the chains cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("dim22", "int", "ref dim"), "Dim2 refer", "dim2chain", "id", null, null, null)); // not used as key in the chains cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("dim13", "string", "basedim"))); cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("userid", "int", "userid"))); cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("xuserid", "int", "userid"))); cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("yuserid", "int", "userid"))); cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("user_id_added_in_past", "int", "user_id_added_in_past"))); cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("user_id_added_far_future", "int", "user_id_added_far_future"))); cubeDimensions2.add(new BaseDimAttribute(new FieldSchema("user_id_deprecated", "int", "user_id_deprecated"))); cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("xsports", "array<string>", ""), "xuser sports", "xusersports", "name", null, null, null)); cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("ysports", 
"array<string>", ""), "yuser sports", "yusersports", "name", null, null, null)); cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("sports", "array<string>", ""), "user sports", "usersports", "name", null, null, null)); cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("sportids", "array<int>", ""), "user sports", "userInterestIds", "sport_id", null, null, null)); cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("statecountry", "string", ""), "state country", "cubestatecountry", "name", null, null, null)); cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("citycountry", "string", ""), "city country", "cubecitystatecountry", "name", null, null, null)); List<ChainRefCol> refCols = new ArrayList<>(); refCols.add(new ChainRefCol("cubeState", "countrycapital")); refCols.add(new ChainRefCol("cubeCityStateCountry", "capital")); cubeDimensions2.add(new ReferencedDimAttribute(new FieldSchema("cubeCountryCapital", "String", "ref dim"), "Country capital", refCols, null, null, null, null)); Map<String, String> cubeProperties = new HashMap<>(); cubeProperties.put(MetastoreUtil.getCubeTimedDimensionListKey(BASE_CUBE_NAME), "d_time,pt,it,et,test_time_dim,test_time_dim2"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "test_time_dim", "ttd"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "test_time_dim2", "ttd2"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "d_time", "dt"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "it", "it"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "et", "et"); cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "processing_time", "pt"); cubeProperties.put(MetastoreConstants.TIMEDIM_RELATION + "d_time", "processing_time+[-5 days,5 days]"); cubeProperties.put(MetastoreConstants.TIMEDIM_RELATION + "processing_time", "test_time_dim+[-5 days,5 days]"); 
cubeProperties.put(MetastoreConstants.CUBE_ALL_FIELDS_QUERIABLE, "false"); Map<String, JoinChain> joinChainMap = new HashMap<>(); addCubeChains(joinChainMap, "basecube"); // update new paths joinChainMap.get("dim2chain").addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "dim12")); add(new TableReference("testdim2", "id")); } }); joinChainMap.get("dim3chain").addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "dim12")); add(new TableReference("testdim2", "id")); add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); } }); joinChainMap.get("dim4chain").addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "dim12")); add(new TableReference("testdim2", "id")); add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); add(new TableReference("testdim3", "testdim4id")); add(new TableReference("testdim4", "id")); } }); Set<JoinChain> joinChains = Sets.newHashSet(joinChainMap.values()); joinChains.add(new JoinChain("cityState", "city-state", "state thru city") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "cityid")); add(new TableReference("citydim", "id")); add(new TableReference("citydim", "stateid")); add(new TableReference("statedim", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "cityid")); add(new TableReference("citydim", "id")); add(new TableReference("citydim", "statename")); add(new TableReference("statedim", "name")); } }); } }); joinChains.add(new JoinChain("cityZip", "city-zip", "zip thru city") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "cityid")); add(new TableReference("citydim", "id")); add(new TableReference("citydim", "zipcode")); add(new TableReference("zipdim", "code")); } }); } }); joinChains.add(new JoinChain("cubeStateCountry", "cube-state-country", "country through state") 
{ { addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "stateid")); add(new TableReference("statedim", "id")); add(new TableReference("statedim", "countryid")); add(new TableReference("countrydim", "id")); } }); } }); joinChains.add(new JoinChain("cubeCityStateCountry", "cube-city-state-country", "country through state thru city") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "cityid")); add(new TableReference("citydim", "id")); add(new TableReference("citydim", "stateid")); add(new TableReference("statedim", "id")); add(new TableReference("statedim", "countryid")); add(new TableReference("countrydim", "id")); } }); } }); joinChains.add(new JoinChain("userchain", "user-chain", "user chain") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "userid")); add(new TableReference("userdim", "id")); } }); } }); joinChains.add(new JoinChain("user_id_added_far_future_chain", "user_id_added_far_future_chain", "user_id_added_far_future_chain") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "user_id_added_far_future")); add(new TableReference("userdim", "user_id_added_far_future")); } }); } }); joinChains.add(new JoinChain("userSports", "user-sports", "user sports") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "userid")); add(new TableReference("userdim", "id")); add(new TableReference("userdim", "id")); add(new TableReference("user_interests", "user_id", true)); add(new TableReference("user_interests", "sport_id")); add(new TableReference("sports", "id")); } }); } }); joinChains.add(new JoinChain("userInterestIds", "user-interestsIds", "user interest ids") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "userid")); add(new TableReference("userdim", "id")); add(new TableReference("userdim", "id")); add(new TableReference("user_interests", "user_id", true)); } }); } }); 
joinChains.add(new JoinChain("xuserSports", "xuser-sports", "xuser sports") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "xuserid")); add(new TableReference("userdim", "id")); add(new TableReference("userdim", "id")); add(new TableReference("user_interests", "user_id", true)); add(new TableReference("user_interests", "sport_id")); add(new TableReference("sports", "id")); } }); } }); joinChains.add(new JoinChain("yuserSports", "user-sports", "user sports") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("basecube", "yuserid")); add(new TableReference("userdim", "id")); add(new TableReference("userdim", "id")); add(new TableReference("user_interests", "user_id", true)); add(new TableReference("user_interests", "sport_id")); add(new TableReference("sports", "id")); } }); } }); // add ref dim through chain cubeDimensions2.add( new ReferencedDimAttribute(new FieldSchema("cityStateCapital", "string", "State's capital thru city"), "State's capital thru city", "cityState", "capital", null, null, null)); Set<ExprColumn> baseExprs = new HashSet<>(exprs); baseExprs.add(new ExprColumn(new FieldSchema("substrsprorts", "String", "substr of sports"), "substr sports", "substr(sports, 10)")); baseExprs.add(new ExprColumn(new FieldSchema("xsports_abbr", "array<string>", ""), "xuser sports", "substr(xsports, 3)")); baseExprs.add(new ExprColumn(new FieldSchema("ysports_abbr", "array<string>", ""), "yuser sports", "substr(ysports, 3)")); baseExprs.add(new ExprColumn(new FieldSchema("sports_abbr", "array<string>", ""), "user sports", "substr(sports, 3)")); baseExprs.add(new ExprColumn(new FieldSchema("sportids_abbr", "array<string>", ""), "user sports", "case when sportids == 1 then 'CKT' when sportids == 2 then 'FTB' else 'NON' end")); baseExprs.add(new ExprColumn(new FieldSchema("directMsrExpr", "bigint", ""), "Direct Measure", new ExprSpec("directMsr + 0", null, null), new ExprSpec("msr13 + msr14", null, null))); 
client.createCube(BASE_CUBE_NAME, cubeMeasures2, cubeDimensions2, baseExprs, joinChains, cubeProperties); Map<String, String> derivedProperties = new HashMap<>(); derivedProperties.put(MetastoreConstants.CUBE_ALL_FIELDS_QUERIABLE, "true"); Set<String> measures = new HashSet<>(); measures.add("msr1"); measures.add("msr9"); measures.add("msr11"); Set<String> dimensions = new HashSet<>(); dimensions.add("dim1"); dimensions.add("dim11"); dimensions.add("d_time"); client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME1, measures, dimensions, derivedProperties, 5L); measures = new HashSet<>(); measures.add("msr2"); measures.add("msr12"); measures.add("msr13"); measures.add("msr14"); measures.add("directmsr"); dimensions = new HashSet<>(); dimensions.add("cityid"); dimensions.add("stateid"); dimensions.add("userid"); dimensions.add("xuserid"); dimensions.add("yuserid"); dimensions.add("dim1"); dimensions.add("dim2"); dimensions.add("dim2big1"); dimensions.add("dim2big2"); dimensions.add("dim2bignew"); dimensions.add("dim11"); dimensions.add("dim13"); dimensions.add("dim12"); dimensions.add("dim22"); dimensions.add("d_time"); dimensions.add("test_time_dim"); dimensions.add("test_time_dim2"); dimensions.add("test_time_dim_hour_id"); dimensions.add("test_time_dim_day_id"); dimensions.add("test_time_dim_hour_id2"); dimensions.add("test_time_dim_day_id2"); client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME2, measures, dimensions, derivedProperties, 10L); measures = new HashSet<>(); measures.add("msr3"); measures.add("msr13"); dimensions = new HashSet<>(); dimensions.add("dim1"); dimensions.add("location"); dimensions.add("d_time"); dimensions.add("test_time_dim"); dimensions.add("test_time_dim2"); dimensions.add("test_time_dim_hour_id"); dimensions.add("test_time_dim_day_id"); dimensions.add("test_time_dim_hour_id2"); dimensions.add("test_time_dim_day_id2"); client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME3, measures, dimensions, derivedProperties, 20L); 
// create base cube facts createBaseCubeFacts(client); } private void createBaseCubeFacts(CubeMetastoreClient client) throws HiveException, LensException { Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>(); Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>(); updates.add(MINUTELY); updates.add(HOURLY); updates.add(DAILY); updates.add(MONTHLY); updates.add(QUARTERLY); updates.add(YEARLY); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); ArrayList<FieldSchema> s2PartCols = new ArrayList<FieldSchema>(); s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition")); s2PartCols.add(new FieldSchema("ttd2", serdeConstants.STRING_TYPE_NAME, "test date partition")); s2.setPartCols(s2PartCols); s2.setTimePartCols(Arrays.asList("ttd", "ttd2")); storageAggregatePeriods.put(c1, updates); storageAggregatePeriods.put(c2, updates); storageAggregatePeriods.put(c3, updates); storageAggregatePeriods.put(c4, updates); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c4, s2); storageTables.put(c2, s1); storageTables.put(c3, s1); String factName = "testFact1_BASE"; List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size()); for (CubeMeasure measure : cubeMeasures) { 
factColumns.add(measure.getColumn());
    }

    // testFact1_BASE: all measures of this setup plus the dimension columns below
    // add dimensions of the cube
    factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
    factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
    factColumns.add(new FieldSchema("zipcode", "int", "zip"));
    factColumns.add(new FieldSchema("cityid", "int", "city id"));
    factColumns.add(new FieldSchema("stateid", "int", "state id"));
    factColumns.add(new FieldSchema("userid", "int", "user id"));
    factColumns.add(new FieldSchema("xuserid", "int", "user id"));
    factColumns.add(new FieldSchema("yuserid", "int", "user id"));
    factColumns.add(new FieldSchema("dim1", "string", "base dim"));
    factColumns.add(new FieldSchema("dim11", "string", "base dim"));
    factColumns.add(new FieldSchema("test_time_dim_hour_id", "int", "time id"));

    // create cube fact
    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L,
      factValidityProperties, storageTables);

    // testFact5_BASE: all measures plus a "booleancut" column, i.e. a column named like the
    // booleancut expression (fact with a materialized expression)
    factName = "testFact5_BASE";
    factColumns = new ArrayList<>(cubeMeasures.size());
    for (CubeMeasure measure : cubeMeasures) {
      factColumns.add(measure.getColumn());
    }

    // add dimensions of the cube
    factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
    factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
    factColumns.add(new FieldSchema("dim1", "string", "base dim"));
    factColumns.add(new FieldSchema("booleancut", "boolean", "expr dim"));

    // create cube fact
    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 150L,
      factValidityProperties, storageTables);

    // create fact only with extra measures
    factName = "testFact2_BASE";
    factColumns = new ArrayList<FieldSchema>();
    factColumns.add(new FieldSchema("msr12", "float", "second measure"));

    // add dimensions of the cube
    factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
    factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
    factColumns.add(new FieldSchema("dim1", "string", "base dim"));
    factColumns.add(new FieldSchema("dim11", "string", "base dim"));
    factColumns.add(new FieldSchema("dim2", "int", "dim2 id"));
    factColumns.add(new FieldSchema("userid", "int", "user id"));
    factColumns.add(new FieldSchema("xuserid", "int", "user id"));
    factColumns.add(new FieldSchema("yuserid", "int", "user id"));
    // create cube fact
    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L,
      factValidityProperties, storageTables);

    // testfact_deprecated: same columns as testFact2_BASE, but with an absolute start time of
    // "now.day - 3 days" and an absolute end time of "now.day - 2 days"
    Map<String, String> properties = Maps.newHashMap(factValidityProperties);
    properties.put(MetastoreConstants.FACT_ABSOLUTE_END_TIME, DateUtil.relativeToAbsolute("now.day - 2 days"));
    properties.put(MetastoreConstants.FACT_ABSOLUTE_START_TIME, DateUtil.relativeToAbsolute("now.day - 3 days"));
    client.createCubeFactTable(BASE_CUBE_NAME, "testfact_deprecated", factColumns, storageAggregatePeriods, 5L,
      properties, storageTables);

    // create fact only with extra measures
    factName = "testFact3_BASE";
    factColumns = new ArrayList<FieldSchema>();
    factColumns.add(new FieldSchema("msr13", "double", "third measure"));
    factColumns.add(new FieldSchema("msr14", "bigint", "fourth measure"));

    // add dimensions of the cube
    factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
    factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
    factColumns.add(new FieldSchema("dim1", "string", "base dim"));
    factColumns.add(new FieldSchema("dim11", "string", "base dim"));
    // create cube fact
    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L,
      factValidityProperties, storageTables);

    // create fact with materialized expression
    factName = "testFact6_BASE";
    factColumns = new ArrayList<>();
    factColumns.add(new FieldSchema("msr13", "double", "third measure"));
    factColumns.add(new FieldSchema("msr14", "bigint", "fourth measure"));

    // add dimensions of the cube
    factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
    factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
    factColumns.add(new FieldSchema("dim1", "string", "base dim"));
    factColumns.add(new FieldSchema("booleancut", "boolean", "expr dim"));

    // create cube fact
    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 150L,
      factValidityProperties, storageTables);

    // create raw fact only with extra measures
    factName = "testFact2_RAW_BASE";
    factColumns = new ArrayList<FieldSchema>();
    factColumns.add(new FieldSchema("msr11", "int", "first measure"));
    factColumns.add(new FieldSchema("msr12", "float", "second measure"));
    factColumns.add(new FieldSchema("msr9", "bigint", "ninth measure"));

    // add dimensions of the cube
    factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
    factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
    factColumns.add(new FieldSchema("dim1", "string", "base dim"));
    factColumns.add(new FieldSchema("dim11", "string", "base dim"));
    factColumns.add(new FieldSchema("dim13", "string", "base dim"));
    factColumns.add(new FieldSchema("dim12", "string", "base dim"));
    factColumns.add(new FieldSchema("dim22", "string", "base dim"));
    factColumns.add(new FieldSchema("cityid", "int", "city id"));

    // From here on the facts use only storage c1 with the HOURLY update period; fresh maps are
    // created so the ones already handed to earlier createCubeFactTable calls are not modified.
    storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
    updates = new HashSet<UpdatePeriod>();
    updates.add(HOURLY);
    storageAggregatePeriods.put(c1, updates);

    storageTables = new HashMap<String, StorageTableDesc>();
    storageTables.put(c1, s1);

    // create cube fact
    // reset the properties left over from testfact_deprecated, then mark the fact as not aggregated
    properties.clear();
    properties.putAll(factValidityProperties);
    properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
    properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f2");

    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
      storageTables);

    // create raw fact only with extra measures
    factName = "testFact3_RAW_BASE";
    factColumns = new ArrayList<FieldSchema>();
    factColumns.add(new FieldSchema("msr13", "double", "third measure"));
    factColumns.add(new FieldSchema("msr14", "bigint", "fourth measure"));

    // add dimensions of the cube
    factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
    factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
    factColumns.add(new FieldSchema("dim1", "string", "base dim"));
    factColumns.add(new FieldSchema("dim11", "string", "base dim"));
    factColumns.add(new FieldSchema("dim12", "string", "base dim"));

    storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
    updates = new HashSet<UpdatePeriod>();
    updates.add(HOURLY);
    storageAggregatePeriods.put(c1, updates);

    storageTables = new HashMap<String, StorageTableDesc>();
    storageTables.put(c1, s1);
    // properties is not cleared here, so the entries set for testFact2_RAW_BASE are still present;
    // per-column start/end times are added on top of them
    properties.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat("user_id_added_in_past"), "2016-01-01");
    properties.put(MetastoreConstants.FACT_COL_END_TIME_PFX.concat("user_id_deprecated"), "2016-01-01");
    properties.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat("user_id_added_far_future"), "2099-01-01");
    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
      storageTables);

    // testFact4_RAW_BASE: raw fact that actually carries the three user_id_* columns
    factName = "testFact4_RAW_BASE";
    factColumns = new ArrayList<FieldSchema>();
    factColumns.add(new FieldSchema("msr13", "double", "third measure"));
    factColumns.add(new FieldSchema("msr14", "bigint", "fourth measure"));

    // add dimensions of the cube
    factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
    factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
    factColumns.add(new FieldSchema("dim1", "string", "base dim"));
    factColumns.add(new FieldSchema("user_id_added_in_past", "int", "user id"));
    factColumns.add(new FieldSchema("user_id_added_far_future", "int", "user id"));
    factColumns.add(new FieldSchema("user_id_deprecated", "int", "user id"));

    storageTables = new HashMap<String, StorageTableDesc>();
    storageTables.put(c1, s1);
    // same keys and values as already put for testFact3_RAW_BASE; properties was not cleared in between
    properties.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat("user_id_added_in_past"), "2016-01-01");
    properties.put(MetastoreConstants.FACT_COL_END_TIME_PFX.concat("user_id_deprecated"), "2016-01-01");
    properties.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat("user_id_added_far_future"), "2099-01-01");
    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
      storageTables);

    // testFact5_RAW_BASE: raw fact with only msr9
    factName = "testFact5_RAW_BASE";
    factColumns = new ArrayList<FieldSchema>();
    factColumns.add(new FieldSchema("msr9", "bigint", "ninth measure"));

    // add dimensions of the cube
    factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
    factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time"));
    factColumns.add(new FieldSchema("dim1", "string", "base dim"));

    // drop the per-column time properties again
    properties.clear();
    properties.putAll(factValidityProperties);
    properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
    properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f2");
    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
      storageTables);

    // factName is still "testFact5_RAW_BASE" here, so the partitions below go to that fact
    CubeFactTable fact = client.getFactTable(factName);
    // Add all hourly partitions for two days
    Calendar cal = Calendar.getInstance();
    cal.setTime(TWODAYS_BACK);
    Date temp = cal.getTime();
    // one HOURLY "dt" partition on storage c1 per hour, from TWODAYS_BACK up to and including NOW
    while (!(temp.after(NOW))) {
      Map<String, Date> timeParts = new HashMap<String, Date>();
      timeParts.put("dt", temp);
      StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY);
      client.addPartition(sPartSpec, c1, CubeTableType.FACT);
      cal.add(HOUR_OF_DAY, 1);
      temp = cal.getTime();
    }
  }

  /**
   * Creates the fact "testFact_CONTINUOUS" on {@code TEST_CUBE_NAME}.
   */
  private void createCubeContinuousFact(CubeMetastoreClient client) throws Exception {
    // create continuous raw fact only with extra measures
    String factName = "testFact_CONTINUOUS";
    List<FieldSchema> factColumns = new ArrayList<FieldSchema>();
factColumns.add(new FieldSchema("msr11", "double", "third measure")); factColumns.add(new FieldSchema("msr15", "int", "fifteenth measure")); // add dimensions of the cube factColumns.add(new FieldSchema("d_time", "timestamp", "event time")); factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time")); factColumns.add(new FieldSchema("dim1", "string", "base dim")); factColumns.add(new FieldSchema("dim11", "string", "base dim")); factColumns.add(new FieldSchema("dim12", "string", "base dim")); Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>(); Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>(); updates.add(CONTINUOUS); storageAggregatePeriods.put(c0, updates); StorageTableDesc s0 = new StorageTableDesc(); s0.setInputFormat(TextInputFormat.class.getCanonicalName()); s0.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c0, s0); Map<String, String> properties = Maps.newHashMap(factValidityProperties); properties.put(MetastoreConstants.FACT_ABSOLUTE_START_TIME, DateUtil.relativeToAbsolute("now.day - 3 days")); client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties, storageTables); } private void createCubeFact(CubeMetastoreClient client) throws Exception { String factName = "testFact"; List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size()); for (CubeMeasure measure : cubeMeasures) { if (!measure.getColumn().getName().equals("msr15")) { //do not add msr15 factColumns.add(measure.getColumn()); } } factColumns.add(new FieldSchema("msr5", "double", "msr5")); // add dimensions of the cube factColumns.add(new FieldSchema("zipcode", "int", "zip")); factColumns.add(new FieldSchema("cityid", "int", "city id")); factColumns.add(new FieldSchema("cityid1", "int", "city id")); factColumns.add(new 
FieldSchema("stateid", "int", "city id")); factColumns.add(new FieldSchema("test_time_dim_day_id", "int", "time id")); factColumns.add(new FieldSchema("test_time_dim_day_id2", "int", "time id")); factColumns.add(new FieldSchema("ambigdim1", "string", "used in" + " testColumnAmbiguity")); Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>(); Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>(); updates.add(MINUTELY); updates.add(HOURLY); updates.add(DAILY); updates.add(MONTHLY); updates.add(QUARTERLY); updates.add(YEARLY); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); ArrayList<FieldSchema> s2PartCols = new ArrayList<FieldSchema>(); s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition")); s2PartCols.add(new FieldSchema("ttd2", serdeConstants.STRING_TYPE_NAME, "test date partition")); s2.setPartCols(s2PartCols); s2.setTimePartCols(Arrays.asList("ttd", "ttd2")); StorageTableDesc s3 = new StorageTableDesc(); s3.setInputFormat(TextInputFormat.class.getCanonicalName()); s3.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s3.setPartCols(partCols); s3.setTimePartCols(timePartCols); s3.getTblProps().put(MetastoreUtil.getStoragetableStartTimesKey(), "now.day - 90 days"); s3.getTblProps().put(MetastoreUtil.getStoragetableEndTimesKey(), "now.day - 10 days"); 
StorageTableDesc s5 = new StorageTableDesc(); s5.setInputFormat(TextInputFormat.class.getCanonicalName()); s5.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s5.setPartCols(partCols); s5.setTimePartCols(timePartCols); s5.getTblProps().put(MetastoreUtil.getStoragetableStartTimesKey(), "now.day - 10 days"); storageAggregatePeriods.put(c1, updates); storageAggregatePeriods.put(c2, updates); storageAggregatePeriods.put(c3, updates); storageAggregatePeriods.put(c4, updates); storageAggregatePeriods.put(c5, updates); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c4, s2); storageTables.put(c2, s1); storageTables.put(c3, s3); storageTables.put(c5, s5); //add storage with continuous update period updates.add(CONTINUOUS); storageAggregatePeriods.put(c0, updates); StorageTableDesc s0 = new StorageTableDesc(); s0.setInputFormat(TextInputFormat.class.getCanonicalName()); s0.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); storageTables.put(c0, s0); // create cube fact client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, factValidityProperties, storageTables); client.getTimelines(factName, c1, null, null); client.getTimelines(factName, c4, null, null); client.clearHiveTableCache(); CubeFactTable fact = client.getFactTable(factName); Table table = client.getTable(MetastoreUtil.getStorageTableName(fact.getName(), Storage.getPrefix(c1))); assertEquals(table.getParameters().get(MetastoreUtil.getPartitionTimelineCachePresenceKey()), "true"); for (UpdatePeriod period : Lists.newArrayList(MINUTELY, HOURLY, DAILY, MONTHLY, YEARLY, QUARTERLY)) { for (String partCol : Lists.newArrayList("dt")) { assertTimeline(client, fact.getName(), c1, period, partCol, EndsAndHolesPartitionTimeline.class); } } table = client.getTable(MetastoreUtil.getStorageTableName(fact.getName(), Storage.getPrefix(c4))); 
assertEquals(table.getParameters().get(MetastoreUtil.getPartitionTimelineCachePresenceKey()), "true"); for (UpdatePeriod period : Lists.newArrayList(MINUTELY, HOURLY, DAILY, MONTHLY, YEARLY, QUARTERLY)) { for (String partCol : Lists.newArrayList("ttd", "ttd2")) { assertTimeline(client, fact.getName(), c4, period, partCol, EndsAndHolesPartitionTimeline.class); } } } private void assertTimeline(CubeMetastoreClient client, String factName, String storageName, UpdatePeriod updatePeriod, String timeDim, PartitionTimeline expectedTimeline) throws Exception { assertNotNull(factName); assertNotNull(storageName); assertNotNull(updatePeriod); assertNotNull(timeDim); String storageTableName = MetastoreUtil.getFactOrDimtableStorageTableName(factName, storageName); List<PartitionTimeline> timelines = client.getTimelines(factName, storageName, updatePeriod.name(), timeDim); assertEquals(timelines.size(), 1); PartitionTimeline actualTimeline = timelines.get(0); assertEquals(actualTimeline, expectedTimeline); assertEquals(client.getTable(storageTableName).getParameters() .get(MetastoreUtil.getPartitionTimelineStorageClassKey(updatePeriod, timeDim)), expectedTimeline.getClass().getCanonicalName()); expectedTimeline.init(client.getTable(MetastoreUtil.getFactOrDimtableStorageTableName(factName, storageName))); assertEquals(actualTimeline, expectedTimeline); } private void assertTimeline(CubeMetastoreClient client, String factName, String storageName, UpdatePeriod updatePeriod, String timeDim, Class<? 
extends PartitionTimeline> partitionTimelineClass) throws Exception { String storageTableName = MetastoreUtil.getFactOrDimtableStorageTableName(factName, storageName); PartitionTimeline expectedTimeline = partitionTimelineClass.getConstructor( String.class, UpdatePeriod.class, String.class) .newInstance(storageTableName, updatePeriod, timeDim); assertTimeline(client, factName, storageName, updatePeriod, timeDim, expectedTimeline); } private void createCubeCheapFact(CubeMetastoreClient client) throws HiveException, LensException { String factName = "cheapFact"; List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size()); for (CubeMeasure measure : cubeMeasures) { factColumns.add(measure.getColumn()); } // add dimensions of the cube factColumns.add(new FieldSchema("zipcode", "int", "zip")); factColumns.add(new FieldSchema("cityid", "int", "city id")); factColumns.add(new FieldSchema("stateid", "int", "city id")); factColumns.add(new FieldSchema("test_time_dim_hour_id", "int", "time id")); factColumns.add(new FieldSchema("ambigdim1", "string", "used in" + " testColumnAmbiguity")); Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>(); Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>(); updates.add(MINUTELY); updates.add(HOURLY); updates.add(DAILY); updates.add(MONTHLY); updates.add(QUARTERLY); updates.add(YEARLY); ArrayList<FieldSchema> partCols = new ArrayList<>(); List<String> timePartCols = new ArrayList<>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); s1.setTblProps(new HashMap<String, String>()); s1.getTblProps().put(MetastoreUtil.getStoragetableStartTimesKey(), "2000, now - 10 
years"); s1.getTblProps().put(MetastoreUtil.getStoragetableEndTimesKey(), "now - 5 years, 2010"); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); ArrayList<FieldSchema> s2PartCols = new ArrayList<>(); s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition")); s2PartCols.add(new FieldSchema("ttd2", serdeConstants.STRING_TYPE_NAME, "test date partition")); s2.setPartCols(s2PartCols); s2.setTimePartCols(Arrays.asList("ttd", "ttd2")); storageAggregatePeriods.put(c99, updates); storageAggregatePeriods.put(c0, updates); Map<String, StorageTableDesc> storageTables = new HashMap<>(); storageTables.put(c99, s2); storageTables.put(c0, s1); // create cube fact client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, factValidityProperties, storageTables); CubeFactTable fact = client.getFactTable(factName); // Add all hourly partitions for two days Calendar cal = Calendar.getInstance(); cal.setTime(TWODAYS_BACK); Date temp = cal.getTime(); while (!(temp.after(NOW))) { Map<String, Date> timeParts = new HashMap<String, Date>(); timeParts.put("ttd", temp); timeParts.put("ttd2", temp); StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY); client.addPartition(sPartSpec, c99, CubeTableType.FACT); cal.add(HOUR_OF_DAY, 1); temp = cal.getTime(); } // Add all hourly partitions for TWO_DAYS_RANGE_BEFORE_4_DAYS cal.setTime(BEFORE_6_DAYS); temp = cal.getTime(); while (!(temp.after(BEFORE_4_DAYS))) { Map<String, Date> timeParts = new HashMap<String, Date>(); timeParts.put("ttd", temp); timeParts.put("ttd2", temp); StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY); client.addPartition(sPartSpec, c99, CubeTableType.FACT); cal.add(HOUR_OF_DAY, 1); temp = cal.getTime(); } } private void 
createCubeFactWeekly(CubeMetastoreClient client) throws Exception { String factName = "testFactWeekly"; List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size()); for (CubeMeasure measure : cubeMeasures) { factColumns.add(measure.getColumn()); } // add dimensions of the cube factColumns.add(new FieldSchema("zipcode", "int", "zip")); Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>(); Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>(); updates.add(WEEKLY); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); storageAggregatePeriods.put(c1, updates); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); // create cube fact client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, factValidityProperties, storageTables); } private void createCubeFactOnlyHourly(CubeMetastoreClient client) throws Exception { String factName = "testFact2"; List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size()); for (CubeMeasure measure : cubeMeasures) { if (!measure.getName().equals("msr4")) { factColumns.add(measure.getColumn()); } } // add dimensions of the cube factColumns.add(new FieldSchema("zipcode", "int", "zip")); factColumns.add(new FieldSchema("cityid", "int", "city id")); factColumns.add(new FieldSchema("cityid2", "int", "city id")); factColumns.add(new FieldSchema("test_time_dim_hour_id", "int", "time id")); factColumns.add(new 
FieldSchema("test_time_dim_hour_id2", "int", "time id")); factColumns.add(new FieldSchema("cdim2", "int", "cycledim id")); Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>(); Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>(); updates.add(HOURLY); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); ArrayList<FieldSchema> s2PartCols = new ArrayList<FieldSchema>(); s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition")); s2PartCols.add(new FieldSchema("ttd2", serdeConstants.STRING_TYPE_NAME, "test date partition")); s2.setPartCols(s2PartCols); s2.setTimePartCols(Arrays.asList("ttd", "ttd2")); storageAggregatePeriods.put(c1, updates); storageAggregatePeriods.put(c4, updates); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c4, s2); // create cube fact client .createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 10L, factValidityProperties, storageTables); CubeFactTable fact = client.getFactTable(factName); // Add all hourly partitions for two days Calendar cal = Calendar.getInstance(); cal.setTime(TWODAYS_BACK); Date temp = cal.getTime(); while (!(temp.after(NOW))) { Map<String, Date> timeParts = new HashMap<String, Date>(); 
timeParts.put(TestCubeMetastoreClient.getDatePartitionKey(), temp); StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY); try { client.addPartition(sPartSpec, c1, CubeTableType.FACT); } catch (HiveException e) { log.error("Encountered Hive exception.", e); } catch (LensException e) { log.error("Encountered Lens exception.", e); } cal.add(HOUR_OF_DAY, 1); temp = cal.getTime(); } // Add all hourly partitions for TWO_DAYS_RANGE_BEFORE_4_DAYS cal.setTime(BEFORE_6_DAYS); temp = cal.getTime(); while (!(temp.after(BEFORE_4_DAYS))) { Map<String, Date> timeParts = new HashMap<String, Date>(); timeParts.put(TestCubeMetastoreClient.getDatePartitionKey(), temp); StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY); client.addPartition(sPartSpec, c1, CubeTableType.FACT); cal.add(HOUR_OF_DAY, 1); temp = cal.getTime(); } client.clearHiveTableCache(); Table table = client.getTable(MetastoreUtil.getStorageTableName(fact.getName(), Storage.getPrefix(c4))); table.getParameters().put(MetastoreUtil.getPartitionTimelineStorageClassKey(HOURLY, "ttd"), StoreAllPartitionTimeline.class.getCanonicalName()); table.getParameters().put(MetastoreUtil.getPartitionTimelineStorageClassKey(HOURLY, "ttd2"), StoreAllPartitionTimeline.class.getCanonicalName()); client.pushHiveTable(table); // Add all hourly partitions for two days on C4 cal = Calendar.getInstance(); cal.setTime(TWODAYS_BACK); temp = cal.getTime(); List<StoragePartitionDesc> storagePartitionDescs = Lists.newArrayList(); List<String> partitions = Lists.newArrayList(); StoreAllPartitionTimeline ttdStoreAll = new StoreAllPartitionTimeline(MetastoreUtil.getFactOrDimtableStorageTableName(fact.getName(), c4), HOURLY, "ttd"); StoreAllPartitionTimeline ttd2StoreAll = new StoreAllPartitionTimeline(MetastoreUtil.getFactOrDimtableStorageTableName(fact.getName(), c4), HOURLY, "ttd2"); while (!(temp.after(NOW))) { Map<String, Date> timeParts = new 
HashMap<String, Date>(); timeParts.put("ttd", temp); timeParts.put("ttd2", temp); TimePartition tp = TimePartition.of(HOURLY, temp); ttdStoreAll.add(tp); ttd2StoreAll.add(tp); partitions.add(HOURLY.format(temp)); StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY); storagePartitionDescs.add(sPartSpec); cal.add(HOUR_OF_DAY, 1); temp = cal.getTime(); } client.addPartitions(storagePartitionDescs, c4, CubeTableType.FACT); client.clearHiveTableCache(); table = client.getTable(MetastoreUtil.getStorageTableName(fact.getName(), Storage.getPrefix(c4))); assertEquals(table.getParameters().get(MetastoreUtil.getPartitionTimelineCachePresenceKey()), "true"); assertTimeline(client, fact.getName(), c4, HOURLY, "ttd", ttdStoreAll); assertTimeline(client, fact.getName(), c4, HOURLY, "ttd2", ttd2StoreAll); // Add all hourly partitions for TWO_DAYS_RANGE_BEFORE_4_DAYS cal.setTime(BEFORE_6_DAYS); temp = cal.getTime(); while (!(temp.after(BEFORE_4_DAYS))) { Map<String, Date> timeParts = new HashMap<String, Date>(); timeParts.put("ttd", temp); timeParts.put("ttd2", temp); StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY); client.addPartition(sPartSpec, c4, CubeTableType.FACT); cal.add(HOUR_OF_DAY, 1); temp = cal.getTime(); } } private void createCubeFactOnlyHourlyRaw(CubeMetastoreClient client) throws HiveException, LensException { String factName = "testFact2_raw"; String factName2 = "testFact1_raw_BASE"; List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size()); for (CubeMeasure measure : cubeMeasures) { factColumns.add(measure.getColumn()); } // add dimensions of the cube factColumns.add(new FieldSchema("zipcode", "int", "zip")); factColumns.add(new FieldSchema("cityid", "int", "city id")); factColumns.add(new FieldSchema("cityid1", "int", "city id")); factColumns.add(new FieldSchema("cityid2", "int", "city id")); factColumns.add(new FieldSchema("stateid", "int", 
"state id")); factColumns.add(new FieldSchema("countryid", "int", "country id")); factColumns.add(new FieldSchema("dim1", "string", "dim1")); factColumns.add(new FieldSchema("dim2", "int", "dim2")); factColumns.add(new FieldSchema("concatedCityState", "string", "citystate")); Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>(); Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>(); updates.add(HOURLY); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); storageAggregatePeriods.put(c1, updates); storageAggregatePeriods.put(c3, updates); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c3, s1); // create cube fact Map<String, String> properties = new HashMap<String, String>(); properties.putAll(factValidityProperties); properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false"); properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f1"); client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties, storageTables); client.createCubeFactTable(BASE_CUBE_NAME, factName2, factColumns, storageAggregatePeriods, 100L, properties, storageTables); CubeFactTable fact2 = client.getFactTable(factName); // Add all hourly partitions for two days Calendar cal = Calendar.getInstance(); cal.setTime(TWODAYS_BACK); Date temp = cal.getTime(); while (!(temp.after(NOW))) { Map<String, Date> timeParts = new HashMap<String, Date>(); 
timeParts.put(TestCubeMetastoreClient.getDatePartitionKey(), temp); StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact2.getName(), timeParts, null, HOURLY); client.addPartition(sPartSpec, c3, CubeTableType.FACT); cal.add(HOUR_OF_DAY, 1); temp = cal.getTime(); } } private void createCubeFactMonthly(CubeMetastoreClient client) throws Exception { String factName = "testFactMonthly"; List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size()); for (CubeMeasure measure : cubeMeasures) { factColumns.add(measure.getColumn()); } // add one dimension of the cube factColumns.add(new FieldSchema("countryid", "int", "country id")); Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>(); Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>(); updates.add(MONTHLY); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); storageAggregatePeriods.put(c2, updates); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c2, s1); // create cube fact client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, factValidityProperties, storageTables); } // DimWithTwoStorages private void createCityTable(CubeMetastoreClient client) throws Exception { Set<CubeDimAttribute> cityAttrs = new HashSet<CubeDimAttribute>(); cityAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); cityAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "city name"))); cityAttrs.add(new BaseDimAttribute(new 
FieldSchema("ambigdim1", "string", "used in testColumnAmbiguity"))); cityAttrs.add(new BaseDimAttribute(new FieldSchema("ambigdim2", "string", "used in testColumnAmbiguity"))); cityAttrs.add(new BaseDimAttribute(new FieldSchema("nocandidatecol", "string", "used in testing no" + " candidate available"))); cityAttrs.add(new BaseDimAttribute(new FieldSchema("stateid", "int", "state id"))); cityAttrs.add(new ReferencedDimAttribute(new FieldSchema("statename", "string", "state name"), "State name", "citystate", "name", null, null, null, null)); cityAttrs.add(new BaseDimAttribute(new FieldSchema("zipcode", "int", "zip code"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey("citydim"), TestCubeMetastoreClient.getDatePartitionKey()); Set<ExprColumn> exprs = new HashSet<ExprColumn>(); exprs.add(new ExprColumn(new FieldSchema("CityAddress", "string", "city with state and city and zip"), "City Address", new ExprSpec("concat(citydim.name, \":\", citystate.name, \":\", citycountry.name, \":\", cityzip.code)", null, null), new ExprSpec("concat(citydim.name, \":\", citystate.name)", null, null))); exprs.add(new ExprColumn(new FieldSchema("CityState", "string", "city's state"), "City State", new ExprSpec("concat(citydim.name, \":\", citydim.statename)", null, null))); exprs.add(new ExprColumn(new FieldSchema("AggrExpr", "int", "count(name)"), "city count", new ExprSpec("count(name)", null, null))); Set<JoinChain> joinchains = new HashSet<JoinChain>() { { add(new JoinChain("cityState", "city-state", "state thru city") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("citydim", "stateid")); add(new TableReference("statedim", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference("citydim", "statename")); add(new TableReference("statedim", "name")); } }); } }); } }; joinchains.add(new JoinChain("cityCountry", "cube-zip", "country thru city") { { addPath(new 
ArrayList<TableReference>() { { add(new TableReference("citydim", "stateid")); add(new TableReference("statedim", "id")); add(new TableReference("statedim", "countryid")); add(new TableReference("countrydim", "id")); } }); addPath(new ArrayList<TableReference>() { { add(new TableReference("citydim", "statename")); add(new TableReference("statedim", "name")); add(new TableReference("statedim", "countryid")); add(new TableReference("countrydim", "id")); } }); } }); joinchains.add(new JoinChain("cityZip", "city-zip", "Zipcode thru city") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("citydim", "zipcode")); add(new TableReference("zipdim", "code")); } }); } }); Dimension cityDim = new Dimension("citydim", cityAttrs, exprs, joinchains, dimProps, 0L); client.createDimension(cityDim); String dimName = "citytable"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); dimColumns.add(new FieldSchema("stateid", "int", "state id")); dimColumns.add(new FieldSchema("zipcode", "int", "zip code")); dimColumns.add(new FieldSchema("ambigdim1", "string", "used in" + " testColumnAmbiguity")); dimColumns.add(new FieldSchema("ambigdim2", "string", "used in " + "testColumnAmbiguity")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); Map<String, String> tblPros = Maps.newHashMap(); tblPros.put(LensConfConstants.STORAGE_COST, "100"); 
s1.setTblProps(tblPros); dumpPeriods.put(c1, HOURLY); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c2, null); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c2, s2); client.createCubeDimensionTable(cityDim.getName(), dimName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); dimName = "citytable2"; dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("stateid", "int", "state id")); dumpPeriods = new HashMap<String, UpdatePeriod>(); storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c4, s2); dumpPeriods.put(c4, null); client.createCubeDimensionTable(cityDim.getName(), dimName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); dimName = "citytable3"; dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "name")); client.createCubeDimensionTable(cityDim.getName(), dimName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); dimName = "citytable4"; dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); client.createCubeDimensionTable(cityDim.getName(), dimName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createTestDim2(CubeMetastoreClient client) throws Exception { String dimName = "testDim2"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); Set<JoinChain> joinchains = new HashSet<>(); JoinChain cityState = new JoinChain("cityState", "city-state", "state thru city"); List<TableReference> statePaths1 = new ArrayList<>(); statePaths1.add(new TableReference("testDim2", "cityid")); statePaths1.add(new TableReference("citydim", "id")); statePaths1.add(new 
TableReference("citydim", "stateid")); statePaths1.add(new TableReference("statedim", "id")); cityState.addPath(statePaths1); List<TableReference> statePaths2 = new ArrayList<TableReference>(); statePaths2.add(new TableReference("testDim2", "cityid")); statePaths2.add(new TableReference("citydim", "id")); statePaths2.add(new TableReference("citydim", "statename")); statePaths2.add(new TableReference("statedim", "name")); cityState.addPath(statePaths2); joinchains.add(cityState); joinchains.add(new JoinChain("dim2city", "dim2-city", "city thru dim2") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("testdim2", "cityid")); add(new TableReference("citydim", "id")); } }); } }); joinchains.add(new JoinChain("dim3chain", "dim3-chain", "dim3 thru dim2") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("testdim2", "testDim3id")); add(new TableReference("testdim3", "id")); } }); } }); joinchains.add(new JoinChain("unreachableDim_chain", "dim2-unreachableDim", "unreachableDim thru dim2") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("testdim2", "urdimid")); add(new TableReference("unreachableDim", "id")); } }); } }); joinchains.add(new JoinChain("dim4chain", "cube-testdim3", "cyclicdim thru cube") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("testdim2", "testdim3id")); add(new TableReference("testdim3", "id")); add(new TableReference("testdim3", "testdim4id")); add(new TableReference("testdim4", "id")); } }); } }); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("bigid1", "bigint", "big id"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("bigid2", "bigint", "big id"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("bigidnew", "bigint", "big id"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "name"))); dimAttrs.add(new ReferencedDimAttribute(new FieldSchema("testDim3id", 
"string", "f-key to testdim3"), "dim3 refer", "dim3chain", "id", null, null, 0.0)); dimAttrs.add(new BaseDimAttribute(new FieldSchema("cityId", "string", "f-key to citydim"))); dimAttrs.add(new ReferencedDimAttribute(new FieldSchema("cityname", "string", "name"), "cityname", "dim2city", "name", null, null, 0.0)); dimAttrs.add(new BaseDimAttribute(new FieldSchema("urdimid", "int", "ref dim"), "urdim refer", null, null, 10.0)); dimAttrs.add(new ReferencedDimAttribute(new FieldSchema("unreachableName", "string", ""), "urdim name", "unreachableDim_chain", "name", null, null, 10.0)); // add ref dim through chain dimAttrs.add(new ReferencedDimAttribute( new FieldSchema("cityStateCapital", "string", "State's capital thru city"), "State's capital thru city", "cityState", "capital", null, null, null)); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Dimension testDim2 = new Dimension(dimName, dimAttrs, null, joinchains, dimProps, 0L); client.createDimension(testDim2); String dimTblName = "testDim2Tbl"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); dimColumns.add(new FieldSchema("cityId", "string", "f-key to cityDim")); dimColumns.add(new FieldSchema("testDim3id", "string", "f-key to testdim3")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); 
s1.setTimePartCols(timePartCols); dumpPeriods.put(c1, HOURLY); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c2, null); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c2, s2); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); // create table2 dimTblName = "testDim2Tbl2"; dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("bigid1", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); dimColumns.add(new FieldSchema("cityId", "string", "f-key to cityDim")); storageTables.put(c3, s1); dumpPeriods.put(c3, HOURLY); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 10L, dumpPeriods, dimProps, storageTables); // create table2 dimTblName = "testDim2Tbl3"; dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("bigid1", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); dimColumns.add(new FieldSchema("testDim3id", "string", "f-key to testdim3")); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 20L, dumpPeriods, dimProps, storageTables); } private void createTimeDims(CubeMetastoreClient client) throws Exception { String dimName = "dayDim"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("full_date", "string", "full date"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("calendar_quarter", "int", "quarter id"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("day_number_of_year", "int", "day number in 
year"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("is_weekend", "boolean", "is weekend?"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Dimension testDim = new Dimension(dimName, dimAttrs, dimProps, 0L); client.createDimension(testDim); String dimTblName = "dayDimTbl"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("full_date", "string", "field1")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); dumpPeriods.put(c3, HOURLY); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c4, null); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c3, s1); storageTables.put(c4, s2); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); String dimName2 = "hourDim"; dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("full_hour", "string", "full date"))); dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName2), 
TestCubeMetastoreClient.getDatePartitionKey()); testDim = new Dimension(dimName2, dimAttrs, dimProps, 0L); client.createDimension(testDim); String dimTblName2 = "hourDimTbl"; dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("full_hour", "string", "field1")); client.createCubeDimensionTable(dimName2, dimTblName2, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createTestDim3(CubeMetastoreClient client) throws Exception { String dimName = "testDim3"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "name"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("testDim4id", "string", "f-key to testdim4"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Set<JoinChain> joinchains = new HashSet<JoinChain>() { { add(new JoinChain("dim4chain", "dim4-chain", "dim4 thru dim3") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("testdim3", "testDim4id")); add(new TableReference("testdim4", "id")); } }); } }); } }; Dimension testDim3 = new Dimension(dimName, dimAttrs, null, joinchains, dimProps, 0L); client.createDimension(testDim3); String dimTblName = "testDim3Tbl"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); dimColumns.add(new FieldSchema("testDim4id", "string", "f-key to testDim4")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); 
timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); dumpPeriods.put(c1, HOURLY); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c2, null); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c2, s2); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createTestDim4(CubeMetastoreClient client) throws Exception { String dimName = "testDim4"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "name"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Dimension testDim4 = new Dimension(dimName, dimAttrs, dimProps, 0L); client.createDimension(testDim4); String dimTblName = "testDim4Tbl"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); 
s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); dumpPeriods.put(c1, HOURLY); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c2, null); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c2, s2); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createCyclicDim1(CubeMetastoreClient client) throws Exception { String dimName = "cycleDim1"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "name"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("cyleDim2Id", "string", "link to cyclic dim 2"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Set<JoinChain> joinchains = new HashSet<JoinChain>() { { add(new JoinChain("cycledim2chain", "cycledim2chain", "cycledim2chain") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("cycledim1", "cyleDim2Id")); add(new TableReference("cycleDim2", "id")); } }); } }); } }; Dimension cycleDim1 = new Dimension(dimName, dimAttrs, null, joinchains, dimProps, 0L); client.createDimension(cycleDim1); String dimTblName = "cycleDim1Tbl"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); dimColumns.add(new FieldSchema("cyleDim2Id", "string", "link to cyclic dim 2")); 
Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); dumpPeriods.put(c1, HOURLY); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c2, null); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c2, s2); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createCyclicDim2(CubeMetastoreClient client) throws Exception { String dimName = "cycleDim2"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "name"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("cyleDim1Id", "string", "link to cyclic dim 1"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Set<JoinChain> joinchains = new HashSet<JoinChain>() { { add(new JoinChain("cycledim1chain", "cycledim1chain", "cycledim1chain") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("cycledim2", "cyleDim1Id")); add(new TableReference("cycleDim1", "id")); } }); } }); } }; Dimension cycleDim2 = new Dimension(dimName, dimAttrs, null, 
joinchains, dimProps, 0L); client.createDimension(cycleDim2); String dimTblName = "cycleDim2Tbl"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); dimColumns.add(new FieldSchema("cyleDim1Id", "string", "link to cyclic dim 1")); Map<String, List<TableReference>> dimensionReferences = new HashMap<String, List<TableReference>>(); dimensionReferences.put("cyleDim1Id", Arrays.asList(new TableReference("cycleDim1", "id"))); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); dumpPeriods.put(c1, HOURLY); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c2, null); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c2, s2); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createZiptable(CubeMetastoreClient client) throws Exception { String dimName = "zipdim"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("code", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("f1", "string", "name"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("f2", "string", "name"))); 
Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Dimension zipDim = new Dimension(dimName, dimAttrs, dimProps, 0L); client.createDimension(zipDim); String dimTblName = "ziptable"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("code", "int", "code")); dimColumns.add(new FieldSchema("f1", "string", "field1")); dimColumns.add(new FieldSchema("f2", "string", "field2")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); dumpPeriods.put(c1, HOURLY); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createUnReachabletable(CubeMetastoreClient client) throws Exception { String dimName = "unreachableDim"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("name", "int", "code"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Dimension urDim = new Dimension(dimName, dimAttrs, dimProps, 0L); client.createDimension(urDim); String dimTblName = "unreachableDimTable"; 
List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); dumpPeriods.put(c1, HOURLY); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createCountryTable(CubeMetastoreClient client) throws Exception { String dimName = "countrydim"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "name"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("captial", "string", "field2"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("region", "string", "region name"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("ambigdim2", "string", "used in testColumnAmbiguity"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Dimension countryDim = new Dimension(dimName, dimAttrs, dimProps, 0L); client.createDimension(countryDim); String dimTblName = "countrytable"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new 
FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); dimColumns.add(new FieldSchema("capital", "string", "field2")); dimColumns.add(new FieldSchema("region", "string", "region name")); dimColumns.add(new FieldSchema("ambigdim2", "string", "used in" + " testColumnAmbiguity")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c1, null); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); dimTblName = "countrytable_partitioned"; StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); ArrayList<FieldSchema> partCols = Lists.newArrayList(); partCols.add(dimColumns.remove(dimColumns.size() - 2)); s2.setPartCols(partCols); dumpPeriods.clear(); dumpPeriods.put(c3, HOURLY); storageTables.clear(); storageTables.put(c3, s2); dimProps.put(MetastoreUtil.getDimTablePartsKey(dimTblName), partCols.get(0).getName()); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createStateTable(CubeMetastoreClient client) throws Exception { String dimName = "statedim"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "code"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "name"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("capital", "string", "field2"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("countryid", "string", "link to country table"))); 
Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Set<JoinChain> joinchains = new HashSet<JoinChain>() { { add(new JoinChain("countrychain", "countrychain", "countrychain") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("statedim", "countryid")); add(new TableReference("country", "id")); } }); } }); } }; Dimension stateDim = new Dimension(dimName, dimAttrs, null, joinchains, dimProps, 0L); client.createDimension(stateDim); String dimTblName = "statetable"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "code")); dimColumns.add(new FieldSchema("name", "string", "field1")); dimColumns.add(new FieldSchema("capital", "string", "field2")); dimColumns.add(new FieldSchema("countryid", "string", "region name")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); dumpPeriods.put(c1, HOURLY); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); // In this, country id will be a partition dimTblName = "statetable_partitioned"; StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); 
partCols.add(dimColumns.remove(dimColumns.size() - 1)); s2.setPartCols(partCols); s2.setTimePartCols(timePartCols); dumpPeriods.clear(); dumpPeriods.put(c3, HOURLY); storageTables.clear(); storageTables.put(c3, s2); dimProps.put(MetastoreUtil.getDimTablePartsKey(dimTblName), partCols.get(1).getName()); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createUserTable(CubeMetastoreClient client) throws Exception { String dimName = "userdim"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "id"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "name"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("age", "string", "age"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("gender", "string", "gender"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("gender", "string", "gender"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Set<JoinChain> joinChains = new HashSet<JoinChain>(); joinChains.add(new JoinChain("userSports", "user-sports", "user sports") { { addPath(new ArrayList<TableReference>() { { add(new TableReference("userdim", "id")); add(new TableReference("user_interests", "user_id", true)); add(new TableReference("user_interests", "sport_id")); add(new TableReference("sports", "id")); } }); } }); Dimension userDim = new Dimension(dimName, dimAttrs, null, joinChains, dimProps, 0L); client.createDimension(userDim); String dimTblName = "usertable"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "id")); dimColumns.add(new FieldSchema("name", "string", "name")); dimColumns.add(new FieldSchema("age", "string", "age")); dimColumns.add(new FieldSchema("gender", "string", "gender")); dimColumns.add(new 
FieldSchema("user_id_added_in_past", "int", "user_id_added_in_past")); dimColumns.add(new FieldSchema("user_id_added_far_future", "int", "user_id_added_far_future")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c1, null); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s2.setPartCols(partCols); s2.setTimePartCols(timePartCols); dumpPeriods.put(c2, HOURLY); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c2, s2); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createUserInterests(CubeMetastoreClient client) throws Exception { String dimName = "user_interests"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "id"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("user_id", "int", "user id"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("sport_id", "int", "sport id"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Dimension interestDim = new Dimension(dimName, dimAttrs, dimProps, 0L); client.createDimension(interestDim); String dimTblName = "user_interests_tbl"; List<FieldSchema> dimColumns = new 
ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "id")); dimColumns.add(new FieldSchema("user_id", "int", "user id")); dimColumns.add(new FieldSchema("sport_id", "int", "sport id")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c1, null); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s2.setPartCols(partCols); s2.setTimePartCols(timePartCols); dumpPeriods.put(c2, HOURLY); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c2, s2); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } private void createSports(CubeMetastoreClient client) throws Exception { String dimName = "sports"; Set<CubeDimAttribute> dimAttrs = new HashSet<CubeDimAttribute>(); dimAttrs.add(new BaseDimAttribute(new FieldSchema("id", "int", "id"))); dimAttrs.add(new BaseDimAttribute(new FieldSchema("name", "string", "name"))); Map<String, String> dimProps = new HashMap<String, String>(); dimProps.put(MetastoreUtil.getDimTimedDimensionKey(dimName), TestCubeMetastoreClient.getDatePartitionKey()); Dimension interestDim = new Dimension(dimName, dimAttrs, dimProps, 0L); client.createDimension(interestDim); String dimTblName = "sports_tbl"; List<FieldSchema> dimColumns = new ArrayList<FieldSchema>(); dimColumns.add(new FieldSchema("id", "int", "id")); 
dimColumns.add(new FieldSchema("name", "string", "name")); Map<String, UpdatePeriod> dumpPeriods = new HashMap<String, UpdatePeriod>(); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); dumpPeriods.put(c1, null); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s2.setPartCols(partCols); s2.setTimePartCols(timePartCols); dumpPeriods.put(c2, HOURLY); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c2, s2); client.createCubeDimensionTable(dimName, dimTblName, dimColumns, 0L, dumpPeriods, dimProps, storageTables); } public void createSources(HiveConf conf, String dbName) throws Exception { try { Database database = new Database(); database.setName(dbName); Hive.get(conf).dropDatabase(dbName, true, true, true); Hive.get(conf).createDatabase(database); SessionState.get().setCurrentDatabase(dbName); CubeMetastoreClient client = CubeMetastoreClient.getInstance(conf); client.createStorage(new HDFSStorage(c0)); client.createStorage(new HDFSStorage(c1)); client.createStorage(new HDFSStorage(c2)); client.createStorage(new HDFSStorage(c3)); client.createStorage(new HDFSStorage(c4)); client.createStorage(new HDFSStorage(c5)); client.createStorage(new HDFSStorage(c99)); createCube(client); createBaseAndDerivedCubes(client); createCubeFact(client); createCubeContinuousFact(client); createCubeCheapFact(client); // commenting this as the week date format throws IllegalPatternException // 
createCubeFactWeekly(client); createCubeFactOnlyHourly(client); createCubeFactOnlyHourlyRaw(client); createCityTable(client); // For join resolver test createTestDim2(client); createTestDim3(client); createTestDim4(client); createTimeDims(client); // For join resolver cyclic links in dimension tables createCyclicDim1(client); createCyclicDim2(client); createCubeFactMonthly(client); createZiptable(client); createCountryTable(client); createStateTable(client); createCubeFactsWithValidColumns(client); createUnReachabletable(client); createUserTable(client); createSports(client); createUserInterests(client); } catch (Exception exc) { log.error("Exception while creating sources.", exc); throw exc; } } public void dropSources(HiveConf conf, String dbName) throws Exception { Hive metastore = Hive.get(conf); metastore.dropDatabase(dbName, true, true, true); } private void createCubeFactsWithValidColumns(CubeMetastoreClient client) throws Exception { String factName = "summary1"; StringBuilder commonCols = new StringBuilder(); List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size()); for (CubeMeasure measure : cubeMeasures) { factColumns.add(measure.getColumn()); commonCols.append(measure.getName()); commonCols.append(","); } // add dimensions of the cube factColumns.add(new FieldSchema("dim1", "string", "dim1")); factColumns.add(new FieldSchema("dim2", "string", "dim2")); factColumns.add(new FieldSchema("testdim3id", "string", "dim2")); factColumns.add(new FieldSchema("dim2big", "string", "dim2")); factColumns.add(new FieldSchema("zipcode", "int", "zip")); factColumns.add(new FieldSchema("cityid", "int", "city id")); Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>(); updates.add(MINUTELY); updates.add(HOURLY); updates.add(DAILY); ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>(); List<String> timePartCols = new ArrayList<String>(); partCols.add(TestCubeMetastoreClient.getDatePartition()); 
timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey()); StorageTableDesc s1 = new StorageTableDesc(); s1.setInputFormat(TextInputFormat.class.getCanonicalName()); s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s1.setPartCols(partCols); s1.setTimePartCols(timePartCols); ArrayList<FieldSchema> partCols2 = new ArrayList<FieldSchema>(); List<String> timePartCols2 = new ArrayList<String>(); partCols2.add(new FieldSchema("pt", "string", "p time")); partCols2.add(new FieldSchema("it", "string", "i time")); partCols2.add(new FieldSchema("et", "string", "e time")); timePartCols2.add("pt"); timePartCols2.add("it"); timePartCols2.add("et"); StorageTableDesc s2 = new StorageTableDesc(); s2.setInputFormat(TextInputFormat.class.getCanonicalName()); s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); s2.setPartCols(partCols2); s2.setTimePartCols(timePartCols2); Map<String, Set<UpdatePeriod>> storageUpdatePeriods = new HashMap<String, Set<UpdatePeriod>>(); storageUpdatePeriods.put(c1, updates); storageUpdatePeriods.put(c2, updates); Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c1, s1); storageTables.put(c2, s2); // create cube fact summary1 Map<String, String> properties = new HashMap<String, String>(); properties.putAll(factValidityProperties); String validColumns = commonCols.toString() + ",dim1,testdim3id"; properties.put(MetastoreUtil.getValidColumnsKey(factName), validColumns); CubeFactTable fact1 = new CubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageUpdatePeriods, 10L, properties); client.createCubeTable(fact1, storageTables); createPIEParts(client, fact1, c2); // create summary2 - same schema, different valid columns factName = "summary2"; validColumns = commonCols.toString() + ",dim1,dim2"; properties.put(MetastoreUtil.getValidColumnsKey(factName), validColumns); CubeFactTable fact2 = new CubeFactTable(TEST_CUBE_NAME, factName, factColumns, 
storageUpdatePeriods, 20L, properties); client.createCubeTable(fact2, storageTables); createPIEParts(client, fact2, c2); factName = "summary3"; validColumns = commonCols.toString() + ",dim1,dim2,cityid,stateid"; properties.put(MetastoreUtil.getValidColumnsKey(factName), validColumns); CubeFactTable fact3 = new CubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageUpdatePeriods, 30L, properties); client.createCubeTable(fact3, storageTables); createPIEParts(client, fact3, c2); // create summary4 only on c2 storageUpdatePeriods = new HashMap<String, Set<UpdatePeriod>>(); storageUpdatePeriods.put(c2, updates); storageTables = new HashMap<String, StorageTableDesc>(); storageTables.put(c2, s2); factName = "summary4"; validColumns = commonCols.toString() + ",dim1,dim2big1,dim2big2,cityid"; properties.put(MetastoreUtil.getValidColumnsKey(factName), validColumns); CubeFactTable fact4 = new CubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageUpdatePeriods, 15L, properties); client.createCubeTable(fact4, storageTables); createPIEParts(client, fact4, c2); } private void createPIEParts(CubeMetastoreClient client, CubeFactTable fact, String storageName) throws Exception { // Add partitions in PIE storage Calendar pcal = Calendar.getInstance(); pcal.setTime(TWODAYS_BACK); pcal.set(HOUR_OF_DAY, 0); Calendar ical = Calendar.getInstance(); ical.setTime(TWODAYS_BACK); ical.set(HOUR_OF_DAY, 0); Map<UpdatePeriod, TreeSet<Date>> pTimes = Maps.newHashMap(); pTimes.put(DAILY, Sets.<Date>newTreeSet()); pTimes.put(HOURLY, Sets.<Date>newTreeSet()); Map<UpdatePeriod, TreeSet<Date>> iTimes = Maps.newHashMap(); iTimes.put(DAILY, Sets.<Date>newTreeSet()); iTimes.put(HOURLY, Sets.<Date>newTreeSet()); Map<String, Map<UpdatePeriod, TreeSet<Date>>> times = Maps.newHashMap(); times.put("et", iTimes); times.put("it", iTimes); times.put("pt", pTimes); // pt=day1 and it=day1 // pt=day2-hour[0-3] it = day1-hour[20-23] // pt=day2 and it=day1 // pt=day2-hour[4-23] it = day2-hour[0-19] // 
pt=day2 and it=day2 // pt=day3-hour[0-3] it = day2-hour[20-23] // pt=day3-hour[4-23] it = day3-hour[0-19] for (int p = 1; p <= 3; p++) { Date ptime = pcal.getTime(); Date itime = ical.getTime(); Map<String, Date> timeParts = new HashMap<String, Date>(); if (p == 1) { // day1 timeParts.put("pt", ptime); timeParts.put("it", itime); timeParts.put("et", itime); StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, DAILY); pTimes.get(DAILY).add(ptime); iTimes.get(DAILY).add(itime); client.addPartition(sPartSpec, storageName, CubeTableType.FACT); pcal.add(DAY_OF_MONTH, 1); ical.add(HOUR_OF_DAY, 20); } else if (p == 2) { // day2 // pt=day2-hour[0-3] it = day1-hour[20-23] // pt=day2 and it=day1 // pt=day2-hour[4-23] it = day2-hour[0-19] // pt=day2 and it=day2 ptime = pcal.getTime(); itime = ical.getTime(); timeParts.put("pt", ptime); timeParts.put("it", itime); timeParts.put("et", itime); // pt=day2 and it=day1 StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, DAILY); pTimes.get(DAILY).add(ptime); iTimes.get(DAILY).add(itime); client.addPartition(sPartSpec, storageName, CubeTableType.FACT); // pt=day2-hour[0-3] it = day1-hour[20-23] // pt=day2-hour[4-23] it = day2-hour[0-19] for (int i = 0; i < 24; i++) { ptime = pcal.getTime(); itime = ical.getTime(); timeParts.put("pt", ptime); timeParts.put("it", itime); timeParts.put("et", itime); sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY); pTimes.get(HOURLY).add(ptime); iTimes.get(HOURLY).add(itime); client.addPartition(sPartSpec, storageName, CubeTableType.FACT); pcal.add(HOUR_OF_DAY, 1); ical.add(HOUR_OF_DAY, 1); } // pt=day2 and it=day2 sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, DAILY); pTimes.get(DAILY).add(ptime); iTimes.get(DAILY).add(itime); client.addPartition(sPartSpec, storageName, CubeTableType.FACT); } else if (p == 3) { // day3 // pt=day3-hour[0-3] it = day2-hour[20-23] // 
pt=day3-hour[4-23] it = day3-hour[0-19] for (int i = 0; i < 24; i++) { ptime = pcal.getTime(); itime = ical.getTime(); timeParts.put("pt", ptime); timeParts.put("it", itime); timeParts.put("et", itime); StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY); pTimes.get(HOURLY).add(ptime); iTimes.get(HOURLY).add(itime); client.addPartition(sPartSpec, storageName, CubeTableType.FACT); pcal.add(HOUR_OF_DAY, 1); ical.add(HOUR_OF_DAY, 1); } } } String storageTableName = MetastoreUtil.getStorageTableName(fact.getName(), Storage.getPrefix( storageName)); Map<String, String> params = client.getTable(storageTableName).getParameters(); String prefix = MetastoreConstants.STORAGE_PFX + MetastoreConstants.PARTITION_TIMELINE_CACHE; assertEquals(params.get(prefix + "present"), "true"); for (String p : Arrays.asList("et", "it", "pt")) { assertTimeline(client, fact.getName(), storageName, MINUTELY, p, EndsAndHolesPartitionTimeline.class); for (UpdatePeriod up : Arrays.asList(DAILY, HOURLY)) { EndsAndHolesPartitionTimeline timeline = new EndsAndHolesPartitionTimeline(storageTableName, up, p); timeline.setFirst(TimePartition.of(up, times.get(p).get(up).first())); timeline.setLatest(TimePartition.of(up, times.get(p).get(up).last())); assertTimeline(client, fact.getName(), storageName, up, p, timeline); } } } public static void printQueryAST(String query, String label) throws LensException { System.out.println("--" + label + "--AST--"); System.out.println("--query- " + query); HQLParser.printAST(HQLParser.parseHQL(query, new HiveConf())); } }