/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.atlas.discovery;

import com.google.common.collect.ImmutableSet;
import org.apache.atlas.AtlasException;
import org.apache.atlas.BaseRepositoryTest;
import org.apache.atlas.RequestContext;
import org.apache.atlas.TestOnlyModule;
import org.apache.atlas.TestUtils;
import org.apache.atlas.discovery.graph.GraphBackedDiscoveryService;
import org.apache.atlas.query.QueryParams;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.repository.MetadataRepository;
import org.apache.atlas.repository.graph.AtlasGraphProvider;
import org.apache.atlas.repository.graph.GraphBackedSearchIndexer;
import org.apache.atlas.repository.graphdb.GremlinVersion;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.atlas.typesystem.ITypedReferenceableInstance;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.atlas.typesystem.persistence.Id;
import org.apache.atlas.typesystem.types.ClassType;
import org.apache.atlas.typesystem.types.DataTypes;
import org.apache.atlas.typesystem.types.HierarchicalTypeDefinition;
import org.apache.atlas.typesystem.types.IDataType;
import org.apache.atlas.typesystem.types.Multiplicity;
import org.apache.atlas.typesystem.types.TypeSystem;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;

import javax.inject.Inject;
import java.text.SimpleDateFormat;
import java.util.*;

import static org.apache.atlas.typesystem.types.utils.TypesUtil.createClassTypeDef;
import static org.apache.atlas.typesystem.types.utils.TypesUtil.createOptionalAttrDef;
import static org.apache.atlas.typesystem.types.utils.TypesUtil.createRequiredAttrDef;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;

@Guice(modules = TestOnlyModule.class)
public class GraphBackedDiscoveryServiceTest extends BaseRepositoryTest {

    @Inject
    private MetadataRepository repositoryService;

    @Inject
    private GraphBackedDiscoveryService discoveryService;

    private QueryParams queryParams = new QueryParams(40, 0);
    private static final String idType = "idType";

    @Override
    @BeforeClass
    public void setUp() throws Exception {
        super.setUp();

        repositoryService = TestUtils.addTransactionWrapper(repositoryService);

        final TypeSystem typeSystem = TypeSystem.getInstance();
        Collection<String> oldTypeNames = new HashSet<>();
        oldTypeNames.addAll(typeSystem.getTypeNames());

        TestUtils.defineDeptEmployeeTypes(typeSystem);
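        // The Dept/Employee types above are defined at runtime, after the graph indexer ran at
        // startup, so index the newly added types before creating instances of them below.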
        addIndexesForNewTypes(oldTypeNames, typeSystem);

        ITypedReferenceableInstance hrDept = TestUtils.createDeptEg1(typeSystem);
        repositoryService.createEntities(hrDept);

        ITypedReferenceableInstance jane = repositoryService.getEntityDefinition("Manager", "name", "Jane");
        Id janeGuid = jane.getId();
        ClassType personType = typeSystem.getDataType(ClassType.class, "Person");
        ITypedReferenceableInstance instance = personType.createInstance(janeGuid);
        instance.set("orgLevel", "L1");
        repositoryService.updatePartial(instance);
    }

    private void addIndexesForNewTypes(Collection<String> oldTypeNames, final TypeSystem typeSystem)
            throws AtlasException {
        Set<String> newTypeNames = new HashSet<>();
        newTypeNames.addAll(typeSystem.getTypeNames());
        newTypeNames.removeAll(oldTypeNames);

        Collection<IDataType> newTypes = new ArrayList<>();
        for (String name : newTypeNames) {
            try {
                newTypes.add(typeSystem.getDataType(IDataType.class, name));
            } catch (AtlasException e) {
                e.printStackTrace();
            }
        }

        // We need to commit the transaction before creating the indices to release the locks held by
        // the transaction. Otherwise, the index commit will fail while waiting for those locks to be
        // released.
        AtlasGraphProvider.getGraphInstance().commit();

        GraphBackedSearchIndexer idx = new GraphBackedSearchIndexer(new AtlasTypeRegistry());
        idx.onAdd(newTypes);
    }

    @BeforeMethod
    public void setupContext() {
        RequestContext.createContext();
    }

    @AfterClass
    public void tearDown() throws Exception {
        super.tearDown();
    }

    private String searchByDSL(String dslQuery) throws Exception {
        return discoveryService.searchByDSL(dslQuery, queryParams);
    }
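    // Every DSL search in these tests returns a JSON envelope with three top-level keys
    // ("query", "dataType" and "rows"), which is why assertEquals(results.length(), 3) recurs
    // before the "rows" array is inspected.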
    @Test
    public void testSearchBySystemProperties() throws Exception {
        // system property in select
        String dslQuery = "from Department select __guid";
        String jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        JSONObject results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);

        JSONArray rows = results.getJSONArray("rows");
        assertNotNull(rows);
        assertEquals(rows.length(), 1);
        assertNotNull(rows.getJSONObject(0).getString("__guid"));

        // system property in where clause
        String guid = rows.getJSONObject(0).getString("__guid");
        dslQuery = "Department where __guid = '" + guid + "' and __state = 'ACTIVE'";
        jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);

        rows = results.getJSONArray("rows");
        assertNotNull(rows);
        assertEquals(rows.length(), 1);

        // Assert system attributes are not null
        JSONObject sys_attributes = (JSONObject) rows.getJSONObject(0).get("$systemAttributes$");
        assertNotNull(sys_attributes.get("createdBy"));
        assertNotNull(sys_attributes.get("modifiedBy"));
        assertNotNull(sys_attributes.get("createdTime"));
        assertNotNull(sys_attributes.get("modifiedTime"));

        // Assert that createdTime and modifiedTime are valid dates
        String createdTime = (String) sys_attributes.get("createdTime");
        String modifiedTime = (String) sys_attributes.get("modifiedTime");
        final String outputFormat = "EEE MMM dd HH:mm:ss z yyyy";
        SimpleDateFormat df = new SimpleDateFormat(outputFormat);
        Date createdDate = df.parse(createdTime);
        Date modifiedDate = df.parse(modifiedTime);
        assertNotNull(createdDate);
        assertNotNull(modifiedDate);

        final String testTs = "\"2011-11-01T02:35:58.440Z\"";
        dslQuery = "Department where " + Constants.TIMESTAMP_PROPERTY_KEY + " > " + testTs;
        jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);

        rows = results.getJSONArray("rows");
        assertNotNull(rows);
        assertEquals(rows.length(), 1);

        dslQuery = "Department where " + Constants.MODIFICATION_TIMESTAMP_PROPERTY_KEY + " > " + testTs;
        jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);

        rows = results.getJSONArray("rows");
        assertNotNull(rows);
        assertEquals(rows.length(), 1);

        dslQuery = "from Department select " + Constants.CREATED_BY_KEY;
        jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);

        rows = results.getJSONArray("rows");
        assertNotNull(rows);
        assertEquals(rows.length(), 1);

        dslQuery = "from Department select " + Constants.MODIFIED_BY_KEY;
        jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);

        rows = results.getJSONArray("rows");
        assertNotNull(rows);
        assertEquals(rows.length(), 1);
    }

    @Test
    public void testSearchByDSLReturnsEntity() throws Exception {
        String dslQuery = "from Department";
        String jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        JSONObject results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);
        System.out.println("results = " + results);

        Object query = results.get("query");
        assertNotNull(query);

        JSONObject dataType = results.getJSONObject("dataType");
        assertNotNull(dataType);
        String typeName = dataType.getString("typeName");
        assertNotNull(typeName);
        assertEquals(typeName, "Department");

        JSONArray rows = results.getJSONArray("rows");
        assertNotNull(rows);
        assertEquals(rows.length(), 1);

        // Assert that entity state is set in the result entities
        String entityState = rows.getJSONObject(0).getJSONObject("$id$").getString("state");
        assertEquals(entityState, Id.EntityState.ACTIVE.name());
    }

    @DataProvider(name = "dslLikeQueriesProvider")
    private Object[][] createDslLikeQueries() {
        return new Object[][]{
            {"hive_table where name like \"sa?es*\"", 3},
            {"hive_db where name like \"R*\"", 1},
            {"hive_db where hive_db.name like \"R???rt?*\" or hive_db.name like \"S?l?s\" or hive_db.name like \"Log*\"", 3},
            {"hive_db where hive_db.name like \"R???rt?*\" and hive_db.name like \"S?l?s\" and hive_db.name like \"Log*\"", 0},
            {"hive_table where name like 'sales*', db where name like 'Sa?es'", 1},
        };
    }

    @Test(dataProvider = "dslLikeQueriesProvider")
    public void testDslSearchUsingLikeOperator(String dslQuery, Integer expectedNumRows) throws Exception {
        runQuery(dslQuery, expectedNumRows, 50, 0);
    }

    @Test(expectedExceptions = Throwable.class)
    public void testSearchByDSLBadQuery() throws Exception {
        String dslQuery = "from blah";
        searchByDSL(dslQuery);
        Assert.fail();
    }
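    // The raw Gremlin test below uses Gremlin 2 syntax (g.V.filter{...}), so it is skipped when the
    // backing graph database only supports Gremlin 3 (see TestUtils.skipForGremlin3EnabledGraphDb).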
    @Test
    public void testRawSearch1() throws Exception {
        TestUtils.skipForGremlin3EnabledGraphDb();

        // Query for all Vertices in Graph
        Object r = discoveryService.searchByGremlin("g.V.toList()");
        Assert.assertTrue(r instanceof List);
        List<Map<String, Object>> resultList = (List<Map<String, Object>>) r;
        Assert.assertTrue(resultList.size() > 0);
        System.out.println("search result = " + r);

        // Query for all Vertices of a Type
        r = discoveryService.searchByGremlin(
                "g.V.filter{it." + Constants.ENTITY_TYPE_PROPERTY_KEY + " == 'Department'}.toList()");
        Assert.assertTrue(r instanceof List);
        resultList = (List<Map<String, Object>>) r;
        Assert.assertTrue(resultList.size() > 0);
        System.out.println("search result = " + r);

        // Property Query: list all Person names
        r = discoveryService.searchByGremlin(
                "g.V.filter{it." + Constants.ENTITY_TYPE_PROPERTY_KEY + " == 'Person'}.'Person.name'.toList()");
        Assert.assertTrue(r instanceof List);
        resultList = (List<Map<String, Object>>) r;
        Assert.assertTrue(resultList.size() > 0);
        System.out.println("search result = " + r);

        List<Object> names = new ArrayList<>(resultList.size());
        for (Map<String, Object> vertexProps : resultList) {
            names.addAll(vertexProps.values());
        }
        for (String name : Arrays.asList("John", "Max")) {
            Assert.assertTrue(names.contains(name));
        }

        // Query for all Vertices modified after 01/01/2015 00:00:00 GMT
        r = discoveryService.searchByGremlin(
                "g.V.filter{it." + Constants.MODIFICATION_TIMESTAMP_PROPERTY_KEY + " > 1420070400000}.toList()");
        Assert.assertTrue(r instanceof List);
        resultList = (List<Map<String, Object>>) r;
        Assert.assertTrue(resultList.size() > 0);
        for (Map<String, Object> vertexProps : resultList) {
            Object object = vertexProps.get(Constants.MODIFICATION_TIMESTAMP_PROPERTY_KEY);
            assertNotNull(object);
            Long timestampAsLong = Long.valueOf((String) object);
            Assert.assertTrue(timestampAsLong > 1420070400000L);
            object = vertexProps.get(Constants.TIMESTAMP_PROPERTY_KEY);
            assertNotNull(object);
        }
    }

    @DataProvider(name = "comparisonQueriesProvider")
    private Object[][] createComparisonQueries() {
        // create queries that exercise the comparison logic for
        // all of the different supported data types
        return new Object[][]{
            {"Person where (birthday < \"1950-01-01T02:35:58.440Z\" )", 0},
            {"Person where (birthday > \"1975-01-01T02:35:58.440Z\" )", 2},
            {"Person where (birthday >= \"1975-01-01T02:35:58.440Z\" )", 2},
            {"Person where (birthday <= \"1950-01-01T02:35:58.440Z\" )", 0},
            {"Person where (birthday = \"1975-01-01T02:35:58.440Z\" )", 0},
            {"Person where (birthday != \"1975-01-01T02:35:58.440Z\" )", 4},

            {"Person where (hasPets = true)", 2},
            {"Person where (hasPets = false)", 2},
            {"Person where (hasPets != false)", 2},
            {"Person where (hasPets != true)", 2},

            {"Person where (numberOfCars > 0)", 2},
            {"Person where (numberOfCars > 1)", 1},
            {"Person where (numberOfCars >= 1)", 2},
            {"Person where (numberOfCars < 2)", 3},
            {"Person where (numberOfCars <= 2)", 4},
            {"Person where (numberOfCars = 2)", 1},
            {"Person where (numberOfCars != 2)", 3},

            {"Person where (houseNumber > 0)", 2},
            {"Person where (houseNumber > 17)", 1},
            {"Person where (houseNumber >= 17)", 2},
            {"Person where (houseNumber < 153)", 3},
            {"Person where (houseNumber <= 153)", 4},
            {"Person where (houseNumber = 17)", 1},
            {"Person where (houseNumber != 17)", 3},

            {"Person where (carMileage > 0)", 2},
            {"Person where (carMileage > 13)", 1},
            {"Person where (carMileage >= 13)", 2},
            {"Person where (carMileage < 13364)", 3},
            {"Person where (carMileage <= 13364)", 4},
            {"Person where (carMileage = 13)", 1},
            {"Person where (carMileage != 13)", 3},

            {"Person where (shares > 0)", 2},
            {"Person where (shares > 13)", 2},
            {"Person where (shares >= 16000)", 1},
            {"Person where (shares < 13364)", 2},
            {"Person where (shares <= 15000)", 3},
            {"Person where (shares = 15000)", 1},
            {"Person where (shares != 1)", 4},

            {"Person where (salary > 0)", 2},
            {"Person where (salary > 100000)", 2},
            {"Person where (salary >= 200000)", 1},
            {"Person where (salary < 13364)", 2},
            {"Person where (salary <= 150000)", 3},
            {"Person where (salary = 12334)", 0},
            {"Person where (salary != 12344)", 4},

            {"Person where (age > 36)", 1},
            {"Person where (age > 49)", 1},
            {"Person where (age >= 49)", 1},
            {"Person where (age < 50)", 3},
            {"Person where (age <= 35)", 2},
            {"Person where (age = 35)", 0},
            {"Person where (age != 35)", 4}
        };
    }

    @DataProvider(name = "dslQueriesProvider")
    private Object[][] createDSLQueries() {
        return new Object[][]{
            {"hive_db as inst where inst.name=\"Reporting\" select inst as id, inst.name", 1},
            {"from hive_db as h select h as id", 3},
            {"from hive_db", 3},
            {"hive_db", 3},
            {"hive_db where hive_db.name=\"Reporting\"", 1},
            {"hive_db hive_db.name = \"Reporting\"", 1},
            {"hive_db where hive_db.name=\"Reporting\" select name, owner", 1},
            {"hive_db has name", 3},
            {"hive_db, hive_table", 10},
            {"View is JdbcAccess", 2},
            {"hive_db as db1, hive_table where db1.name = \"Reporting\"", isGremlin3() ? 4 : 0}, //Not working with Titan 0 - ATLAS-145
            // - Final working query -> discoveryService.searchByGremlin("L:{_var_0 = [] as Set;g.V().has(\"__typeName\", \"hive_db\").fill(_var_0);g.V().has(\"__superTypeNames\", \"hive_db\").fill(_var_0);_var_0._().as(\"db1\").in(\"__hive_table.db\").back(\"db1\").and(_().has(\"hive_db.name\", T.eq, \"Reporting\")).toList()}")
            /*
            {"hive_db, hive_process has name"}, //Invalid query
            {"hive_db where hive_db.name=\"Reporting\" and hive_db.createTime < " + System.currentTimeMillis()}
            */
            {"from hive_table", 10},
            {"hive_table", 10},
            {"hive_table isa Dimension", 3},
            {"hive_column where hive_column isa PII", 8},
            {"View is Dimension", 2},
            // {"hive_column where hive_column isa PII select hive_column.name", 6}, //Not working - ATLAS-175
            {"hive_column select hive_column.name", 37},
            {"hive_column select name", 37},
            {"hive_column where hive_column.name=\"customer_id\"", 6},
            {"from hive_table select hive_table.name", 10},
            {"hive_db where (name = \"Reporting\")", 1},
            {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1", 1},
            {"hive_db where hive_db is JdbcAccess", 0}, //Not supposed to work
            {"hive_db hive_table", 10},
            {"hive_db where hive_db has name", 3},
            {"hive_db as db1 hive_table where (db1.name = \"Reporting\")", isGremlin3() ? 4 : 0}, //Not working in Titan 0 -> ATLAS-145
            {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 ", 1},
            {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 ", 1},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 ", 1},
            /* todo: does not work - ATLAS-146
            {"hive_db where (name = \"Reporting\") and ((createTime + 1) > 0)"},
            {"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") select db1.name as dbName, tab.name as tabName"},
            {"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) or (db1.name = \"Reporting\") select db1.name as dbName, tab.name as tabName"},
            {"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") or db1 has owner select db1.name as dbName, tab.name as tabName"},
            {"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) and (db1.name = \"Reporting\") or db1 has owner select db1.name as dbName, tab.name as tabName"},
            */
            // trait searches
            {"Dimension", 5},
            {"JdbcAccess", 2},
            {"ETL", 5},
            {"Metric", 9},
            {"PII", 8},
            {"`Log Data`", 4},
            // Not sure what the expected rows should be, but since we didn't assign or do anything
            // with the created trait, I assume it'll be zero
            {"`isa`", 0},
            /* Lineage queries are fired through ClosureQuery and are tested through
               HiveLineageJerseyResourceIt in the webapp module. The queries below are commented out
               because DSL-to-Gremlin parsing/translation fails for lineage queries when array types
               (as with DataSet.inputs and outputs) are used within loop expressions.
            // Lineage
            {"Table LoadProcess outputTable"},
            {"Table loop (LoadProcess outputTable)"},
            {"Table as _loop0 loop (LoadProcess outputTable) withPath"},
            {"Table as src loop (LoadProcess outputTable) as dest select src.name as srcTable, dest.name as "
                    + "destTable withPath"},
            */
            // {"hive_table as t, sd, hive_column as c where t.name=\"sales_fact\" select c.name as colName, c.dataType as "
            //         + "colType", 0}, //Not working - ATLAS-145 and ATLAS-166
            {"hive_table where name='sales_fact', db where name='Sales'", 1},
            {"hive_table where name='sales_fact', db where name='Reporting'", 0},
            {"hive_partition as p where values = ['2015-01-01']", 1},
            // {"StorageDesc select cols", 6} //Not working since loading of lists needs to be fixed yet

            // check supertypeNames
            {"DataSet where name='sales_fact'", 1},
            {"Asset where name='sales_fact'", 1}
        };
    }

    @DataProvider(name = "dslExplicitLimitQueriesProvider")
    private Object[][] createDSLQueriesWithExplicitLimit() {
        return new Object[][]{
            {"hive_column", 37, 40, 0}, //with higher limit all rows returned
            {"hive_column limit 10", 10, 50, 0}, //lower limit in query
            {"hive_column select hive_column.name limit 10", 5, 5, 0}, //lower limit in query param
            {"hive_column select hive_column.name withPath", 20, 20, 0}, //limit only in params
            //with offset, only remaining rows returned
            {"hive_column select hive_column.name limit 40 withPath", 17, 40, 20},
            //with higher offset, no rows returned
            {"hive_column select hive_column.name limit 40 withPath", 0, 40, 40},
            //offset used from query
            {"hive_column select hive_column.name limit 40 offset 10", 27, 40, 0},
            //offsets in query and parameter added up
            {"hive_column select hive_column.name limit 40 offset 10", 17, 40, 10},
            //works with where clause
            {"hive_db where name = 'Reporting' limit 10 offset 0", 1, 40, 0},
            //works with joins
            {"hive_db, hive_table where db.name = 'Reporting' limit 10", 1, 1, 0},
            {"hive_column limit 25", 5, 10, 20}, //last page should return records limited by limit in query
            {"hive_column limit 25", 0, 10, 30}, //offset > limit returns 0 rows
        };
    }
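    // As the explicit-limit cases above exercise: the effective limit is the smaller of the limit in
    // the query and the limit passed via QueryParams, while offsets from the query and from
    // QueryParams are added together. The provider below expresses limit/offset in the query alone,
    // with the test supplying QueryParams(40, 0).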
    @DataProvider(name = "dslLimitQueriesProvider")
    private Object[][] createDSLQueriesWithLimit() {
        return new Object[][]{
            {"hive_column limit 10 ", 10},
            {"hive_column select hive_column.name limit 10 ", 10},
            {"hive_column select hive_column.name withPath", 37},
            {"hive_column select hive_column.name limit 10 withPath", 10},
            {"from hive_db", 3},
            {"from hive_db limit 2", 2},
            {"from hive_db limit 2 offset 0", 2},
            {"from hive_db limit 2 offset 1", 2},
            {"from hive_db limit 3 offset 1", 2},
            {"hive_db", 3},
            {"hive_db where hive_db.name=\"Reporting\"", 1},
            {"hive_db where hive_db.name=\"Reporting\" or hive_db.name=\"Sales\" or hive_db.name=\"Logging\" limit 1 offset 1", 1},
            {"hive_db where hive_db.name=\"Reporting\" or hive_db.name=\"Sales\" or hive_db.name=\"Logging\" limit 1 offset 2", 1},
            {"hive_db where hive_db.name=\"Reporting\" or hive_db.name=\"Sales\" or hive_db.name=\"Logging\" limit 2 offset 1", 2},
            {"hive_db where hive_db.name=\"Reporting\" limit 10 ", 1},
            {"hive_db hive_db.name = \"Reporting\"", 1},
            {"hive_db where hive_db.name=\"Reporting\" select name, owner", 1},
            {"hive_db has name", 3},
            {"hive_db has name limit 2 offset 0", 2},
            {"hive_db has name limit 2 offset 1", 2},
            {"hive_db has name limit 10 offset 1", 2},
            {"hive_db has name limit 10 offset 0", 3},
            {"hive_db, hive_table", 10},
            {"hive_db, hive_table limit 5", 5},
            {"hive_db, hive_table limit 5 offset 0", 5},
            {"hive_db, hive_table limit 5 offset 5", 5},
            {"View is JdbcAccess", 2},
            {"View is JdbcAccess limit 1", 1},
            {"View is JdbcAccess limit 2 offset 1", 1},
            {"hive_db as db1, hive_table where db1.name = \"Reporting\"", isGremlin3() ? 4 : 0}, //Not working in Titan 0 - ATLAS-145
            {"from hive_table", 10},
            {"from hive_table limit 5", 5},
            {"from hive_table limit 5 offset 5", 5},
            {"hive_table", 10},
            {"hive_table limit 5", 5},
            {"hive_table limit 5 offset 5", 5},
            {"hive_table isa Dimension", 3},
            {"hive_table isa Dimension limit 2", 2},
            {"hive_table isa Dimension limit 2 offset 0", 2},
            {"hive_table isa Dimension limit 2 offset 1", 2},
            {"hive_table isa Dimension limit 3 offset 1", 2},
            {"hive_column where hive_column isa PII", 8},
            {"hive_column where hive_column isa PII limit 5", 5},
            {"hive_column where hive_column isa PII limit 5 offset 1", 5},
            {"hive_column where hive_column isa PII limit 5 offset 5", 3},
            {"View is Dimension", 2},
            {"View is Dimension limit 1", 1},
            {"View is Dimension limit 1 offset 1", 1},
            {"View is Dimension limit 10 offset 1", 1},
            {"hive_column select hive_column.name", 37},
            {"hive_column select hive_column.name limit 5", 5},
            {"hive_column select hive_column.name limit 5 offset 36", 1},
            {"hive_column select name", 37},
            {"hive_column select name limit 5", 5},
            {"hive_column select name limit 5 offset 36 ", 1},
            {"hive_column where hive_column.name=\"customer_id\"", 6},
            {"hive_column where hive_column.name=\"customer_id\" limit 2", 2},
            {"hive_column where hive_column.name=\"customer_id\" limit 2 offset 1", 2},
            {"hive_column where hive_column.name=\"customer_id\" limit 10 offset 3", 3},
            {"from hive_table select hive_table.name", 10},
            {"from hive_table select hive_table.name limit 5", 5},
            {"from hive_table select hive_table.name limit 5 offset 5", 5},
            {"hive_db where (name = \"Reporting\")", 1},
            {"hive_db where (name = \"Reporting\") limit 10", 1},
            {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1", 1},
            {"hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1 limit 10", 1},
            {"hive_db where hive_db is JdbcAccess", 0}, //Not supposed to work
            {"hive_db hive_table", 10},
            {"hive_db hive_table limit 5", 5},
            {"hive_db hive_table limit 5 offset 5", 5},
            {"hive_db where hive_db has name", 3},
            {"hive_db where hive_db has name limit 5", 3},
            {"hive_db where hive_db has name limit 2 offset 0", 2},
            {"hive_db where hive_db has name limit 2 offset 1", 2},
            {"hive_db as db1 hive_table where (db1.name = \"Reporting\")", isGremlin3() ? 4 : 0}, //Not working in Titan 0 -> ATLAS-145
            {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 ", 1},
            {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10", 1},
            {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10 offset 1", 0},
            {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 limit 10 offset 0", 1},
            {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 ", 1},
            {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 ", 1},
            {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 offset 0", 1},
            {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 limit 10 offset 5", 0},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 ", 1},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10 offset 0", 1},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10 offset 1", 0},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 10", 1},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 limit 0 offset 1", 0},

            // trait searches
            {"Dimension", 5},
            {"Dimension limit 2", 2},
            {"Dimension limit 2 offset 1", 2},
            {"Dimension limit 5 offset 4", 1},
            {"JdbcAccess", 2},
            {"JdbcAccess limit 5 offset 0", 2},
            {"JdbcAccess limit 2 offset 1", 1},
            {"JdbcAccess limit 1", 1},
            {"ETL", 5},
            {"ETL limit 2", 2},
            {"ETL limit 1", 1},
            {"ETL limit 1 offset 0", 1},
            {"ETL limit 2 offset 1", 2},
            {"Metric", 9},
            {"Metric limit 10", 9},
            {"Metric limit 2", 2},
            {"Metric limit 10 offset 1", 8},
            {"PII", 8},
            {"PII limit 10", 8},
            {"PII limit 2", 2},
            {"PII limit 10 offset 1", 7},
            {"`Log Data`", 4},
            {"`Log Data` limit 3", 3},
            {"`Log Data` limit 10 offset 2", 2},
            {"hive_table where name='sales_fact', db where name='Sales'", 1},
            {"hive_table where name='sales_fact', db where name='Sales' limit 10", 1},
            {"hive_table where name='sales_fact', db where name='Sales' limit 10 offset 1", 0},
            {"hive_table where name='sales_fact', db where name='Reporting'", 0},
            {"hive_table where name='sales_fact', db where name='Reporting' limit 10", 0},
            {"hive_table where name='sales_fact', db where name='Reporting' limit 10 offset 1", 0},
            {"hive_partition as p where values = ['2015-01-01']", 1},
            {"hive_partition as p where values = ['2015-01-01'] limit 10", 1},
            {"hive_partition as p where values = ['2015-01-01'] limit 10 offset 1", 0},
        };
    }
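    // Each row in the order-by provider below is {query, expected row count, column the results
    // should be sorted on, ascending?}; testSearchByDSLQueriesWithOrderBy uses the last two to
    // verify the ordering of the returned rows.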
    @DataProvider(name = "dslOrderByQueriesProvider")
    private Object[][] createDSLQueriesWithOrderBy() {
        Boolean isAscending = Boolean.TRUE;
        return new Object[][]{
            //test with alias
            // {"from hive_db select hive_db.name as 'o' orderby o limit 3", 3, "name", isAscending},
            {"from hive_db as h orderby h.owner limit 3", 3, "owner", isAscending},
            {"hive_column as c select c.name orderby hive_column.name ", 37, "c.name", isAscending},
            {"hive_column as c select c.name orderby hive_column.name limit 5", 5, "c.name", isAscending},
            {"hive_column as c select c.name orderby hive_column.name desc limit 5", 5, "c.name", !isAscending},
            {"from hive_db orderby hive_db.owner limit 3", 3, "owner", isAscending},
            {"hive_column select hive_column.name orderby hive_column.name ", 37, "hive_column.name", isAscending},
            {"hive_column select hive_column.name orderby hive_column.name limit 5", 5, "hive_column.name", isAscending},
            {"hive_column select hive_column.name orderby hive_column.name desc limit 5", 5, "hive_column.name", !isAscending},
            {"from hive_db orderby owner limit 3", 3, "owner", isAscending},
            {"hive_column select hive_column.name orderby name ", 37, "hive_column.name", isAscending},
            {"hive_column select hive_column.name orderby name limit 5", 5, "hive_column.name", isAscending},
            {"hive_column select hive_column.name orderby name desc limit 5", 5, "hive_column.name", !isAscending},
            //Not working; the problem is the server code not figuring out how to sort. Not sure if it is a valid use case.
            // {"hive_db hive_table orderby 'hive_db.owner'", 10, "owner", isAscending},
            // {"hive_db hive_table orderby 'hive_db.owner' limit 5", 5, "owner", isAscending},
            // {"hive_db hive_table orderby 'hive_db.owner' limit 5 offset 5", 3, "owner", isAscending},
            {"hive_db select hive_db.description orderby hive_db.description limit 10 withPath", 3, "hive_db.description", isAscending},
            {"hive_db select hive_db.description orderby hive_db.description desc limit 10 withPath", 3, "hive_db.description", !isAscending},
            {"hive_column select hive_column.name orderby hive_column.name limit 10 withPath", 10, "hive_column.name", isAscending},
            {"hive_column select hive_column.name orderby hive_column.name asc limit 10 withPath", 10, "hive_column.name", isAscending},
            {"hive_column select hive_column.name orderby hive_column.name desc limit 10 withPath", 10, "hive_column.name", !isAscending},
            {"from hive_db orderby hive_db.owner limit 3", 3, "owner", isAscending},
            {"hive_db where hive_db.name=\"Reporting\" orderby 'owner'", 1, "owner", isAscending},
            {"hive_db where hive_db.name=\"Reporting\" orderby hive_db.owner limit 10 ", 1, "owner", isAscending},
            {"hive_db where hive_db.name=\"Reporting\" select name, owner orderby hive_db.name ", 1, "name", isAscending},
            {"hive_db has name orderby hive_db.owner limit 10 offset 0", 3, "owner", isAscending},
            {"from hive_table select hive_table.owner orderby hive_table.owner", 10, "hive_table.owner", isAscending},
            {"from hive_table select hive_table.owner orderby hive_table.owner limit 8", 8, "hive_table.owner", isAscending},
            {"hive_table orderby hive_table.name", 10, "name", isAscending},
            {"hive_table orderby hive_table.owner", 10, "owner", isAscending},
            {"hive_table orderby hive_table.owner limit 8", 8, "owner", isAscending},
            {"hive_table orderby hive_table.owner limit 8 offset 0", 8, "owner", isAscending},
            {"hive_table orderby hive_table.owner desc limit 8 offset 0", 8, "owner", !isAscending},
            //Not working because of existing bug ATLAS-175
            // {"hive_table isa Dimension orderby hive_table.owner", 3, "hive_table.owner", isAscending}, //order not working
            // {"hive_table isa Dimension orderby hive_table.owner limit 3", 3, "hive_table.owner", isAscending},
            // {"hive_table isa Dimension orderby hive_table.owner limit 3 offset 0", 3, "hive_table.owner", isAscending},
            // {"hive_table isa Dimension orderby hive_table.owner desc limit 3 offset 0", 3, "hive_table.owner", !isAscending},
            //
            // {"hive_column where hive_column isa PII orderby hive_column.name", 6, "hive_column.name", isAscending},
            // {"hive_column where hive_column isa PII orderby hive_column.name limit 5", 5, "hive_column.name", isAscending},
            // {"hive_column where hive_column isa PII orderby hive_column.name limit 5 offset 1", 5, "hive_column.name", isAscending},
            // {"hive_column where hive_column isa PII orderby hive_column.name desc limit 5 offset 1", 5, "hive_column.name", !isAscending},
            {"hive_column select hive_column.name orderby hive_column.name ", 37, "hive_column.name", isAscending},
            {"hive_column select hive_column.name orderby hive_column.name limit 5", 5, "hive_column.name", isAscending},
            {"hive_column select hive_column.name orderby hive_column.name desc limit 5", 5, "hive_column.name", !isAscending},
            {"hive_column select hive_column.name orderby hive_column.name limit 5 offset 28", 5, "hive_column.name", isAscending},
            {"hive_column select name orderby hive_column.name", 37, "name", isAscending},
            {"hive_column select name orderby hive_column.name limit 5", 5, "name", isAscending},
            {"hive_column select name orderby hive_column.name desc", 37, "name", !isAscending},
            {"hive_column where hive_column.name=\"customer_id\" orderby hive_column.name", 6, "name", isAscending},
            {"hive_column where hive_column.name=\"customer_id\" orderby hive_column.name limit 2", 2, "name", isAscending},
            {"hive_column where hive_column.name=\"customer_id\" orderby hive_column.name limit 2 offset 1", 2, "name", isAscending},
            {"from hive_table select owner orderby hive_table.owner", 10, "owner", isAscending},
            {"from hive_table select owner orderby hive_table.owner limit 5", 5, "owner", isAscending},
            {"from hive_table select owner orderby hive_table.owner desc limit 5", 5, "owner", !isAscending},
            {"from hive_table select owner orderby hive_table.owner limit 5 offset 5", 5, "owner", isAscending},
            {"hive_db where (name = \"Reporting\") orderby hive_db.name", 1, "name", isAscending},
            {"hive_db where (name = \"Reporting\") orderby hive_db.name limit 10", 1, "name", isAscending},
            {"hive_db where hive_db has name orderby hive_db.owner", 3, "owner", isAscending},
            {"hive_db where hive_db has name orderby hive_db.owner limit 5", 3, "owner", isAscending},
            {"hive_db where hive_db has name orderby hive_db.owner limit 2 offset 0", 2, "owner", isAscending},
            {"hive_db where hive_db has name orderby hive_db.owner limit 2 offset 1", 2, "owner", isAscending},
            {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1'", 1, "_col_1", isAscending},
            {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10", 1, "_col_1", isAscending},
            {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10 offset 1", 0, "_col_1", isAscending},
            {"hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 orderby '_col_1' limit 10 offset 0", 1, "_col_1", isAscending},
            {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' ", 1, "_col_1", isAscending},
            {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 ", 1, "_col_1", isAscending},
            {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 offset 0", 1, "_col_1", isAscending},
            {"hive_table where (name = \"sales_fact\" and createTime > \"2014-01-01\" ) select name as _col_0, createTime as _col_1 orderby '_col_1' limit 10 offset 5", 0, "_col_1", isAscending},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' ", 1, "_col_0", isAscending},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10 offset 0", 1, "_col_0", isAscending},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10 offset 1", 0, "_col_0", isAscending},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 10", 1, "_col_0", isAscending},
            {"hive_table where (name = \"sales_fact\" and createTime >= \"2014-12-11T02:35:58.440Z\" ) select name as _col_0, createTime as _col_1 orderby '_col_0' limit 0 offset 1", 0, "_col_0", isAscending},
            {"hive_column select hive_column.name orderby hive_column.name limit 10 withPath", 10, "hive_column.name", isAscending},
            {"hive_column select hive_column.name orderby hive_column.name limit 10 withPath", 10, "hive_column.name", isAscending},
            {"hive_table orderby 'hive_table.owner_notdefined'", 10, null, isAscending},
        };
    }
    @DataProvider(name = "dslGroupByQueriesProvider")
    private Object[][] createDSLGroupByQueries() {
        return new Object[][]{
            {"from Person as p, mentor as m groupby(m.name) select m.name, count()",
                    new FieldValueValidator().withFieldNames("m.name", "count()")
                            .withExpectedValues("Max", 1).withExpectedValues("Julius", 1)},

            // This variant of this query is currently failing. See OMS-335 for details.
            {"from Person as p, mentor groupby(mentor.name) select mentor.name, count()",
                    new FieldValueValidator().withFieldNames("mentor.name", "count()")
                            .withExpectedValues("Max", 1).withExpectedValues("Julius", 1)},

            {"from Person, mentor groupby(mentor.name) select mentor.name, count()",
                    new FieldValueValidator().withFieldNames("mentor.name", "count()")
                            .withExpectedValues("Max", 1).withExpectedValues("Julius", 1)},

            {"from Person, mentor as m groupby(m.name) select m.name, count()",
                    new FieldValueValidator().withFieldNames("m.name", "count()")
                            .withExpectedValues("Max", 1).withExpectedValues("Julius", 1)},

            {"from Person groupby (isOrganDonor) select count()",
                    new FieldValueValidator().withFieldNames("count()")
                            .withExpectedValues(2).withExpectedValues(2)},

            {"from Person groupby (isOrganDonor) select Person.isOrganDonor, count()",
                    new FieldValueValidator().withFieldNames("Person.isOrganDonor", "count()")
                            .withExpectedValues(true, 2).withExpectedValues(false, 2)},

            {"from Person groupby (isOrganDonor) select Person.isOrganDonor as 'organDonor', count() as 'count', max(Person.age) as 'max', min(Person.age) as 'min'",
                    new FieldValueValidator().withFieldNames("organDonor", "max", "min", "count")
                            .withExpectedValues(true, 50, 36, 2).withExpectedValues(false, 0, 0, 2)},

            {"from hive_db groupby (owner, name) select count() ",
                    new FieldValueValidator().withFieldNames("count()")
                            .withExpectedValues(1).withExpectedValues(1).withExpectedValues(1)},

            {"from hive_db groupby (owner, name) select hive_db.owner, hive_db.name, count() ",
                    new FieldValueValidator().withFieldNames("hive_db.owner", "hive_db.name", "count()")
                            .withExpectedValues("Jane BI", "Reporting", 1)
                            .withExpectedValues("Tim ETL", "Logging", 1)
                            .withExpectedValues("John ETL", "Sales", 1)},

            {"from hive_db groupby (owner) select count() ",
                    new FieldValueValidator().withFieldNames("count()")
                            .withExpectedValues(1).withExpectedValues(1).withExpectedValues(1)},

            {"from hive_db groupby (owner) select hive_db.owner, count() ",
                    new FieldValueValidator().withFieldNames("hive_db.owner", "count()")
                            .withExpectedValues("Jane BI", 1).withExpectedValues("Tim ETL", 1)
                            .withExpectedValues("John ETL", 1)},

            {"from hive_db groupby (owner) select hive_db.owner, max(hive_db.name) ",
                    new FieldValueValidator().withFieldNames("hive_db.owner", "max(hive_db.name)")
                            .withExpectedValues("Tim ETL", "Logging").withExpectedValues("Jane BI", "Reporting")
                            .withExpectedValues("John ETL", "Sales")},

            {"from hive_db groupby (owner) select max(hive_db.name) ",
                    new FieldValueValidator().withFieldNames("max(hive_db.name)")
                            .withExpectedValues("Logging").withExpectedValues("Reporting")
                            .withExpectedValues("Sales")},

            {"from hive_db groupby (owner) select owner, hive_db.name, min(hive_db.name) ",
                    new FieldValueValidator().withFieldNames("owner", "hive_db.name", "min(hive_db.name)")
                            .withExpectedValues("Tim ETL", "Logging", "Logging")
                            .withExpectedValues("Jane BI", "Reporting", "Reporting")
                            .withExpectedValues("John ETL", "Sales", "Sales")},

            {"from hive_db groupby (owner) select owner, min(hive_db.name) ",
                    new FieldValueValidator().withFieldNames("owner", "min(hive_db.name)")
                            .withExpectedValues("Tim ETL", "Logging").withExpectedValues("Jane BI", "Reporting")
                            .withExpectedValues("John ETL", "Sales")},

            {"from hive_db groupby (owner) select min(name) ",
                    new FieldValueValidator().withFieldNames("min(name)")
                            .withExpectedValues("Reporting").withExpectedValues("Logging")
                            .withExpectedValues("Sales")},

            //finding the minimum of a constant literal expression...
            {"from hive_db groupby (owner) select min('name') ",
                    new FieldValueValidator().withFieldNames("min(\"name\")")
                            .withExpectedValues("name").withExpectedValues("name").withExpectedValues("name")},
            {"from hive_db groupby (owner) select name ",
                    new FieldValueValidator().withFieldNames("name")
                            .withExpectedValues("Reporting").withExpectedValues("Sales")
                            .withExpectedValues("Logging")},

            //implied group by
            {"from hive_db select count() ",
                    new FieldValueValidator().withFieldNames("count()").withExpectedValues(3)},
            //implied group by
            {"from Person select count() as 'count', max(Person.age) as 'max', min(Person.age) as 'min'",
                    new FieldValueValidator().withFieldNames("max", "min", "count")
                            .withExpectedValues(50, 0, 4)},
            //Sum
            {"from Person groupby (isOrganDonor) select count() as 'count', sum(Person.age) as 'sum'",
                    new FieldValueValidator().withFieldNames("count", "sum")
                            .withExpectedValues(2, 0).withExpectedValues(2, 86)},
            {"from Person groupby (isOrganDonor) select Person.isOrganDonor as 'organDonor', count() as 'count', sum(Person.age) as 'sum'",
                    new FieldValueValidator().withFieldNames("organDonor", "count", "sum")
                            .withExpectedValues(false, 2, 0).withExpectedValues(true, 2, 86)},
            {"from Person select count() as 'count', sum(Person.age) as 'sum'",
                    new FieldValueValidator().withFieldNames("count", "sum").withExpectedValues(4, 86)},

            // tests to ensure that group by works with order by and limit
            {"from hive_db groupby (owner) select min(name) orderby name limit 2 ",
                    new FieldValueValidator().withFieldNames("min(name)")
                            .withExpectedValues("Logging").withExpectedValues("Reporting")},
            {"from hive_db groupby (owner) select min(name) orderby name desc limit 2 ",
                    new FieldValueValidator().withFieldNames("min(name)")
                            .withExpectedValues("Reporting").withExpectedValues("Sales")},
        };
    }

    @DataProvider(name = "dslObjectQueriesReturnIdProvider")
    private Object[][] createDSLObjectIdQueries() {
        return new Object[][]{
            {"from hive_db as h select h as id",
                    new FieldValueValidator().withFieldNames("id")
                            .withExpectedValues(idType).withExpectedValues(idType)
                            .withExpectedValues(idType)}
        };
    }
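    // Ordering is verified by walking the returned rows and comparing each orderBy value with the
    // previous one (lowercased), rather than by comparing against a fixed expected sequence.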
    @Test(dataProvider = "dslOrderByQueriesProvider")
    public void testSearchByDSLQueriesWithOrderBy(String dslQuery, Integer expectedNumRows, String orderBy,
            boolean ascending) throws Exception {
        System.out.println("Executing dslQuery = " + dslQuery);
        String jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        JSONObject results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);

        Object query = results.get("query");
        assertNotNull(query);

        JSONObject dataType = results.getJSONObject("dataType");
        assertNotNull(dataType);
        String typeName = dataType.getString("typeName");
        assertNotNull(typeName);

        JSONArray rows = results.getJSONArray("rows");
        assertNotNull(rows);
        assertEquals(rows.length(), expectedNumRows.intValue()); // some queries may not have any results

        List<String> returnedList = new ArrayList<>();
        for (int i = 0; i < rows.length(); i++) {
            JSONObject row = rows.getJSONObject(i);
            try {
                returnedList.add(row.get(orderBy).toString());
            } catch (Exception ex) {
                System.out.println("Exception occurred: " + ex.getMessage() + " found row: " + row);
            }
        }

        Iterator<String> iter = returnedList.iterator();
        String _current = null, _prev = null;
        if (orderBy != null) {
            // Following code compares the results in rows and makes sure data is sorted as expected.
            while (iter.hasNext()) {
                _prev = _current;
                _current = iter.next().toLowerCase();
                if (_prev != null && _prev.compareTo(_current) != 0) {
                    if (ascending) {
                        Assert.assertTrue(_prev.compareTo(_current) < 0, _prev + " is greater than " + _current);
                    } else {
                        Assert.assertTrue(_prev.compareTo(_current) > 0, _prev + " is less than " + _current);
                    }
                }
            }
        }

        System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
    }

    @Test(dataProvider = "dslQueriesProvider")
    public void testSearchByDSLQueries(String dslQuery, Integer expectedNumRows) throws Exception {
        runQuery(dslQuery, expectedNumRows, 40, 0);
    }

    @Test(dataProvider = "comparisonQueriesProvider")
    public void testDataTypeComparisonQueries(String dslQuery, Integer expectedNumRows) throws Exception {
        runQuery(dslQuery, expectedNumRows, 40, 0);
    }

    @Test(dataProvider = "dslExplicitLimitQueriesProvider")
    public void testSearchByDSLQueriesWithExplicitLimit(String dslQuery, Integer expectedNumRows, int limit,
            int offset) throws Exception {
        runQuery(dslQuery, expectedNumRows, limit, offset);
    }

    public void runQuery(String dslQuery, Integer expectedNumRows, int limitParam, int offsetParam) throws Exception {
        System.out.println("Executing dslQuery = " + dslQuery);
        String jsonResults = discoveryService.searchByDSL(dslQuery, new QueryParams(limitParam, offsetParam));
        assertNotNull(jsonResults);

        JSONObject results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);
        System.out.println("results = " + results);

        Object query = results.get("query");
        assertNotNull(query);

        JSONObject dataType = results.getJSONObject("dataType");
        assertNotNull(dataType);
        String typeName = dataType.getString("typeName");
        assertNotNull(typeName);

        JSONArray rows = results.getJSONArray("rows");
        assertNotNull(rows);
        assertEquals(rows.length(), expectedNumRows.intValue(),
                "query [" + dslQuery + "] returned [" + rows.length() + "] rows. Expected " + expectedNumRows
                        + " rows."); // some queries may not have any results
        System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
    }

    @Test(dataProvider = "dslLimitQueriesProvider")
    public void testSearchByDSLQueriesWithLimit(String dslQuery, Integer expectedNumRows) throws Exception {
        runQuery(dslQuery, expectedNumRows, 40, 0);
    }

    @DataProvider(name = "invalidDslQueriesProvider")
    private Object[][] createInvalidDSLQueries() {
        return new String[][]{{"from Unknown"}, {"Unknown"}, {"Unknown is Blah"},};
    }

    @Test(dataProvider = "invalidDslQueriesProvider", expectedExceptions = DiscoveryException.class)
    public void testSearchByDSLInvalidQueries(String dslQuery) throws Exception {
        System.out.println("Executing dslQuery = " + dslQuery);
        searchByDSL(dslQuery);
        Assert.fail();
    }

    @Test
    public void testSearchForTypeInheritance() throws Exception {
        createTypesWithMultiLevelInheritance();
        createInstances();

        String dslQuery = "from D where a = 1";
        String jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        JSONObject results = new JSONObject(jsonResults);
        System.out.println("results = " + results);
    }
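    // "order" and "limit" are DSL keywords, so attributes with those names must be escaped with
    // backticks in queries (e.g. where `order` = 1), as the test below exercises.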
    @Test
    public void testSearchForTypeWithReservedKeywordAttributes() throws Exception {
        createTypesWithReservedKeywordAttributes();

        String dslQuery = "from OrderType where `order` = 1";
        String jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        JSONObject results = new JSONObject(jsonResults);
        System.out.println("results = " + results);
    }

    /*
     * Type Hierarchy is:
     *   A(a)
     *   B(b) extends A
     *   C(c) extends B
     *   D(d) extends C
     */
    private void createTypesWithMultiLevelInheritance() throws Exception {
        HierarchicalTypeDefinition A = createClassTypeDef("A", null, createRequiredAttrDef("a", DataTypes.INT_TYPE));
        HierarchicalTypeDefinition B =
                createClassTypeDef("B", ImmutableSet.of("A"), createOptionalAttrDef("b", DataTypes.BOOLEAN_TYPE));
        HierarchicalTypeDefinition C =
                createClassTypeDef("C", ImmutableSet.of("B"), createOptionalAttrDef("c", DataTypes.BYTE_TYPE));
        HierarchicalTypeDefinition D =
                createClassTypeDef("D", ImmutableSet.of("C"), createOptionalAttrDef("d", DataTypes.SHORT_TYPE));

        TypeSystem.getInstance().defineClassTypes(A, B, C, D);
    }

    private void createTypesWithReservedKeywordAttributes() throws Exception {
        HierarchicalTypeDefinition orderType =
                createClassTypeDef("OrderType", null, createRequiredAttrDef("order", DataTypes.INT_TYPE));
        HierarchicalTypeDefinition limitType =
                createClassTypeDef("LimitType", null, createOptionalAttrDef("limit", DataTypes.BOOLEAN_TYPE));

        TypeSystem.getInstance().defineClassTypes(orderType, limitType);
    }

    private void createInstances() throws Exception {
        Referenceable instance = new Referenceable("D");
        instance.set("d", 1);
        instance.set("c", 1);
        instance.set("b", true);
        instance.set("a", 1);

        ClassType deptType = TypeSystem.getInstance().getDataType(ClassType.class, "D");
        ITypedReferenceableInstance typedInstance = deptType.convert(instance, Multiplicity.REQUIRED);
        repositoryService.createEntities(typedInstance);
    }

    private void runCountGroupByQuery(String dslQuery, ResultChecker checker) throws Exception {
        runAndValidateQuery(dslQuery, checker);
    }

    private void runAndValidateQuery(String dslQuery, ResultChecker checker) throws Exception {
        System.out.println("Executing dslQuery = " + dslQuery);
        String jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        JSONObject results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);

        Object query = results.get("query");
        assertNotNull(query);

        JSONArray rows = results.getJSONArray("rows");
        assertNotNull(rows);
        if (checker != null) {
            checker.validateResult(dslQuery, rows);
        }
        System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
    }

    @Test(dataProvider = "dslGroupByQueriesProvider")
    public void testSearchGroupByDSLQueries(String dslQuery, ResultChecker checker) throws Exception {
        runCountGroupByQuery(dslQuery, checker);
    }

    @Test(dataProvider = "dslObjectQueriesReturnIdProvider")
    public void testSearchObjectQueriesReturnId(String dslQuery, ResultChecker checker) throws Exception {
        runAndValidateQuery(dslQuery, checker);
    }

    private interface ResultChecker {
        void validateResult(String dslQuery, JSONArray foundRows) throws JSONException;
    }

    static class FieldValueValidator implements ResultChecker {

        static class ResultObject {
            private static String[] idTypeAttributes = {"id", "$typeName$", "state", "version"};

            Map<String, Object> fieldValues_ = new HashMap<>();

            @Override
            public String toString() {
                return "ResultObject [fieldValues_=" + fieldValues_ + "]";
            }

            public void setFieldValue(String string, Object object) {
                fieldValues_.put(string, object);
            }

            public boolean matches(JSONObject object) throws JSONException {
                for (Map.Entry<String, Object> requiredFieldsEntry : fieldValues_.entrySet()) {
                    String fieldName = requiredFieldsEntry.getKey();
                    Object expectedValue = requiredFieldsEntry.getValue();
                    Object foundValue = null;

                    if (expectedValue.getClass() == Integer.class) {
                        foundValue = object.getInt(fieldName);
                    } else if (expectedValue == idType) {
                        return validateObjectIdType(object, fieldName);
                    } else {
                        foundValue = object.get(fieldName);
                    }
                    if (foundValue == null || !expectedValue.equals(foundValue)) {
                        return false;
                    }
                }
                return true;
            }
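            // Note: "expectedValue == idType" in matches() is an intentional reference comparison;
            // the idType constant is a sentinel meaning "this field should be an object id", which
            // is then checked structurally by validateObjectIdType below.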
            // validates that the returned object id contains all the required attributes.
            private boolean validateObjectIdType(JSONObject object, String fieldName) throws JSONException {
                JSONObject foundJson = object.getJSONObject(fieldName);
                for (String idAttr : idTypeAttributes) {
                    if (foundJson.get(idAttr) == null) {
                        return false;
                    }
                }
                return true;
            }
        }

        private String[] fieldNames_;
        private List<ResultObject> expectedObjects_ = new ArrayList<>();

        public FieldValueValidator() {
        }

        public FieldValueValidator withFieldNames(String... fields) {
            fieldNames_ = fields;
            return this;
        }

        public FieldValueValidator withExpectedValues(Object... values) {
            ResultObject obj = new ResultObject();
            for (int i = 0; i < fieldNames_.length; i++) {
                obj.setFieldValue(fieldNames_[i], values[i]);
            }
            expectedObjects_.add(obj);
            return this;
        }

        @Override
        public void validateResult(String dslQuery, JSONArray foundRows) throws JSONException {
            // make sure that all required rows are found
            Assert.assertEquals(foundRows.length(), expectedObjects_.size(),
                    "The wrong number of objects was returned for query " + dslQuery + ". Expected "
                            + expectedObjects_.size() + ", found " + foundRows.length());

            for (ResultObject required : expectedObjects_) {
                // not exactly efficient, but this is test code
                boolean found = false;
                for (int i = 0; i < foundRows.length(); i++) {
                    JSONObject row = foundRows.getJSONObject(i);
                    System.out.println(" found row " + row);
                    if (required.matches(row)) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    Assert.fail("The result for " + dslQuery + " is wrong. The required row " + required
                            + " was not found in " + foundRows);
                }
            }
        }
    }
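    // Note: CountOnlyValidator appears unused in this class, and its count assertion is commented
    // out below, so as written it only verifies the number of rows and that each row has one column.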
    static class CountOnlyValidator implements ResultChecker {
        private List<Integer> expectedCounts = new ArrayList<>();
        private int countColumn = 0;

        public CountOnlyValidator() {
        }

        public CountOnlyValidator withCountColumn(int col) {
            countColumn = col;
            return this;
        }

        public CountOnlyValidator withExpectedCounts(Integer... counts) {
            expectedCounts.addAll(Arrays.asList(counts));
            return this;
        }

        @Override
        public void validateResult(String dslQuery, JSONArray foundRows) throws JSONException {
            assertEquals(foundRows.length(), expectedCounts.size());
            for (int i = 0; i < foundRows.length(); i++) {
                JSONArray row = foundRows.getJSONArray(i);
                assertEquals(row.length(), 1);
                int foundCount = row.getInt(countColumn);
                // assertTrue(expectedCounts.contains(foundCount));
            }
        }
    }

    @Test
    public void testSearchForTypeWithNoInstances() throws Exception {
        HierarchicalTypeDefinition EMPTY =
                createClassTypeDef("EmptyType", null, createRequiredAttrDef("a", DataTypes.INT_TYPE));
        TypeSystem.getInstance().defineClassTypes(EMPTY);

        String dslQuery = "EmptyType";
        String jsonResults = searchByDSL(dslQuery);
        assertNotNull(jsonResults);

        JSONObject results = new JSONObject(jsonResults);
        assertEquals(results.length(), 3);

        JSONArray rows = results.getJSONArray("rows");
        assertNotNull(rows);
        // query should not return any rows
        assertEquals(rows.length(), 0);
    }

    @Test
    public void testTypePreservedWhenFilterTraversesEdges() throws DiscoveryException, JSONException {
        String dsl = "hive_table db.name=\"Reporting\" limit 10";
        ImmutableSet<String> expectedTableNames =
                ImmutableSet.of("table1", "table2", "sales_fact_monthly_mv", "sales_fact_daily_mv");

        String jsonResults = discoveryService.searchByDSL(dsl, null);
        assertNotNull(jsonResults);

        JSONObject results = new JSONObject(jsonResults);
        JSONArray rows = results.getJSONArray("rows");
        assertEquals(rows.length(), expectedTableNames.size());
        for (int i = 0; i < rows.length(); i++) {
            JSONObject row = rows.getJSONObject(i);
            Assert.assertTrue(expectedTableNames.contains(row.get("name")));
        }
    }

    private FieldValueValidator makeCountValidator(int count) {
        return new FieldValueValidator().withFieldNames("count()").withExpectedValues(count);
    }

    private FieldValueValidator makeNoResultsValidator() {
        return new FieldValueValidator();
    }

    private boolean isGremlin3() {
        return TestUtils.getGraph().getSupportedGremlinVersion() == GremlinVersion.THREE;
    }
}