/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lens.ml;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.mapred.TextInputFormat;
import lombok.extern.slf4j.Slf4j;
/**
* The Class ExampleUtils.
*/
@Slf4j
public final class ExampleUtils {
private ExampleUtils() {
}
/**
* Creates the example table.
*
* @param conf the conf
* @param database the database
* @param tableName the table name
* @param sampleDataFile the sample data file
* @param labelColumn the label column
* @param features the features
* @throws HiveException the hive exception
*/
public static void createTable(HiveConf conf, String database, String tableName, String sampleDataFile,
String labelColumn, Map<String, String> tableParams, String... features) throws HiveException {
Path dataFilePath = new Path(sampleDataFile);
Path partDir = dataFilePath.getParent();
// Create table
List<FieldSchema> columns = new ArrayList<FieldSchema>();
// Label is optional. Not used for unsupervised models.
// If present, label will be the first column, followed by features
if (labelColumn != null) {
columns.add(new FieldSchema(labelColumn, "double", "Labelled Column"));
}
for (String feature : features) {
columns.add(new FieldSchema(feature, "double", "Feature " + feature));
}
Table tbl = Hive.get(conf).newTable(database + "." + tableName);
tbl.setTableType(TableType.MANAGED_TABLE);
tbl.getTTable().getSd().setCols(columns);
tbl.getTTable().getParameters().putAll(tableParams);
tbl.setInputFormatClass(TextInputFormat.class);
tbl.setSerdeParam(serdeConstants.LINE_DELIM, "\n");
tbl.setSerdeParam(serdeConstants.FIELD_DELIM, " ");
List<FieldSchema> partCols = new ArrayList<FieldSchema>(1);
partCols.add(new FieldSchema("dummy_partition_col", "string", ""));
tbl.setPartCols(partCols);
Hive.get(conf).createTable(tbl, false);
log.info("Created table {}", tableName);
// Add partition for the data file
AddPartitionDesc partitionDesc = new AddPartitionDesc(database, tableName, false);
Map<String, String> partSpec = new HashMap<String, String>();
partSpec.put("dummy_partition_col", "dummy_val");
partitionDesc.addPartition(partSpec, partDir.toUri().toString());
Hive.get(conf).createPartitions(partitionDesc);
log.info("{}: Added partition {}", tableName, partDir.toUri().toString());
}
}