/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * <p/> * http://www.apache.org/licenses/LICENSE-2.0 * <p/> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ambari.view.hive20.resources.uploads; import com.google.common.base.Optional; import com.sun.jersey.core.header.FormDataContentDisposition; import com.sun.jersey.multipart.FormDataParam; import org.apache.ambari.view.ViewContext; import org.apache.ambari.view.hive20.BaseService; import org.apache.ambari.view.hive20.ConnectionFactory; import org.apache.ambari.view.hive20.ConnectionSystem; import org.apache.ambari.view.hive20.client.DDLDelegator; import org.apache.ambari.view.hive20.client.DDLDelegatorImpl; import org.apache.ambari.view.hive20.client.Row; import org.apache.ambari.view.hive20.exceptions.ServiceException; import org.apache.ambari.view.hive20.internal.dto.ColumnInfo; import org.apache.ambari.view.hive20.internal.dto.TableMeta; import org.apache.ambari.view.hive20.internal.query.generators.CreateTableQueryGenerator; import org.apache.ambari.view.hive20.internal.query.generators.DeleteTableQueryGenerator; import org.apache.ambari.view.hive20.internal.query.generators.InsertFromQueryGenerator; import org.apache.ambari.view.hive20.resources.jobs.viewJobs.Job; import org.apache.ambari.view.hive20.resources.jobs.viewJobs.JobController; import org.apache.ambari.view.hive20.resources.jobs.viewJobs.JobImpl; import org.apache.ambari.view.hive20.resources.jobs.viewJobs.JobResourceManager; import org.apache.ambari.view.hive20.resources.uploads.parsers.DataParser; import org.apache.ambari.view.hive20.resources.uploads.parsers.ParseOptions; import org.apache.ambari.view.hive20.resources.uploads.parsers.PreviewData; import org.apache.ambari.view.hive20.resources.uploads.query.DeleteQueryInput; import org.apache.ambari.view.hive20.resources.uploads.query.InsertFromQueryInput; import org.apache.ambari.view.hive20.utils.ServiceFormattedException; import org.apache.ambari.view.hive20.utils.SharedObjectsFactory; import org.apache.ambari.view.utils.ambari.AmbariApi; import org.apache.commons.io.input.ReaderInputStream; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.type.TypeReference; import org.json.simple.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.inject.Inject; import javax.ws.rs.*; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.net.URI; import java.net.URISyntaxException; import java.util.HashMap; import java.util.List; import java.util.Map; /** * UI driven end points for creation of new hive table and inserting data into it. * It uploads a file, parses it partially based on its type, generates preview, * creates temporary hive table for storage as CSV and actual hive table, * uploads the file again, parses it, create CSV stream and upload to hdfs in temporary table, * insert rows from temporary table to actual table, delete temporary table. * <p/> * API: * POST /preview : takes stream, parses it and returns preview rows, headers and column type suggestions * POST /createTable : runs hive query to create table in hive * POST /upload : takes stream, parses it and converts it into CSV and uploads it to the temporary table * POST /insertIntoTable : runs hive query to insert data from temporary table to actual hive table * POST /deleteTable : deletes the temporary table */ public class UploadService extends BaseService { private final static Logger LOG = LoggerFactory.getLogger(UploadService.class); @Inject protected ViewContext context; private AmbariApi ambariApi; protected JobResourceManager resourceManager; final private static String HIVE_METASTORE_LOCATION_KEY = "hive.metastore.warehouse.dir"; final private static String HIVE_SITE = "hive-site"; final private static String HIVE_METASTORE_LOCATION_KEY_VIEW_PROPERTY = HIVE_METASTORE_LOCATION_KEY; private static final String HIVE_DEFAULT_METASTORE_LOCATION = "/apps/hive/warehouse"; final private static String HIVE_DEFAULT_DB = "default"; public void validateForUploadFile(UploadFromHdfsInput input){ if( null == input.getInputFileType()){ throw new IllegalArgumentException("inputFileType parameter cannot be null."); } if( null == input.getHdfsPath()){ throw new IllegalArgumentException("hdfsPath parameter cannot be null."); } if( null == input.getTableName()){ throw new IllegalArgumentException("tableName parameter cannot be null."); } if( null == input.getDatabaseName()){ throw new IllegalArgumentException("databaseName parameter cannot be null."); } if( input.getIsFirstRowHeader() == null ){ input.setIsFirstRowHeader(false); } } public void validateForPreview(UploadFromHdfsInput input){ if( input.getIsFirstRowHeader() == null ){ input.setIsFirstRowHeader(false); } if( null == input.getInputFileType()){ throw new IllegalArgumentException("inputFileType parameter cannot be null."); } if( null == input.getHdfsPath()){ throw new IllegalArgumentException("hdfsPath parameter cannot be null."); } } @POST @Path("/previewFromHdfs") @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) public Response uploadForPreviewFromHDFS(UploadFromHdfsInput input) { InputStream uploadedInputStream = null; try { uploadedInputStream = getHDFSFileStream(input.getHdfsPath()); this.validateForPreview(input); CSVParams csvParams = getCsvParams(input.getCsvDelimiter(), input.getCsvQuote(), input.getCsvEscape()); PreviewData pd = generatePreview(input.getIsFirstRowHeader(), input.getInputFileType(), csvParams, uploadedInputStream); String tableName = getBasenameFromPath(input.getHdfsPath()); return createPreviewResponse(pd, input.getIsFirstRowHeader(), tableName); } catch (WebApplicationException e) { LOG.error(getErrorMessage(e), e); throw e; } catch (Exception e) { LOG.error(e.getMessage(), e); throw new ServiceFormattedException(e); } finally { if (null != uploadedInputStream) { try { uploadedInputStream.close(); } catch (IOException e) { LOG.error("Exception occured while closing the HDFS file stream for path " + input.getHdfsPath(), e); } } } } @PUT @Path("/preview") @Consumes(MediaType.MULTIPART_FORM_DATA) public Response uploadForPreview( @FormDataParam("file") InputStream uploadedInputStream, @FormDataParam("file") FormDataContentDisposition fileDetail, @FormDataParam("isFirstRowHeader") Boolean isFirstRowHeader, @FormDataParam("inputFileType") String inputFileType, @FormDataParam("csvDelimiter") String csvDelimiter, @FormDataParam("csvEscape") String csvEscape, @FormDataParam("csvQuote") String csvQuote ) { try { if( null == inputFileType) throw new IllegalArgumentException("inputFileType parameter cannot be null."); if( null == isFirstRowHeader ) isFirstRowHeader = false; CSVParams csvParams = getCsvParams(csvDelimiter, csvQuote, csvEscape); PreviewData pd = generatePreview(isFirstRowHeader, inputFileType, csvParams, uploadedInputStream); return createPreviewResponse(pd, isFirstRowHeader, getBasename(fileDetail.getFileName())); } catch (WebApplicationException e) { LOG.error(getErrorMessage(e), e); throw e; } catch (Exception e) { LOG.error(e.getMessage(), e); throw new ServiceFormattedException(e); } } private CSVParams getCsvParams(String csvDelimiter, String csvQuote, String csvEscape) { char csvq = CSVParams.DEFAULT_QUOTE_CHAR; char csvd = CSVParams.DEFAULT_DELIMITER_CHAR; char csve = CSVParams.DEFAULT_ESCAPE_CHAR; if(null != csvDelimiter){ char[] csvdArray = csvDelimiter.toCharArray(); if(csvdArray.length > 0 ) { csvd = csvdArray[0]; } } if(null != csvQuote){ char[] csvqArray = csvQuote.toCharArray(); if(csvqArray.length > 0 ) { csvq = csvqArray[0]; } } if(null != csvEscape){ char[] csveArray = csvEscape.toCharArray(); if(csveArray.length > 0 ) { csve = csveArray[0]; } } return new CSVParams(csvd, csvq, csve); } @Path("/uploadFromHDFS") @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) public Response uploadFileFromHdfs(UploadFromHdfsInput input) { // create stream and upload InputStream hdfsStream = null; try { hdfsStream = getHDFSFileStream(input.getHdfsPath()); CSVParams csvParams = getCsvParams(input.getCsvDelimiter(), input.getCsvQuote(), input.getCsvEscape()); String path = uploadFileFromStream(hdfsStream, input.getIsFirstRowHeader(), input.getInputFileType(), input.getTableName(), input.getDatabaseName(), input.getHeader(), input.isContainsEndlines(), csvParams); JSONObject jo = new JSONObject(); jo.put("uploadedPath", path); return Response.ok(jo).build(); } catch (WebApplicationException e) { LOG.error(getErrorMessage(e), e); throw e; } catch (Exception e) { LOG.error(e.getMessage(), e); throw new ServiceFormattedException(e); } finally { if (null != hdfsStream) try { hdfsStream.close(); } catch (IOException e) { LOG.error("Exception occured while closing the HDFS stream for path : " + input.getHdfsPath(), e); } } } @Path("/upload") @PUT @Consumes(MediaType.MULTIPART_FORM_DATA) @Produces(MediaType.APPLICATION_JSON) public Response uploadFile( @FormDataParam("file") InputStream uploadedInputStream, @FormDataParam("file") FormDataContentDisposition fileDetail, @FormDataParam("isFirstRowHeader") Boolean isFirstRowHeader, @FormDataParam("inputFileType") String inputFileType, // the format of the file uploaded. CSV/JSON etc. @FormDataParam("tableName") String tableName, @FormDataParam("databaseName") String databaseName, @FormDataParam("header") String header, @FormDataParam("containsEndlines") boolean containsEndlines, @FormDataParam("csvDelimiter") String csvDelimiter, @FormDataParam("csvEscape") String csvEscape, @FormDataParam("csvQuote") String csvQuote ) { try { CSVParams csvParams = getCsvParams(csvDelimiter, csvQuote, csvEscape); ObjectMapper mapper = new ObjectMapper(); List<ColumnInfo> columnList = mapper.readValue(header, new TypeReference<List<ColumnInfo>>(){}); String path = uploadFileFromStream(uploadedInputStream, isFirstRowHeader, inputFileType, tableName, databaseName, columnList, containsEndlines, csvParams); JSONObject jo = new JSONObject(); jo.put("uploadedPath", path); return Response.ok(jo).build(); } catch (WebApplicationException e) { LOG.error(getErrorMessage(e), e); throw e; } catch (Exception e) { LOG.error(e.getMessage(), e); throw new ServiceFormattedException(e); } } @Path("/insertIntoTable") @POST @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) public Response insertFromTempTable(InsertFromQueryInput input) { try { String insertQuery = generateInsertFromQuery(input); LOG.info("insertQuery : {}", insertQuery); Job job = createJob(insertQuery, input.getFromDatabase(), "Insert from " + input.getFromDatabase() + "." + input.getFromTable() + " to " + input.getToDatabase() + "." + input.getToTable()); LOG.info("Job created for insert from temp table : {}", job); return Response.ok(job).build(); } catch (WebApplicationException e) { LOG.error(getErrorMessage(e), e); throw e; } catch (Throwable e) { LOG.error(e.getMessage(), e); throw new ServiceFormattedException(e); } } private String uploadIntoTable(Reader reader, String databaseName, String tempTableName) { try { String fullPath = getHiveMetaStoreLocation(databaseName, tempTableName); LOG.info("Uploading file into : {}", fullPath); uploadFile(fullPath, new ReaderInputStream(reader)); return fullPath; } catch (WebApplicationException e) { LOG.error(getErrorMessage(e), e); throw e; } catch (Exception e) { LOG.error(e.getMessage(), e); throw new ServiceFormattedException(e); } } private synchronized JobResourceManager getResourceManager() { if (resourceManager == null) { SharedObjectsFactory connectionsFactory = getSharedObjectsFactory(); resourceManager = new JobResourceManager(connectionsFactory, context); } return resourceManager; } private synchronized AmbariApi getAmbariApi() { if (null == ambariApi) { ambariApi = new AmbariApi(this.context); } return ambariApi; } private String generateCreateQuery(TableMeta ti) throws ServiceException { CreateTableQueryGenerator createTableQueryGenerator = new CreateTableQueryGenerator(ti); Optional<String> query = createTableQueryGenerator.getQuery(); if(query.isPresent()){ return query.get(); }else{ throw new ServiceException("Failed to generate create table query."); } } private String generateInsertFromQuery(InsertFromQueryInput input) throws ServiceException { InsertFromQueryGenerator queryGenerator = new InsertFromQueryGenerator(input); Optional<String> query = queryGenerator.getQuery(); if(query.isPresent()){ return query.get(); }else{ throw new ServiceException("Failed to generate Insert From Query."); } } private String generateDeleteQuery(DeleteQueryInput deleteQueryInput) throws ServiceException { DeleteTableQueryGenerator deleteQuery = new DeleteTableQueryGenerator(deleteQueryInput.getDatabase(), deleteQueryInput.getTable()); Optional<String> query = deleteQuery.getQuery(); if(query.isPresent()){ return query.get(); }else{ throw new ServiceException("Failed to generate delete table query."); } } private Job createJob(String query, String databaseName, String jobTitle) throws Throwable{ Map jobInfo = new HashMap<>(); jobInfo.put("title", jobTitle); jobInfo.put("forcedContent", query); jobInfo.put("dataBase", databaseName); jobInfo.put("referrer", JobImpl.REFERRER.INTERNAL.name()); Job job = new JobImpl(jobInfo); LOG.info("creating job : {}", job); getResourceManager().create(job); JobController createdJobController = getResourceManager().readController(job.getId()); createdJobController.submit(); getResourceManager().saveIfModified(createdJobController); return job; } private String getHiveMetaStoreLocation(String db, String table) { String locationColValue = "Location:"; String urlString = null; DDLDelegator delegator = new DDLDelegatorImpl(context, ConnectionSystem.getInstance().getActorSystem(), ConnectionSystem.getInstance().getOperationController(context)); List<Row> result = delegator.getTableDescriptionFormatted(ConnectionFactory.create(context), db, table); for (Row row : result) { if (row != null && row.getRow().length > 1 && row.getRow()[0] != null && row.getRow()[0].toString().trim().equals(locationColValue)) { urlString = row.getRow()[1] == null ? null : row.getRow()[1].toString(); break; } } String tablePath = null; if (null != urlString) { try { URI uri = new URI(urlString); tablePath = uri.getPath(); } catch (URISyntaxException e) { LOG.debug("Error occurred while parsing as url : ", urlString, e); } } else { String basePath = getHiveMetaStoreLocation(); if (!basePath.endsWith("/")) { basePath = basePath + "/"; } if (db != null && !db.equals(HIVE_DEFAULT_DB)) { basePath = basePath + db + ".db/"; } tablePath = basePath + table; } return tablePath + "/" + table ; } private String getHiveMetaStoreLocation() { String dir = context.getProperties().get(HIVE_METASTORE_LOCATION_KEY_VIEW_PROPERTY); if (dir != null && !dir.trim().isEmpty()) { return dir; } else { LOG.debug("Neither found associated cluster nor found the view property {}. Returning default location : {}", HIVE_METASTORE_LOCATION_KEY_VIEW_PROPERTY, HIVE_DEFAULT_METASTORE_LOCATION); return HIVE_DEFAULT_METASTORE_LOCATION; } } private void uploadFile(final String filePath, InputStream uploadedInputStream) throws IOException, InterruptedException { byte[] chunk = new byte[1024]; FSDataOutputStream out = getSharedObjectsFactory().getHdfsApi().create(filePath, false); int n = -1; while ((n = uploadedInputStream.read(chunk)) != -1) { out.write(chunk, 0, n); } out.close(); } private static String getErrorMessage(WebApplicationException e) { if (null != e.getResponse() && null != e.getResponse().getEntity()) return e.getResponse().getEntity().toString(); else return e.getMessage(); } private PreviewData generatePreview(Boolean isFirstRowHeader, String inputFileType, CSVParams csvParams, InputStream uploadedInputStream) throws Exception { ParseOptions parseOptions = new ParseOptions(); parseOptions.setOption(ParseOptions.OPTIONS_FILE_TYPE, inputFileType); if (inputFileType.equals(ParseOptions.InputFileType.CSV.toString())){ if(isFirstRowHeader) parseOptions.setOption(ParseOptions.OPTIONS_HEADER, ParseOptions.HEADER.FIRST_RECORD.toString()); else parseOptions.setOption(ParseOptions.OPTIONS_HEADER, ParseOptions.HEADER.NONE.toString()); parseOptions.setOption(ParseOptions.OPTIONS_CSV_DELIMITER, csvParams.getCsvDelimiter()); parseOptions.setOption(ParseOptions.OPTIONS_CSV_ESCAPE_CHAR, csvParams.getCsvEscape()); parseOptions.setOption(ParseOptions.OPTIONS_CSV_QUOTE, csvParams.getCsvQuote()); } else parseOptions.setOption(ParseOptions.OPTIONS_HEADER, ParseOptions.HEADER.EMBEDDED.toString()); LOG.info("isFirstRowHeader : {}, inputFileType : {}", isFirstRowHeader, inputFileType); DataParser dataParser = new DataParser(new InputStreamReader(uploadedInputStream), parseOptions); return dataParser.parsePreview(); } private Response createPreviewResponse(PreviewData pd, Boolean isFirstRowHeader, String tableName) { Map<String, Object> retData = new HashMap<>(); retData.put("header", pd.getHeader()); retData.put("rows", pd.getPreviewRows()); retData.put("isFirstRowHeader", isFirstRowHeader); retData.put("tableName", tableName); JSONObject jsonObject = new JSONObject(retData); return Response.ok(jsonObject).build(); } private InputStream getHDFSFileStream(String path) throws IOException, InterruptedException { FSDataInputStream fsStream = getSharedObjectsFactory().getHdfsApi().open(path); return fsStream; } private String uploadFileFromStream( InputStream uploadedInputStream, Boolean isFirstRowHeader, String inputFileType, // the format of the file uploaded. CSV/JSON etc. String tableName, String databaseName, List<ColumnInfo> header, boolean containsEndlines, CSVParams csvParams ) throws Exception { LOG.info(" uploading file into databaseName {}, tableName {}", databaseName, tableName); ParseOptions parseOptions = new ParseOptions(); parseOptions.setOption(ParseOptions.OPTIONS_FILE_TYPE, inputFileType); if(isFirstRowHeader){ parseOptions.setOption(ParseOptions.OPTIONS_HEADER, ParseOptions.HEADER.FIRST_RECORD.toString()); }else{ parseOptions.setOption(ParseOptions.OPTIONS_HEADER, ParseOptions.HEADER.NONE.toString()); } if(null != csvParams){ parseOptions.setOption(ParseOptions.OPTIONS_CSV_DELIMITER, csvParams.getCsvDelimiter()); parseOptions.setOption(ParseOptions.OPTIONS_CSV_ESCAPE_CHAR, csvParams.getCsvEscape()); parseOptions.setOption(ParseOptions.OPTIONS_CSV_QUOTE, csvParams.getCsvQuote()); } DataParser dataParser = new DataParser(new InputStreamReader(uploadedInputStream), parseOptions); Reader csvReader = new TableDataReader(dataParser.iterator(), header, containsEndlines); // encode column values into HEX so that \n etc dont appear in the hive table data String path = uploadIntoTable(csvReader, databaseName, tableName); return path; } private String getBasenameFromPath(String path) { String fileName = new File(path).getName(); return getBasename(fileName); } private String getBasename(String fileName) { int index = fileName.indexOf("."); if (index != -1) { return fileName.substring(0, index); } return fileName; } }