/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ambari.view.hive20.resources.uploads.parsers;

import org.apache.ambari.view.hive20.client.ColumnDescription;
import org.apache.ambari.view.hive20.client.Row;
import org.apache.ambari.view.hive20.internal.dto.ColumnInfo;
import org.apache.ambari.view.hive20.resources.uploads.ColumnDescriptionImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;

/**
 * Base class for upload parsers. It implements {@link #parsePreview()} on top of the
 * row iterator and {@code extractHeader()} supplied by the concrete parser: it reads a
 * bounded number of rows, normalizes them to a common width, and derives a column name
 * and a detected data type for every column.
 */
public abstract class Parser implements IParser {

  protected final static Logger LOG = LoggerFactory.getLogger(Parser.class);
  public static final String COLUMN_PREFIX = "column";

  protected Reader reader; // same as CSV reader in this case
  protected ParseOptions parseOptions;
  /** Number of rows to read for the preview; 10 unless a valid option overrides it. */
  private int numberOfPreviewRows = 10;

  /**
   * @param originalReader reader over the uploaded content; stored as-is
   * @param parseOptions   options controlling header handling and preview size
   */
  public Parser(Reader originalReader, ParseOptions parseOptions) {
    this.reader = originalReader;
    this.parseOptions = parseOptions;
  }

  /**
   * Detects the data type that fits all values of one column.
   *
   * @param rows   non empty list of rows
   * @param colNum column number to detect the data type for
   * @return data type for that column, as reported by
   *         {@code ParseUtils.detectHiveColumnDataType}
   */
  private ColumnDescription.DataTypes getLikelyDataType(List<Row> rows, int colNum) {
    // order of detection BOOLEAN,INT,BIGINT,DOUBLE,DATE,CHAR,STRING
    List<Object> colValues = new ArrayList<>(rows.size());
    for (Row row : rows) {
      colValues.add(row.getRow()[colNum]);
    }
    return ParseUtils.detectHiveColumnDataType(colValues);
  }

  /**
   * Builds the preview: up to {@code numberOfPreviewRows} rows (always at least one)
   * plus a header describing the name and detected type of each column.
   *
   * <p>The column count is taken from the header row when the header option is
   * {@code FIRST_RECORD} or {@code EMBEDDED}, otherwise from the first data row. Every
   * preview row is padded with {@code null} or truncated to that width.
   *
   * @return the preview rows together with their column descriptions
   * @throws NoSuchElementException if the iterator yields no data row
   */
  @Override
  public PreviewData parsePreview() {
    LOG.info("generating preview for : {}", this.parseOptions);

    applyPreviewRowCountOption();

    Row headerRow = null;
    Integer numOfCols = null;
    if (isHeaderRecordExpected()) {
      headerRow = extractHeader();
      numOfCols = headerRow.getRow().length;
    }

    // NOTE(review): iterator() is invoked repeatedly below, which only works if the
    // concrete parser hands back the same underlying iterator each time - confirm.
    if (!iterator().hasNext()) {
      LOG.error("No rows found in the file. returning error.");
      throw new NoSuchElementException("No rows in the file.");
    }

    Row firstRow = iterator().next();
    if (null == numOfCols) {
      numOfCols = firstRow.getRow().length;
    }

    ArrayList<Row> previewRows = new ArrayList<>(numberOfPreviewRows);
    // The first row is always included, even when the requested count is 0.
    previewRows.add(fitToWidth(firstRow, numOfCols));
    for (int remaining = numberOfPreviewRows - 1; remaining > 0 && iterator().hasNext(); remaining--) {
      previewRows.add(fitToWidth(iterator().next(), numOfCols));
    }

    // find data types.
    List<ColumnInfo> header = generateHeader(headerRow, previewRows, numOfCols);

    return new PreviewData(header, previewRows);
  }

  /**
   * Reads the preview-row-count option and stores it when it is a non-negative Integer.
   * Anything else (absent, wrong type, negative) is logged and the current value kept;
   * a negative value must never reach the ArrayList capacity argument.
   */
  private void applyPreviewRowCountOption() {
    Object requested = parseOptions.getOption(ParseOptions.OPTIONS_NUMBER_OF_PREVIEW_ROWS);
    if (requested instanceof Integer && ((Integer) requested) >= 0) {
      numberOfPreviewRows = (Integer) requested;
    } else {
      LOG.debug("Illegal number of preview rows supplied {}", requested);
    }
  }

  /** Tells whether the header option asks for a header record (FIRST_RECORD or EMBEDDED). */
  private boolean isHeaderRecordExpected() {
    Object headerOption = parseOptions.getOption(ParseOptions.OPTIONS_HEADER);
    return headerOption != null
        && (headerOption.equals(ParseOptions.HEADER.FIRST_RECORD.toString())
            || headerOption.equals(ParseOptions.HEADER.EMBEDDED.toString()));
  }

  /** Copies a row into a new row of exactly {@code width} cells: null-padded or truncated. */
  private Row fitToWidth(Row row, int width) {
    Object[] values = row.getRow();
    Object[] fitted = new Object[width];
    System.arraycopy(values, 0, fitted, 0, Math.min(values.length, width));
    return new Row(fitted);
  }

  /**
   * Creates one {@link ColumnInfo} per column from the header row and the preview rows.
   *
   * @param headerRow   header row, or {@code null} when the file has none
   * @param previewRows rows used for data type detection
   * @param numOfCols   number of columns to describe
   * @return column descriptions in column order
   */
  private List<ColumnInfo> generateHeader(Row headerRow, List<Row> previewRows, int numOfCols) {
    List<ColumnInfo> header = new ArrayList<>();

    for (int colNum = 0; colNum < numOfCols; colNum++) {
      ColumnDescription.DataTypes type = getLikelyDataType(previewRows, colNum);
      LOG.info("datatype detected for column {} : {}", colNum, type);

      // Generated name ("column1", "column2", ...) is used when there is no header
      // row or the header cell for this column is null.
      String colName = COLUMN_PREFIX + (colNum + 1);
      if (null != headerRow) {
        Object headerValue = headerRow.getRow()[colNum];
        if (null != headerValue) {
          colName = (String) headerValue;
        }
      }

      header.add(new ColumnInfo(colName, type.toString()));
    }

    LOG.debug("return headers : {} ", header);
    return header;
  }
}