package com.splout.db.hadoop.engine; /* * #%L * Splout SQL Hadoop library * %% * Copyright (C) 2012 - 2013 Datasalt Systems S.L. * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import com.datasalt.pangool.io.ITuple; import com.datasalt.pangool.io.Schema.Field; import com.datasalt.pangool.io.Schema.Field.Type; import com.splout.db.hadoop.TableSpec; import com.splout.db.hadoop.TableSpec.FieldIndex; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** * Abstract class that can be extended for generating arbitrary outputformats in Splout. */ @SuppressWarnings("serial") public abstract class SploutSQLOutputFormat implements Serializable, Configurable { public final static String PARTITION_TUPLE_FIELD = "_partition"; public static class SploutSQLOutputFormatException extends Exception { public SploutSQLOutputFormatException(String cause) { super(cause); } public SploutSQLOutputFormatException(String cause, Exception e) { super(cause, e); } } public abstract String getCreateTable(TableSpec tableSpec) throws SploutSQLOutputFormatException; public abstract void initPartition(int partition, Path localDbFile) throws IOException, InterruptedException; public abstract void write(ITuple tuple) throws IOException, InterruptedException; public abstract void close() throws IOException, InterruptedException; private int batchSize; private TableSpec[] dbSpec; private transient Configuration conf; /** * This OutputFormat receives a list of {@link TableSpec}. These are the different tables that will be created. They * will be identified by Pangool Tuples. The batch size is the number of SQL statements to execute before a COMMIT. */ public SploutSQLOutputFormat(Integer batchSize, TableSpec... dbSpec) throws SploutSQLOutputFormatException { this.batchSize = batchSize; this.dbSpec = dbSpec; } public int getBatchSize() { return batchSize; } public String[] getPostSQL() throws SploutSQLOutputFormatException { return getCreateIndexes(dbSpec); } public String[] getPreSQL() throws SploutSQLOutputFormatException { return getCreateTables(dbSpec); } public static Field getPartitionField() { return Field.create(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, Type.INT); } // Get all the CREATE TABLE... for a list of {@link TableSpec} protected String[] getCreateTables(TableSpec... tableSpecs) throws SploutSQLOutputFormatException { List<String> createTables = new ArrayList<String>(); // First the initSQL provided by user for (TableSpec tableSpec : tableSpecs) { if (tableSpec.getInitialSQL() != null) { createTables.addAll(Arrays.asList(tableSpec.getInitialSQL())); } } // CREATE TABLE statements for (TableSpec tableSpec : tableSpecs) { createTables.add(getCreateTable(tableSpec)); } // Add user preInsertsSQL if exists just after the CREATE TABLE's for (TableSpec tableSpec : tableSpecs) { if (tableSpec.getPreInsertsSQL() != null) { createTables.addAll(Arrays.asList(tableSpec.getPreInsertsSQL())); } } return createTables.toArray(new String[0]); } // Get a list of CREATE INDEX... Statements for a {@link TableSpec} list. protected static String[] getCreateIndexes(TableSpec... tableSpecs) throws SploutSQLOutputFormatException { List<String> createIndexes = new ArrayList<String>(); // Add user postInsertsSQL if exists just before the CREATE INDEX statements for (TableSpec tableSpec : tableSpecs) { if (tableSpec.getPostInsertsSQL() != null) { createIndexes.addAll(Arrays.asList(tableSpec.getPostInsertsSQL())); } } for (TableSpec tableSpec : tableSpecs) { for (FieldIndex index : tableSpec.getIndexes()) { for (Field field : index.getIndexFields()) { if (!tableSpec.getSchema().getFields().contains(field)) { throw new SploutSQLOutputFormatException("Field to index (" + index + ") not contained in input schema (" + tableSpec.getSchema() + ")"); } } // The following code is able to create indexes for one field or for multiple fields String createIndex = "CREATE INDEX idx_" + tableSpec.getSchema().getName() + "_"; for (Field field : index.getIndexFields()) { createIndex += field.getName(); } createIndex += " ON " + tableSpec.getSchema().getName() + "("; for (Field field : index.getIndexFields()) { createIndex += "`" + field.getName() + "`, "; } createIndex = createIndex.substring(0, createIndex.length() - 2) + ");"; createIndexes.add(createIndex); } } // Add user finalSQL if exists just after the CREATE INDEX statements for (TableSpec tableSpec : tableSpecs) { if (tableSpec.getFinalSQL() != null) { createIndexes.addAll(Arrays.asList(tableSpec.getFinalSQL())); } } return createIndexes.toArray(new String[0]); } @Override public void setConf(Configuration conf) { this.conf = conf; } @Override public Configuration getConf() { return conf; } }