/** * Copyright 2011 Nube Technologies * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. */ package co.nubetech.hiho.mapreduce; import java.io.IOException; import java.sql.Types; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Iterator; import java.util.StringTokenizer; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Mapper; import org.apache.log4j.Logger; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.type.TypeReference; import co.nubetech.hiho.common.HIHOConf; import co.nubetech.hiho.mapreduce.lib.db.ColumnInfo; import co.nubetech.hiho.mapreduce.lib.db.GenericDBWritable; public class GenericDBLoadDataMapper<K, V> extends Mapper<K, V, GenericDBWritable, NullWritable> { final static Logger logger = Logger .getLogger(co.nubetech.hiho.mapreduce.GenericDBLoadDataMapper.class); private ArrayList values; private ArrayList<ColumnInfo> tableInfo; private String delimiter; public ArrayList<ColumnInfo> getTableInfo() { return tableInfo; } public String getDelimiter() { return delimiter; } public void setTableInfo(ArrayList<ColumnInfo> tableInfo) { this.tableInfo = tableInfo; } public void setDelimiter(String delimiter) { this.delimiter = delimiter; } protected void setup(Mapper.Context context) throws IOException, InterruptedException { delimiter = context.getConfiguration().get( HIHOConf.INPUT_OUTPUT_DELIMITER); logger.debug("delimiter is: " + delimiter); String columnInfoJsonString = context.getConfiguration().get( HIHOConf.COLUMN_INFO); logger.debug("columnInfoJsonString is: " + columnInfoJsonString); ObjectMapper mapper = new ObjectMapper(); tableInfo = mapper.readValue(columnInfoJsonString, new TypeReference<ArrayList<ColumnInfo>>() { }); } public void map(K key, V val, Context context) throws IOException, InterruptedException { values = new ArrayList(); logger.debug("Key is: " + key); logger.debug("Value is: " + val); StringTokenizer rowValue = new StringTokenizer(val.toString(), delimiter); if (rowValue.countTokens() == tableInfo.size()) { Iterator<ColumnInfo> iterator = tableInfo.iterator(); while (iterator.hasNext()) { ColumnInfo columnInfo = iterator.next(); String columnValue = rowValue.nextToken(); if (columnValue == null || columnValue.trim().equals("")) { values.add(null); } else { logger.debug("Adding value : " + columnValue); int type = columnInfo.getType(); if (type == Types.VARCHAR) { values.add(columnValue); } else if (type == Types.BIGINT) { values.add(Long.parseLong(columnValue)); } else if (type == Types.INTEGER) { values.add(Integer.parseInt(columnValue)); } else if (type == Types.DOUBLE) { values.add(Double.parseDouble(columnValue)); } else if (type == Types.FLOAT) { values.add(Float.parseFloat(columnValue)); } else if (type == Types.BOOLEAN) { values.add(Boolean.parseBoolean(columnValue)); } else if (type == Types.DATE) { DateFormat df = new SimpleDateFormat(); try { values.add(df.parse(columnValue)); } catch (ParseException e) { e.printStackTrace(); throw new IOException(e); } } } } } else { throw new IOException( "Number of columns specified in table is not equal to the columns contains in the file."); } GenericDBWritable gdw = new GenericDBWritable(tableInfo, values); context.write(gdw, null); } }