package de.l3s.common.features.hadoop;

/*
 * TIMETool - Large-scale Temporal Search in MapReduce
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

/*
 * THIS SOFTWARE IS PROVIDED BY THE LEMUR PROJECT AS PART OF THE CLUEWEB09
 * PROJECT AND OTHER CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @author
 */

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.log4j.Logger;

import de.l3s.common.models.timeseries.Timeseries;

public class TimeSeriesMapper extends Mapper<LongWritable, Text, Text, Timeseries> {

	private static final String DATE_FORMAT = "yyyy-MM-dd";
	private static final SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);

	public static final String DATE = "date";

	private static final Logger logger = Logger.getLogger(TimeSeriesMapper.class);

	/**
	 * Emits the file name as key and the file content (one complete time
	 * series) as value. Expects the whole file in a single map() call, with
	 * a header line starting with "date" followed by tab-separated
	 * "date\tfrequency" lines.
	 */
	@Override
	public void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {

		// Get the name of the file from the input split in the context.
		String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
		logger.info("fname: " + fileName);

		String[] lines = value.toString().split("\\n");

		// One point per data line; the header line is not counted.
		Timeseries timeSeries = new Timeseries(lines.length - 1);
		logger.info("timeseries length: " + timeSeries.ts_points.size());

		int idx = 0;
		for (String line : lines) {
			String[] values = line.split("\\t");

			// Skip the header line and any malformed lines.
			if (values.length < 2 || values[0].equals(DATE)) {
				continue;
			}
			logger.info("date: " + values[0]);
			logger.info("freq: " + values[1]);

			try {
				long timestamp = sdf.parse(values[0]).getTime();
				timeSeries.ts_points.get(idx).key.set(fileName, timestamp);
				timeSeries.ts_points.get(idx).dataPoint.fValue = Float.parseFloat(values[1]);
				timeSeries.ts_points.get(idx).dataPoint.lDateTime = timestamp;
				idx++;
			} catch (ParseException e) {
				logger.error("Cannot parse date '" + values[0] + "' in " + fileName, e);
				return;
			}
		}

		// Key to group related time series; emit once per input file,
		// after all points have been filled in.
		Text mapKey = new Text();
		mapKey.set(fileName);
		context.write(mapKey, timeSeries);
	}
}
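
/*
 * Sketch (not part of the original source): one possible way to wire this
 * mapper into a Job. It assumes the input format hands each map() call the
 * complete content of one time-series file (e.g. a custom whole-file input
 * format, which stock Hadoop does not provide), since map() splits its value
 * on newlines. The map-only setup and the path arguments are illustrative
 * assumptions, not the project's actual driver.
 */
class TimeSeriesJobSketch {

	public static void main(String[] args) throws Exception {
		org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
		org.apache.hadoop.mapreduce.Job job =
				org.apache.hadoop.mapreduce.Job.getInstance(conf, "timeseries-mapper-sketch");

		job.setJarByClass(TimeSeriesMapper.class);
		job.setMapperClass(TimeSeriesMapper.class);
		// job.setInputFormatClass(WholeFileInputFormat.class); // hypothetical whole-file format

		// Map-only sketch; a reducer that merges series sharing the same
		// file-name key would be configured here instead.
		job.setNumReduceTasks(0);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Timeseries.class);

		org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job,
				new org.apache.hadoop.fs.Path(args[0]));
		org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job,
				new org.apache.hadoop.fs.Path(args[1]));

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}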