/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.aliyun.odps.mapred.local.lib;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.mapred.MapperBase;
import com.aliyun.odps.mapred.conf.JobConf;
/**
 * Mapper that emits every regular-expression match found in the input columns.
 *
 * <p>For each column of each input record, the column's string form is scanned with the
 * pattern read from the {@code mapred.mapper.regex} job setting. Every match produces one
 * output pair: the key record holds the text of the configured capture group
 * ({@code mapred.mapper.regex.group}, default {@code 0}, i.e. the whole match) and the
 * value record holds the constant {@code 1L}.
 *
 * @author mingdi
 */
public class RegexMapper extends MapperBase {

  /** Job setting that holds the regular expression to search for. */
  private static final String REGEX_KEY = "mapred.mapper.regex";

  /** Job setting that selects which capture group of a match is emitted. */
  private static final String GROUP_KEY = "mapred.mapper.regex.group";

  /** Compiled form of the configured regular expression; assigned in {@link #setup}. */
  private Pattern pattern;

  /** Index of the capture group emitted as the output key; assigned in {@link #setup}. */
  private int group;

  /** Reusable output key record; overwritten with the matched text before each write. */
  private Record word;

  /** Reusable output value record; set once to {@code 1L} and written with every match. */
  private Record one;

  /**
   * Reads the regex configuration from the job and prepares the reusable output records.
   *
   * @param context task context supplying the job configuration and the output records
   * @throws IOException declared by the overridden method
   * @throws NullPointerException if the {@code mapred.mapper.regex} setting is absent
   */
  @Override
  public void setup(TaskContext context) throws IOException {
    JobConf job = (JobConf) context.getJobConf();

    String regex = job.get(REGEX_KEY);
    if (regex == null) {
      // Pattern.compile(null) would fail with a message-less NullPointerException;
      // keep the exception type but say which setting is missing.
      throw new NullPointerException("Missing required job setting: " + REGEX_KEY);
    }
    pattern = Pattern.compile(regex);
    group = job.getInt(GROUP_KEY, 0);

    word = context.createMapOutputKeyRecord();
    one = context.createMapOutputValueRecord();
    one.set(new Object[]{1L});
  }

  /**
   * Scans every column of {@code record} and writes one (matched text, 1) pair per match.
   *
   * <p>Columns whose value is {@code null} are skipped.
   *
   * @param recordNum sequence number of the record; not used by this mapper
   * @param record input record whose columns are searched
   * @param context task context that receives the output pairs
   * @throws IOException if writing an output pair fails
   */
  @Override
  public void map(long recordNum, Record record, TaskContext context)
      throws IOException {
    for (int i = 0; i < record.getColumnCount(); ++i) {
      Object column = record.get(i);
      if (column == null) {
        // Nothing to scan; calling toString() here would throw a NullPointerException.
        continue;
      }
      Matcher matcher = pattern.matcher(column.toString());
      while (matcher.find()) {
        word.set(new Object[]{matcher.group(group)});
        context.write(word, one);
      }
    }
  }
}