import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
public class ArrestJoinMapper extends Mapper<LongWritable, Text, Text, Text> {
Logger logger = Logger.getLogger(ArrestJoinMapper.class);
private static final char OUTPUT_SEPARATOR = '\t';
/** Maps a team by season to the players who were arrested that season */
HashMap<String, ArrayList<String>> teamSeasonToPlayersArrested = new HashMap<String, ArrayList<String>>();
@Override
protected void setup(Context context) throws IOException, InterruptedException {
// Create hash map for Map-side join
FileSystem fileSystem = FileSystem.get(context.getConfiguration());
FSDataInputStream dataInputStream = fileSystem.open(new Path(context.getConfiguration().get("arrestfile",
"arrests.csv")));
String line;
while ((line = dataInputStream.readLine()) != null) {
String[] pieces = line.split(",");
String key = getKey(pieces[0], pieces[1]);
ArrayList<String> arrestsPerSeasonAndTeam = teamSeasonToPlayersArrested.get(key);
if (arrestsPerSeasonAndTeam == null) {
arrestsPerSeasonAndTeam = new ArrayList<String>();
teamSeasonToPlayersArrested.put(key, arrestsPerSeasonAndTeam);
}
arrestsPerSeasonAndTeam.add(pieces[2]);
}
dataInputStream.close();
}
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
boolean playerArrested = false, defensePlayerArrested = false, offensePlayerArrested = false;
String[] pieces = value.toString().split("\\t");
boolean[] arrests = checkArrests(pieces, pieces[4]);
offensePlayerArrested = arrests[0];
playerArrested = arrests[1];
arrests = checkArrests(pieces, pieces[5]);
defensePlayerArrested = arrests[0];
playerArrested = playerArrested || arrests[1];
StringBuilder output = new StringBuilder();
output.append(playerArrested).append(OUTPUT_SEPARATOR);
output.append(defensePlayerArrested).append(OUTPUT_SEPARATOR);
output.append(offensePlayerArrested).append(OUTPUT_SEPARATOR);
boolean homeTeamPlayerArrested = false, awayTeamPlayerArrest = false;
if (pieces[4].equals(pieces[22])) {
// Offense is home team
homeTeamPlayerArrested = offensePlayerArrested;
awayTeamPlayerArrest = defensePlayerArrested;
} else {
// Defense is home team
homeTeamPlayerArrested = defensePlayerArrested;
awayTeamPlayerArrest = offensePlayerArrested;
}
output.append(homeTeamPlayerArrested).append(OUTPUT_SEPARATOR);
output.append(awayTeamPlayerArrest);
if (pieces[0].equals("20121104_CAR@WAS")) {
logger.info(pieces[0] + " " + homeTeamPlayerArrested + " " + awayTeamPlayerArrest + " " + playerArrested + " " + value.toString());
}
context.write(value, new Text(output.toString()));
}
/**
* Checks to see if the team has any arrests
*
* @param pieces
* @param teamName
* @return A boolean array with {teamPlayerArrested, playerArrested}
*/
private boolean[] checkArrests(String[] pieces, String teamName) {
boolean playerArrested = false, teamPlayerArrested = false;
String season = pieces[12];
// Check defense for arrests
ArrayList<String> arrestedPlayers = teamSeasonToPlayersArrested.get(getKey(season, teamName));
if (arrestedPlayers != null) {
teamPlayerArrested = true;
for (int i = 13; i < 17; i++) {
if (pieces[i].length() == 0) {
continue;
}
for (String arrestedPlayer : arrestedPlayers) {
// See if the regular name is there
if (pieces[i].equals(arrestedPlayer)) {
playerArrested = true;
break;
}
// Try it again with the initial
String firstInitial = arrestedPlayer.substring(0, 1) + "."
+ arrestedPlayer.substring(arrestedPlayer.indexOf(" "));
if (firstInitial.equals(pieces[i])) {
playerArrested = true;
break;
}
// Try one more time in the play description in case it wasn't parsed
if (pieces[9].indexOf(firstInitial) != -1 || pieces[9].indexOf(arrestedPlayer) != -1) {
playerArrested = true;
break;
}
}
if (playerArrested == true) {
break;
}
}
}
return new boolean[] { teamPlayerArrested, playerArrested };
}
private String getKey(String season, String team) {
return season + "-" + team;
}
}