package com.caseystella.util.common.hadoop.ingest; import com.google.common.base.Predicate; import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.type.TypeReference; import javax.annotation.Nullable; import java.io.*; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Created by cstella on 9/6/14. */ public class Config implements Predicate<File> { /** * Returns the result of applying this predicate to {@code input}. This method is <i>generally * expected</i>, but not absolutely required, to have the following properties: * <p/> * <ul> * <li>Its execution does not cause any observable side effects. * <li>The computation is <i>consistent with equals</i>; that is, {@code (a, b)} implies that {@code predicate.apply(a) == * predicate.apply(b))}. * </ul> * * @param input * @throws NullPointerException if {@code input} is null and this predicate does not accept null * arguments */ @Override public boolean apply(@Nullable File input) { boolean match = true; if(getNumPartitions() != null) { int hash = input.getName().hashCode(); match = hash % getNumPartitions() == getPartitionId(); } return match; } public static class Mapping implements Predicate<File> { private String destination; private String source; private String blacklistFile; private List<Pattern> blacklistPatterns = new ArrayList<Pattern>(); public String getDestination() { return destination; } public void setDestination(String destination) { this.destination = destination; } public String getSource() { return source; } public void setSource(String source) { this.source = source; } public String getBlacklistFile() { return blacklistFile; } public void setBlacklistFile(String blacklistFile) { this.blacklistFile = blacklistFile; } void initialize() throws IOException { if(blacklistFile != null) { BufferedReader reader = new BufferedReader(new FileReader(blacklistFile)); for(String line = null;(line = reader.readLine()) != null;) { Pattern pattern = Pattern.compile(line.trim()); blacklistPatterns.add(pattern); } } if(getSource() == null || getSource().length() == 0) { throw new AssertionError("Source is empty or null"); } if(getDestination() == null || getDestination().length() == 0) { throw new AssertionError("Source is empty or null"); } } /** * Returns the result of applying this predicate to {@code input}. This method is <i>generally * expected</i>, but not absolutely required, to have the following properties: * <p/> * <ul> * <li>Its execution does not cause any observable side effects. * <li>The computation is <i>consistent with equals</i>; that is, {@code (a, b)} implies that {@code predicate.apply(a) == * predicate.apply(b))}. * </ul> * * @param input * @throws NullPointerException if {@code input} is null and this predicate does not accept null * arguments */ @Override public boolean apply(File input) { for(Pattern p : blacklistPatterns) { try { Matcher m = p.matcher(input.getCanonicalPath()); if(m.matches()) { return false; } } catch (IOException e) { throw new RuntimeException("Unable to canonicalize the path for " + input); } } return true; } } static ObjectMapper mapper = new ObjectMapper(); private Mapping[] mappings; private Integer partitionId; private Integer numPartitions; public Mapping[] getMappings() { return mappings; } public void setMappings(Mapping[] mappings) { this.mappings = mappings; } public Integer getPartitionId() { return partitionId; } public void setPartitionId(Integer partitionId) { this.partitionId = partitionId; } public Integer getNumPartitions() { return numPartitions; } public void setNumPartitions(Integer numPartitions) { this.numPartitions = numPartitions; } public static Config load(Reader input) throws IOException { return mapper.readValue(input, new TypeReference<Config>(){}); } public void initialize() throws IOException { if(getNumPartitions() != null && getNumPartitions() > 0 && getPartitionId() == null) { throw new AssertionError("If you specify a number of partitions, you have to specify a partition ID"); } for(Mapping m : getMappings()) { m.initialize(); } } }