package com.thinkbiganalytics.spark.policy;
/*-
* #%L
* thinkbig-spark-validate-cleanse-api
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.thinkbiganalytics.policy.FieldPoliciesJsonTransformer;
import com.thinkbiganalytics.policy.FieldPolicy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Map;
@Component
public class FieldPolicyLoader implements Serializable {

    private static final Logger log = LoggerFactory.getLogger(FieldPolicyLoader.class);

    /** JSON handed to the transformer when the policy file cannot be read or has no content. */
    private static final String EMPTY_POLICY_JSON = "[]";

    /**
     * Reads the field policy JSON file and builds the field policies it describes.
     *
     * <p>If {@code path} does not point to an existing regular file, a file with the same name is read
     * from the current working directory instead. If the file cannot be read or is empty, the error is
     * logged and the policies are built from an empty JSON array ({@code "[]"}).
     *
     * @param path path to field policy JSON file
     * @return the map produced by {@code FieldPoliciesJsonTransformer.buildPolicies()} for the loaded JSON
     *         (presumably keyed by field name -- confirm against the transformer)
     */
    public Map<String, FieldPolicy> loadFieldPolicy(String path) {
        log.info("Loading Field Policy JSON file at {} ", path);
        String resolvedPath = resolvePolicyPath(path);
        String policyJson = readPolicyJson(resolvedPath);

        FieldPoliciesJsonTransformer fieldPoliciesJsonTransformer = new FieldPoliciesJsonTransformer(policyJson);
        fieldPoliciesJsonTransformer.augmentPartitionColumnValidation();
        Map<String, FieldPolicy> map = fieldPoliciesJsonTransformer.buildPolicies();
        log.info("Finished building field policies for file: {} with entity that has {} fields ", resolvedPath, map.size());
        return map;
    }

    /**
     * Decides which location the policy file is read from: {@code path} itself when it is an existing
     * regular file, otherwise the file of the same name in the current working directory.
     */
    private static String resolvePolicyPath(String path) {
        /*
         * If Spark is running in yarn-cluster mode, the policy JSON file is passed via the --files param
         * and shipped to the driver in the Application Master. The given "path" is not valid in that case,
         * as it points to the submitting machine's local file system. We therefore fall back to the file
         * name in the current location for "yarn-cluster" mode and use the path as-is for "yarn-client"
         * mode. sparkContext.getConf().get("spark.submit.deployMode") could also be used to make this
         * decision.
         */
        File policyFile = new File(path);
        if (policyFile.exists() && policyFile.isFile()) {
            log.info("Loading field policies at {} ", path);
            return path;
        }
        log.info("Couldn't find field policy file at {} will check classpath.", path);
        return "./" + policyFile.getName();
    }

    /**
     * Reads the whole file at {@code path} as UTF-8 text. Lines are concatenated without their line
     * terminators. Returns {@code "[]"} when the file is empty or cannot be read; the problem is logged
     * rather than thrown so that loading stays best-effort.
     */
    private static String readPolicyJson(String path) {
        StringBuilder content = new StringBuilder();
        // Decode explicitly as UTF-8: FileReader would use the platform default charset, which is not
        // guaranteed to be UTF-8 on every cluster node.
        try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        } catch (Exception e) {
            // Log the path that failed; the exception (passed last) carries the message and stack trace.
            log.error("Error parsing field policy file. Please verify valid JSON at path {}", path, e);
            return EMPTY_POLICY_JSON;
        }
        if (content.length() == 0) {
            log.error("Field policies file at {} is empty ", path);
            // Keep the empty-array default instead of handing an empty string to the JSON transformer.
            return EMPTY_POLICY_JSON;
        }
        return content.toString();
    }
}