/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.sysml.api.mlcontext; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.runtime.util.LocalFileUtils; /** * Factory for creating DML and PYDML Script objects from strings, files, URLs, * and input streams. * */ public class ScriptFactory { /** * Create a DML Script object based on a string path to a file. * * @param scriptFilePath * path to DML script file (local or HDFS) * @return DML Script object */ public static Script dmlFromFile(String scriptFilePath) { return scriptFromFile(scriptFilePath, ScriptType.DML); } /** * Create a DML Script object based on an input stream. * * @param inputStream * input stream to DML * @return DML Script object */ public static Script dmlFromInputStream(InputStream inputStream) { return scriptFromInputStream(inputStream, ScriptType.DML); } /** * Creates a DML Script object based on a file in the local file system. To * create a DML Script object from a local file or HDFS, please use * {@link #dmlFromFile(String)}. * * @param localScriptFile * the local DML file * @return DML Script object */ public static Script dmlFromLocalFile(File localScriptFile) { return scriptFromLocalFile(localScriptFile, ScriptType.DML); } /** * Create a DML Script object based on a string. * * @param scriptString * string of DML * @return DML Script object */ public static Script dmlFromString(String scriptString) { return scriptFromString(scriptString, ScriptType.DML); } /** * Create a DML Script object based on a URL path. * * @param scriptUrlPath * URL path to DML script * @return DML Script object */ public static Script dmlFromUrl(String scriptUrlPath) { return scriptFromUrl(scriptUrlPath, ScriptType.DML); } /** * Create a DML Script object based on a URL. * * @param scriptUrl * URL to DML script * @return DML Script object */ public static Script dmlFromUrl(URL scriptUrl) { return scriptFromUrl(scriptUrl, ScriptType.DML); } /** * Create a DML Script object based on a resource path. * * @param resourcePath * path to a resource on the classpath * @return DML Script object */ public static Script dmlFromResource(String resourcePath) { return scriptFromResource(resourcePath, ScriptType.DML); } /** * Create a PYDML Script object based on a string path to a file. * * @param scriptFilePath * path to PYDML script file (local or HDFS) * @return PYDML Script object */ public static Script pydmlFromFile(String scriptFilePath) { return scriptFromFile(scriptFilePath, ScriptType.PYDML); } /** * Create a PYDML Script object based on an input stream. * * @param inputStream * input stream to PYDML * @return PYDML Script object */ public static Script pydmlFromInputStream(InputStream inputStream) { return scriptFromInputStream(inputStream, ScriptType.PYDML); } /** * Creates a PYDML Script object based on a file in the local file system. * To create a PYDML Script object from a local file or HDFS, please use * {@link #pydmlFromFile(String)}. * * @param localScriptFile * the local PYDML file * @return PYDML Script object */ public static Script pydmlFromLocalFile(File localScriptFile) { return scriptFromLocalFile(localScriptFile, ScriptType.PYDML); } /** * Create a PYDML Script object based on a string. * * @param scriptString * string of PYDML * @return PYDML Script object */ public static Script pydmlFromString(String scriptString) { return scriptFromString(scriptString, ScriptType.PYDML); } /** * Creat a PYDML Script object based on a URL path. * * @param scriptUrlPath * URL path to PYDML script * @return PYDML Script object */ public static Script pydmlFromUrl(String scriptUrlPath) { return scriptFromUrl(scriptUrlPath, ScriptType.PYDML); } /** * Create a PYDML Script object based on a URL. * * @param scriptUrl * URL to PYDML script * @return PYDML Script object */ public static Script pydmlFromUrl(URL scriptUrl) { return scriptFromUrl(scriptUrl, ScriptType.PYDML); } /** * Create a PYDML Script object based on a resource path. * * @param resourcePath * path to a resource on the classpath * @return PYDML Script object */ public static Script pydmlFromResource(String resourcePath) { return scriptFromResource(resourcePath, ScriptType.PYDML); } /** * Create a DML or PYDML Script object based on a string path to a file. * * @param scriptFilePath * path to DML or PYDML script file (local or HDFS) * @param scriptType * {@code ScriptType.DML} or {@code ScriptType.PYDML} * @return DML or PYDML Script object */ private static Script scriptFromFile(String scriptFilePath, ScriptType scriptType) { String scriptString = getScriptStringFromFile(scriptFilePath); return scriptFromString(scriptString, scriptType).setName(scriptFilePath); } /** * Create a DML or PYDML Script object based on an input stream. * * @param inputStream * input stream to DML or PYDML * @param scriptType * {@code ScriptType.DML} or {@code ScriptType.PYDML} * @return DML or PYDML Script object */ private static Script scriptFromInputStream(InputStream inputStream, ScriptType scriptType) { String scriptString = getScriptStringFromInputStream(inputStream); return scriptFromString(scriptString, scriptType); } /** * Creates a DML or PYDML Script object based on a file in the local file * system. To create a Script object from a local file or HDFS, please use * {@link scriptFromFile(String, ScriptType)}. * * @param localScriptFile * The local DML or PYDML file * @param scriptType * {@code ScriptType.DML} or {@code ScriptType.PYDML} * @return DML or PYDML Script object */ private static Script scriptFromLocalFile(File localScriptFile, ScriptType scriptType) { String scriptString = getScriptStringFromFile(localScriptFile); return scriptFromString(scriptString, scriptType).setName(localScriptFile.getName()); } /** * Create a DML or PYDML Script object based on a string. * * @param scriptString * string of DML or PYDML * @param scriptType * {@code ScriptType.DML} or {@code ScriptType.PYDML} * @return DML or PYDML Script object */ private static Script scriptFromString(String scriptString, ScriptType scriptType) { return new Script(scriptString, scriptType); } /** * Creat a DML or PYDML Script object based on a URL path. * * @param scriptUrlPath * URL path to DML or PYDML script * @param scriptType * {@code ScriptType.DML} or {@code ScriptType.PYDML} * @return DML or PYDML Script object */ private static Script scriptFromUrl(String scriptUrlPath, ScriptType scriptType) { String scriptString = getScriptStringFromUrl(scriptUrlPath); return scriptFromString(scriptString, scriptType).setName(scriptUrlPath); } /** * Create a DML or PYDML Script object based on a URL. * * @param scriptUrl * URL to DML or PYDML script * @param scriptType * {@code ScriptType.DML} or {@code ScriptType.PYDML} * @return DML or PYDML Script object */ private static Script scriptFromUrl(URL scriptUrl, ScriptType scriptType) { String scriptString = getScriptStringFromUrl(scriptUrl); return scriptFromString(scriptString, scriptType).setName(scriptUrl.toString()); } /** * Create a DML or PYDML Script object based on a resource path. * * @param resourcePath * path to a resource on the classpath * @param scriptType * {@code ScriptType.DML} or {@code ScriptType.PYDML} * @return DML or PYDML Script object */ private static Script scriptFromResource(String resourcePath, ScriptType scriptType) { if (resourcePath == null) { return null; } if (!resourcePath.startsWith("/")) { resourcePath = "/" + resourcePath; } InputStream inputStream = ScriptFactory.class.getResourceAsStream(resourcePath); return scriptFromInputStream(inputStream, scriptType).setName(resourcePath); } /** * Create a DML Script object based on a string. * * @param scriptString * string of DML * @return DML Script object */ public static Script dml(String scriptString) { return dmlFromString(scriptString); } /** * Obtain a script string from a file in the local file system. To obtain a * script string from a file in HDFS, please use * getScriptStringFromFile(String scriptFilePath). * * @param file * The script file. * @return The script string. * @throws MLContextException * If a problem occurs reading the script string from the file. */ private static String getScriptStringFromFile(File file) { if (file == null) { throw new MLContextException("Script file is null"); } String filePath = file.getPath(); try { if (!LocalFileUtils.validateExternalFilename(filePath, false)) { throw new MLContextException("Invalid (non-trustworthy) local filename: " + filePath); } String scriptString = FileUtils.readFileToString(file); return scriptString; } catch (IllegalArgumentException e) { throw new MLContextException("Error trying to read script string from file: " + filePath, e); } catch (IOException e) { throw new MLContextException("Error trying to read script string from file: " + filePath, e); } } /** * Obtain a script string from a file. * * @param scriptFilePath * The file path to the script file (either local file system or * HDFS) * @return The script string * @throws MLContextException * If a problem occurs reading the script string from the file */ private static String getScriptStringFromFile(String scriptFilePath) { if (scriptFilePath == null) { throw new MLContextException("Script file path is null"); } try { if (scriptFilePath.startsWith("hdfs:") || scriptFilePath.startsWith("gpfs:")) { if (!LocalFileUtils.validateExternalFilename(scriptFilePath, true)) { throw new MLContextException("Invalid (non-trustworthy) hdfs/gpfs filename: " + scriptFilePath); } FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf()); Path path = new Path(scriptFilePath); FSDataInputStream fsdis = fs.open(path); return IOUtils.toString(fsdis); } else {// from local file system if (!LocalFileUtils.validateExternalFilename(scriptFilePath, false)) { throw new MLContextException("Invalid (non-trustworthy) local filename: " + scriptFilePath); } File scriptFile = new File(scriptFilePath); return FileUtils.readFileToString(scriptFile); } } catch (IllegalArgumentException e) { throw new MLContextException("Error trying to read script string from file: " + scriptFilePath, e); } catch (IOException e) { throw new MLContextException("Error trying to read script string from file: " + scriptFilePath, e); } } /** * Obtain a script string from an InputStream. * * @param inputStream * The InputStream from which to read the script string * @return The script string * @throws MLContextException * If a problem occurs reading the script string from the URL */ private static String getScriptStringFromInputStream(InputStream inputStream) { if (inputStream == null) { throw new MLContextException("InputStream is null"); } try { return IOUtils.toString(inputStream); } catch (IOException e) { throw new MLContextException("Error trying to read script string from InputStream", e); } } /** * Obtain a script string from a URL. * * @param scriptUrlPath * The URL path to the script file * @return The script string * @throws MLContextException * If a problem occurs reading the script string from the URL */ private static String getScriptStringFromUrl(String scriptUrlPath) { if (scriptUrlPath == null) { throw new MLContextException("Script URL path is null"); } try { URL url = new URL(scriptUrlPath); return getScriptStringFromUrl(url); } catch (MalformedURLException e) { throw new MLContextException("Error trying to read script string from URL path: " + scriptUrlPath, e); } } /** * Obtain a script string from a URL. * * @param url * The script URL * @return The script string * @throws MLContextException * If a problem occurs reading the script string from the URL */ private static String getScriptStringFromUrl(URL url) { if (url == null) { throw new MLContextException("URL is null"); } String urlString = url.toString(); if ((!urlString.toLowerCase().startsWith("http:")) && (!urlString.toLowerCase().startsWith("https:"))) { throw new MLContextException("Currently only reading from http and https URLs is supported"); } try { InputStream is = url.openStream(); return IOUtils.toString(is); } catch (IOException e) { throw new MLContextException("Error trying to read script string from URL: " + url, e); } } /** * Create a PYDML script object based on a string. * * @param scriptString * string of PYDML * @return PYDML Script object */ public static Script pydml(String scriptString) { return pydmlFromString(scriptString); } }