package com.thinkbiganalytics.spark.rest.controller;
/*-
* #%L
* Spark Shell Service Controllers
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.thinkbiganalytics.feedmgr.security.FeedServicesAccessControl;
import com.thinkbiganalytics.feedmgr.service.datasource.DatasourceModelTransform;
import com.thinkbiganalytics.metadata.api.MetadataAccess;
import com.thinkbiganalytics.metadata.api.datasource.DatasourceProvider;
import com.thinkbiganalytics.rest.model.RestResponseStatus;
import com.thinkbiganalytics.security.AccessController;
import com.thinkbiganalytics.spark.rest.model.Datasource;
import com.thinkbiganalytics.spark.rest.model.JdbcDatasource;
import com.thinkbiganalytics.spark.rest.model.RegistrationRequest;
import com.thinkbiganalytics.spark.rest.model.TransformRequest;
import com.thinkbiganalytics.spark.rest.model.TransformResponse;
import com.thinkbiganalytics.spark.shell.SparkShellProcess;
import com.thinkbiganalytics.spark.shell.SparkShellProcessManager;
import com.thinkbiganalytics.spark.shell.SparkShellRestClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.security.core.Authentication;
import org.springframework.security.core.context.SecurityContextHolder;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.MissingResourceException;
import java.util.Optional;
import java.util.ResourceBundle;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.inject.Inject;
import javax.ws.rs.BadRequestException;
import javax.ws.rs.Consumes;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import io.swagger.annotations.ApiParam;
import io.swagger.annotations.ApiResponse;
import io.swagger.annotations.ApiResponses;
import io.swagger.annotations.SwaggerDefinition;
import io.swagger.annotations.Tag;
/**
 * Provides an endpoint for proxying to the actual Spark Shell service.
 *
 * <p>Each authenticated user gets a dedicated Spark Shell process; this controller starts,
 * registers, and forwards transformation requests to that process.</p>
 */
@Api(tags = "Feed Manager - Data Wrangler")
@Component
@Path("/v1/spark/shell")
@SwaggerDefinition(tags = @Tag(name = "Feed Manager - Data Wrangler", description = "data transformations"))
public class SparkShellProxyController {

    private static final Logger log = LoggerFactory.getLogger(SparkShellProxyController.class);

    /**
     * Resources for error messages. Keys that are missing from the bundle fall back to being
     * used verbatim as the message (see {@link #error(Response.Status, String, Throwable)}).
     */
    private static final ResourceBundle STRINGS = ResourceBundle.getBundle("spark-shell");

    /**
     * Ensures the user has the correct permissions
     */
    @Inject
    private AccessController accessController;

    /**
     * Provides access to {@code Datasource} objects
     */
    @Inject
    private DatasourceProvider datasourceProvider;

    /**
     * The {@code Datasource} transformer
     */
    @Inject
    private DatasourceModelTransform datasourceTransform;

    /**
     * Metadata access service
     */
    @Inject
    private MetadataAccess metadata;

    /**
     * Manages Spark Shell processes
     */
    @Inject
    private SparkShellProcessManager processManager;

    /**
     * Communicates with Spark Shell processes
     */
    @Inject
    private SparkShellRestClient restClient;

    /**
     * Requests the status of a transformation.
     *
     * @param id the destination table name
     * @return the transformation status
     * @throws WebApplicationException with a 404 status if the table is unknown, or a 500 status
     *                                 if the Spark Shell process cannot be reached
     */
    @GET
    @Path("/transform/{table}")
    @Consumes(MediaType.APPLICATION_JSON)
    @Produces(MediaType.APPLICATION_JSON)
    @ApiOperation("Fetches the status of a transformation.")
    @ApiResponses({
                      @ApiResponse(code = 200, message = "Returns the status of the transformation.", response = TransformResponse.class),
                      @ApiResponse(code = 404, message = "The transformation does not exist.", response = RestResponseStatus.class),
                      @ApiResponse(code = 500, message = "There was a problem accessing the data.", response = RestResponseStatus.class)
                  })
    @Nonnull
    public Response getTable(@Nonnull @PathParam("table") final String id) {
        // Forward to the Spark Shell process for the current user
        final SparkShellProcess process = getSparkShellProcess();
        final Optional<TransformResponse> response;
        try {
            response = restClient.getTable(process, id);
        } catch (final Exception e) {
            throw error(Response.Status.INTERNAL_SERVER_ERROR, "transform.error", e);
        }

        // An empty Optional means the Spark Shell process does not know about this table
        if (response.isPresent()) {
            return Response.ok(response.get()).build();
        } else {
            throw error(Response.Status.NOT_FOUND, "getTable.unknownTable", null);
        }
    }

    /**
     * Ensures a Spark Shell process has been started for the current user.
     *
     * @return 202 Accepted
     * @throws WebApplicationException with a 500 status if the process cannot be started
     */
    @POST
    @Path("/start")
    @ApiOperation("Starts a new Spark Shell process for the current user if one is not already running.")
    @ApiResponses({
                      @ApiResponse(code = 202, message = "The Spark Shell process will be started."),
                      @ApiResponse(code = 500, message = "The Spark Shell process could not be started.", response = RestResponseStatus.class)
                  })
    @Nonnull
    public Response start() {
        final Authentication auth = SecurityContextHolder.getContext().getAuthentication();
        try {
            // Starting is asynchronous; 202 indicates the process will become available
            processManager.start(auth.getName());
            return Response.accepted().build();
        } catch (final Exception e) {
            throw error(Response.Status.INTERNAL_SERVER_ERROR, "start.error", e);
        }
    }

    /**
     * Registers a Spark Shell process.
     *
     * <p>Called by a Spark Shell process (authenticating with its own credentials) once it has
     * started, so that this server knows how to reach it.</p>
     *
     * @param registration the process information
     * @return 204 No Content
     * @throws WebApplicationException with a 403 status if the process is not allowed to register
     */
    @POST
    @Path("/register")
    @Consumes(MediaType.APPLICATION_JSON)
    @ApiOperation("Registers a new Spark Shell process with Kylo.")
    @ApiResponses({
                      @ApiResponse(code = 204, message = "The Spark Shell process has been successfully registered with this server."),
                      @ApiResponse(code = 401, message = "The provided credentials are invalid.", response = RestResponseStatus.class),
                      @ApiResponse(code = 403, message = "The Spark Shell process does not have permission to register with this server.", response = RestResponseStatus.class),
                      @ApiResponse(code = 500, message = "The Spark Shell process could not be registered with this server.", response = RestResponseStatus.class)
                  })
    @Nonnull
    public Response register(@Nonnull final RegistrationRequest registration) {
        final Authentication auth = SecurityContextHolder.getContext().getAuthentication();
        try {
            processManager.register(auth.getPrincipal().toString(), auth.getCredentials().toString(), registration);
            return Response.noContent().build();
        } catch (final IllegalArgumentException e) {
            // Preserve the cause so the rejection reason is visible in the server logs
            throw error(Response.Status.FORBIDDEN, "register.forbidden", e);
        }
    }

    /**
     * Executes a Spark script that performs transformations using a {@code DataFrame}.
     *
     * @param request the transformation request
     * @return the transformation status
     * @throws WebApplicationException with a 400 status if the request is malformed or references a
     *                                 nonexistent data source, or a 500 status if execution fails
     */
    @POST
    @Path("/transform")
    @Consumes(MediaType.APPLICATION_JSON)
    @Produces(MediaType.APPLICATION_JSON)
    @ApiOperation("Queries a Hive table and applies a series of transformations on the rows.")
    @ApiResponses({
                      @ApiResponse(code = 200, message = "Returns the status of the transformation.", response = TransformResponse.class),
                      @ApiResponse(code = 400, message = "The requested data source does not exist.", response = RestResponseStatus.class),
                      @ApiResponse(code = 500, message = "There was a problem processing the data.", response = RestResponseStatus.class)
                  })
    @Nonnull
    public Response transform(@ApiParam(value = "The request indicates the transformations to apply to the source table and how the user wishes the results to be displayed. Exactly one parent or"
                                                + " source must be specified.", required = true)
                              @Nullable final TransformRequest request) {
        // Validate request: a script is always required, and a parent (if given) must be complete
        if (request == null || request.getScript() == null) {
            throw error(Response.Status.BAD_REQUEST, "transform.missingScript", null);
        }
        if (request.getParent() != null) {
            if (request.getParent().getScript() == null) {
                throw error(Response.Status.BAD_REQUEST, "transform.missingParentScript", null);
            }
            if (request.getParent().getTable() == null) {
                throw error(Response.Status.BAD_REQUEST, "transform.missingParentTable", null);
            }
        }

        // Add data source details
        if (request.getDatasources() != null && !request.getDatasources().isEmpty()) {
            // Verify access to data sources
            accessController.checkPermission(AccessController.SERVICES, FeedServicesAccessControl.ACCESS_DATASOURCES);

            // Phase 1: resolve and validate the requested IDs in the current user's metadata
            // context, so a user cannot reference a data source they cannot see
            final List<com.thinkbiganalytics.metadata.api.datasource.Datasource.ID> datasourceIds = metadata.read(
                () -> request.getDatasources().stream()
                    .map(com.thinkbiganalytics.metadata.datasource.Datasource::getId)
                    .map(datasourceProvider::resolve)
                    .map(id -> {
                        final com.thinkbiganalytics.metadata.api.datasource.Datasource datasource = datasourceProvider.getDatasource(id);
                        if (datasource != null) {
                            return datasource.getId();
                        } else {
                            throw new BadRequestException("No datasource exists with the given ID: " + id);
                        }
                    })
                    .collect(Collectors.toList())
            );

            // Phase 2: re-read the validated data sources as the system user, because ADMIN-level
            // details (e.g. JDBC connection credentials) are required by the Spark Shell process
            final List<Datasource> datasources = metadata.read(
                () -> datasourceIds.stream()
                    .map(datasourceProvider::getDatasource)
                    .map(datasource -> {
                        if (datasource instanceof com.thinkbiganalytics.metadata.api.datasource.UserDatasource) {
                            return (com.thinkbiganalytics.metadata.datasource.Datasource) datasourceTransform.toDatasource(datasource, DatasourceModelTransform.Level.ADMIN);
                        } else {
                            throw new BadRequestException("Not a supported datasource: " + datasource.getClass().getSimpleName() + " " + datasource.getId());
                        }
                    })
                    .map(datasource -> {
                        // Only JDBC data sources are currently supported by the Spark Shell client
                        if (datasource instanceof com.thinkbiganalytics.metadata.datasource.JdbcDatasource) {
                            return new JdbcDatasource((com.thinkbiganalytics.metadata.datasource.JdbcDatasource) datasource);
                        } else {
                            throw new BadRequestException("Not a supported datasource: " + datasource.getClass().getSimpleName());
                        }
                    })
                    .collect(Collectors.toList()),
                MetadataAccess.SERVICE);
            request.setDatasources(datasources);
        }

        // Execute request on the current user's Spark Shell process
        final SparkShellProcess process = getSparkShellProcess();
        try {
            final TransformResponse response = restClient.transform(process, request);
            return Response.ok(response).build();
        } catch (final Exception e) {
            throw error(Response.Status.INTERNAL_SERVER_ERROR, "transform.error", e);
        }
    }

    /**
     * Generates an error response for the specified message.
     *
     * @param status the HTTP response status
     * @param key    the resource key or the error message; if the key is missing from the resource
     *               bundle it is used verbatim as the message
     * @param cause  the source of the error, or {@code null} if unknown
     * @return the error response
     */
    @Nonnull
    private WebApplicationException error(@Nonnull final Response.Status status, @Nonnull final String key, @Nullable final Throwable cause) {
        // Create entity
        final TransformResponse entity = new TransformResponse();
        entity.setStatus(TransformResponse.Status.ERROR);
        try {
            entity.setMessage(STRINGS.getString(key));
        } catch (final MissingResourceException e) {
            // Fall back to the key itself so the client still receives a meaningful message
            log.warn("Missing resource message: {}", key, e);
            entity.setMessage(key);
        }

        // Generate the response, preserving the cause (when present) for server-side logging
        final Response response = Response.status(status).entity(entity).build();
        if (cause != null) {
            return new WebApplicationException(cause, response);
        } else {
            return new WebApplicationException(response);
        }
    }

    /**
     * Retrieves the Spark Shell process for the current user.
     *
     * @return the Spark Shell process
     * @throws WebApplicationException with a 500 status if the process is unavailable; the
     *                                 "start.error" message is reused because failure here means
     *                                 the process could not be started or obtained
     */
    @Nonnull
    private SparkShellProcess getSparkShellProcess() {
        final Authentication auth = SecurityContextHolder.getContext().getAuthentication();
        try {
            return processManager.getProcessForUser(auth.getPrincipal().toString());
        } catch (final Exception e) {
            throw error(Response.Status.INTERNAL_SERVER_ERROR, "start.error", e);
        }
    }
}