package org.fenixedu.bennu.scheduler.log;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Stream;

import org.fenixedu.bennu.scheduler.domain.SchedulerSystem;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.hash.Hashing;
import com.google.common.io.Files;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonPrimitive;

/**
 * This log repository implementation stores the execution logs in the file system, scattering them in the following way:
 *
 * <pre>
 * ├── index.json
 * └── org_fenixedu_bennu_scheduler_example_ExampleTask
 *     └── 4ea3e6
 *         └── e0f643
 *             └── 4cc7b2
 *                 └── 381e01
 *                     └── 4ea6f7
 *                         └── 1d
 *                             ├── execution.json
 *                             ├── files
 *                             │   └── 02bbcba853c0c09a8df49feaf6799842
 *                             └── output
 * </pre>
 *
 * In the root of the repository, an {@code index.json} file contains the latest execution id for every different task.
 *
 * For every execution, a set of folders is generated based on the execution's id, dispersing the id among multiple
 * sub-directories (in this case, each sub-directory has a 6-character name, meaning the repository is configured with a
 * dispersion factor of 6).
 *
 * Inside each execution's folder, there are three main components:
 * <ul>
 * <li>An {@code execution.json} file, containing the JSON for the associated {@link ExecutionLog}, as well as the id of the
 * previous execution of this task. This effectively creates a linked list of executions with O(1) insertion complexity.</li>
 * <li>An optional {@code output} file, containing the task log, if it is created by the task.</li>
 * <li>A {@code files} folder, containing all the files stored by the task during its execution. To prevent path-traversal
 * attacks, file names are hashed.</li>
 * </ul>
 *
 * @author João Carvalho (joao.pedro.carvalho@tecnico.ulisboa.pt)
 */
public class FileSystemLogRepository implements ExecutionLogRepository {

    private static final JsonParser parser = new JsonParser();

    private final String basePath;
    private final int dispersionFactor;

    /**
     * Creates a new {@link FileSystemLogRepository} with the given dispersion factor, and the given base path.
     *
     * The dispersion factor determines the maximum length of each folder name in an execution's path, thus dispersing the
     * log files across a file tree instead of a flat list.
     *
     * @param basePath
     *            The base path to use to store the logs
     * @param dispersionFactor
     *            The dispersion factor to be used
     */
    public FileSystemLogRepository(String basePath, int dispersionFactor) {
        this.basePath = basePath;
        this.dispersionFactor = dispersionFactor;
    }

    /**
     * Creates a new {@link FileSystemLogRepository} with the given dispersion factor, and the default logs path.
     *
     * @param dispersionFactor
     *            The dispersion factor to be used
     */
    public FileSystemLogRepository(int dispersionFactor) {
        this(SchedulerSystem.getLogsPath(), dispersionFactor);
    }
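    // For example (an illustrative sketch; "/path/to/logs" is just a placeholder base path):
    //
    //     ExecutionLogRepository repository = new FileSystemLogRepository("/path/to/logs", 6);
    //
    // stores the execution with id "4ea3e6e0f6434cc7b2381e014ea6f71d" of the task
    // org.fenixedu.bennu.scheduler.example.ExampleTask under
    // /path/to/logs/org_fenixedu_bennu_scheduler_example_ExampleTask/4ea3e6/e0f643/4cc7b2/381e01/4ea6f7/1d,
    // matching the tree shown in the class documentation (see basePathFor below).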
    /**
     * {@inheritDoc}
     */
    @Override
    public void update(ExecutionLog log) {
        store(log, readJson(logFileFor(log.getTaskName(), log.getId())).map(obj -> obj.getAsJsonPrimitive("previous"))
                .map(JsonPrimitive::getAsString));
    }

    private void store(ExecutionLog log, Optional<String> previous) {
        JsonObject json = log.json();
        previous.ifPresent(prev -> json.addProperty("previous", prev));
        write(logFileFor(log.getTaskName(), log.getId()), json.toString().getBytes(StandardCharsets.UTF_8), false);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void newExecution(ExecutionLog log) {
        synchronized (this) {
            JsonObject json = readIndexJson();
            Optional<String> previous =
                    Optional.ofNullable(json.getAsJsonPrimitive(log.getTaskName())).map(JsonPrimitive::getAsString);
            json.addProperty(log.getTaskName(), log.getId());
            store(log, previous);
            write(indexFilePath(), json.toString().getBytes(StandardCharsets.UTF_8), false);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void appendTaskLog(ExecutionLog log, String text) {
        write(outputFileFor(log.getTaskName(), log.getId()), text.getBytes(StandardCharsets.UTF_8), true);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void storeFile(ExecutionLog log, String fileName, byte[] contents, boolean append) {
        write(fullPathFor(log.getTaskName(), log.getId(), fileName), contents, append);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Stream<ExecutionLog> latest() {
        return readIndexJson().entrySet().stream()
                .map(entry -> getLog(entry.getKey(), entry.getValue().getAsString()).orElse(null))
                .filter(Objects::nonNull);
    }
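    // The index read above maps each task name to the id of its most recent execution, e.g. (illustrative values)
    // { "org.fenixedu.bennu.scheduler.example.ExampleTask": "4ea3e6e0f6434cc7b2381e014ea6f71d" }.
    // Each execution.json stores, under "previous", the id of the run before it (absent for a task's first run),
    // so executionsFor simply follows that chain from newest to oldest until it runs out of entries or reaches 'max'.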
    /**
     * {@inheritDoc}
     */
    @Override
    public Stream<ExecutionLog> executionsFor(String taskName, Optional<String> start, int max) {
        String id;
        if (start.isPresent()) {
            id = start.get();
        } else {
            JsonObject index = readIndexJson();
            if (!index.has(taskName)) {
                return Stream.empty();
            }
            id = index.get(taskName).getAsString();
        }
        List<ExecutionLog> logs = new ArrayList<>(Math.min(max, 100));
        while (id != null && max > 0) {
            Optional<JsonObject> optional = readJson(logFileFor(taskName, id));
            if (optional.isPresent()) {
                JsonObject json = optional.get();
                id = json.has("previous") ? json.get("previous").getAsString() : null;
                logs.add(new ExecutionLog(json));
                max--;
            } else {
                break;
            }
        }
        return logs.stream();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Optional<String> getTaskLog(String taskName, String id) {
        return read(outputFileFor(taskName, id)).map(bytes -> new String(bytes, StandardCharsets.UTF_8));
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Optional<byte[]> getFile(String taskName, String id, String fileName) {
        return read(fullPathFor(taskName, id, fileName));
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Optional<ExecutionLog> getLog(String taskName, String id) {
        return readJson(logFileFor(taskName, id)).map(ExecutionLog::new);
    }

    // Helpers

    private JsonObject readIndexJson() {
        return readJson(indexFilePath()).orElseGet(JsonObject::new);
    }

    private String indexFilePath() {
        return basePath + "/index.json";
    }

    // File names are hashed so that user-provided names cannot escape the 'files' directory.
    private String fullPathFor(String taskName, String id, String fileName) {
        return basePathFor(taskName, id) + "/files/" + Hashing.sha1().hashString(fileName, StandardCharsets.UTF_8).toString();
    }

    private String logFileFor(String taskName, String id) {
        return basePathFor(taskName, id) + "/execution.json";
    }

    private String outputFileFor(String taskName, String id) {
        return basePathFor(taskName, id) + "/output";
    }

    // Builds <basePath>/<task name with dots replaced by underscores>/<id split into chunks of 'dispersionFactor' characters>.
    private String basePathFor(String taskName, String id) {
        return basePath + "/" + taskName.replace('.', '_') + "/"
                + Joiner.on('/').join(Splitter.fixedLength(dispersionFactor).split(id));
    }

    // Low-level readers and writers

    // Creates any missing parent directories; I/O errors are printed to stderr and otherwise ignored.
    private static void write(String path, byte[] bytes, boolean append) {
        File file = new File(path);
        file.getParentFile().mkdirs();
        try (FileOutputStream stream = new FileOutputStream(file, append)) {
            stream.write(bytes);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private static Optional<byte[]> read(String path) {
        File file = new File(path);
        if (!file.exists()) {
            return Optional.empty();
        }
        try {
            return Optional.of(Files.toByteArray(file));
        } catch (IOException e) {
            e.printStackTrace();
            return Optional.empty();
        }
    }

    private static Optional<JsonObject> readJson(String path) {
        return read(path).map(bytes -> parser.parse(new String(bytes, StandardCharsets.UTF_8)).getAsJsonObject());
    }
}