/** * Copyright French Prime minister Office/SGMAP/DINSIC/Vitam Program (2015-2019) * * contact.vitam@culture.gouv.fr * * This software is a computer program whose purpose is to implement a digital archiving back-office system managing * high volumetry securely and efficiently. * * This software is governed by the CeCILL 2.1 license under French law and abiding by the rules of distribution of free * software. You can use, modify and/ or redistribute the software under the terms of the CeCILL 2.1 license as * circulated by CEA, CNRS and INRIA at the following URL "http://www.cecill.info". * * As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, * users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the * successive licensors have only limited liability. * * In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or * developing or reproducing the software by the user in light of its specific status of free software, that may mean * that it is complicated to manipulate, and that also therefore means that it is reserved for developers and * experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the * software's suitability as regards their requirements in conditions enabling the security of their systems and/or data * to be ensured and, more generally, to use and operate it in the same conditions as regards security. * * The fact that you are presently reading this means that you have had knowledge of the CeCILL 2.1 license and that you * accept its terms. */ package fr.gouv.vitam.common.format.identification.siegfried; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ArrayNode; import fr.gouv.vitam.common.ParametersChecker; import fr.gouv.vitam.common.format.identification.FormatIdentifier; import fr.gouv.vitam.common.format.identification.exception.FileFormatNotFoundException; import fr.gouv.vitam.common.format.identification.exception.FormatIdentifierBadRequestException; import fr.gouv.vitam.common.format.identification.exception.FormatIdentifierNotFoundException; import fr.gouv.vitam.common.format.identification.exception.FormatIdentifierTechnicalException; import fr.gouv.vitam.common.format.identification.model.FormatIdentifierInfo; import fr.gouv.vitam.common.format.identification.model.FormatIdentifierResponse; import fr.gouv.vitam.common.logging.VitamLogger; import fr.gouv.vitam.common.logging.VitamLoggerFactory; import org.apache.commons.lang.BooleanUtils; import javax.ws.rs.core.MediaType; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import java.util.Map; /** * Siegfried implementation of format identifier */ public class FormatIdentifierSiegfried implements FormatIdentifier { /** * Pronom namespace */ public static final String PRONOM_NAMESPACE = "pronom"; /** * Unknown namespace */ public static final String UNKNOW_NAMESPACE = "UNKNOWN"; private final SiegfriedClient client; private final Path versionPath; private static final VitamLogger LOGGER = VitamLoggerFactory.getInstance(FormatIdentifierSiegfried.class); /** * Configuration should come with 'client', 'rootPath' and 'versionPath' mandatory parameters. If client is 'http': * 'host' and 'port' are mandatory. If not, mock client is used. * * @param configurationProperties the configuration properties needed to instantiate Siegfried format identifier * @throws FormatIdentifierTechnicalException If a technical error occures when the version path is created * @throws IllegalArgumentException if mandatory parameter are not given or null */ public FormatIdentifierSiegfried(Map<String, Object> configurationProperties) throws FormatIdentifierTechnicalException { ParametersChecker.checkParameter("Client type cannot be null", configurationProperties.get("client")); ParametersChecker.checkParameter("Root path cannot be null", configurationProperties.get("rootPath")); ParametersChecker.checkParameter("Version pathcannot be null", configurationProperties.get("versionPath")); final String clientType = (String) configurationProperties.get("client"); final String root = (String) configurationProperties.get("rootPath"); final String version = (String) configurationProperties.get("versionPath"); final SiegfriedClientFactory factory = SiegfriedClientFactory.getInstance(); if ("http".equals(clientType)) { ParametersChecker.checkParameter("Host cannot be null", configurationProperties.get("host")); ParametersChecker.checkParameter("Port cannot be null", configurationProperties.get("port")); final String host = (String) configurationProperties.get("host"); final int port = (Integer) configurationProperties.get("port"); factory.changeConfiguration(host, port); client = factory.getClient(); versionPath = Paths.get(version); final Boolean createVersionPath = (Boolean) configurationProperties.get("createVersionPath"); if (BooleanUtils.isNotFalse(createVersionPath)) { try { // Create directory already check for file existance and possibility to create the directory. Files.createDirectories(versionPath); } catch (final IOException e) { throw new FormatIdentifierTechnicalException(e); } } } else { // Mock configuration LOGGER.info("Bad value of client. Use mock"); factory.changeConfiguration(null, 0); client = factory.getClient(); versionPath = Paths.get(version); } } /** * For JUnit ONLY * * @param mockedClient a custom instance of siegfried client * @param versionPath the version request path */ FormatIdentifierSiegfried(SiegfriedClient mockedClient, Path versionPath) { client = mockedClient; this.versionPath = versionPath; } @Override public FormatIdentifierInfo status() throws FormatIdentifierTechnicalException, FormatIdentifierNotFoundException { if (LOGGER.isDebugEnabled()) { LOGGER.debug("Check Siegfried status"); } final JsonNode response = client.status(versionPath); final String version = response.get("siegfried").asText(); return new FormatIdentifierInfo(version, "Siegfried"); } @Override public List<FormatIdentifierResponse> analysePath(Path path) throws FileFormatNotFoundException, FormatIdentifierTechnicalException, FormatIdentifierBadRequestException, FormatIdentifierNotFoundException { if (LOGGER.isDebugEnabled()) { LOGGER.debug("identify format for " + path); } final JsonNode response = client.analysePath(path); return extractFormat(response, path); } private List<FormatIdentifierResponse> extractFormat(JsonNode siegfriedResponse, Path path) throws FileFormatNotFoundException, FormatIdentifierBadRequestException { if (LOGGER.isDebugEnabled()) { LOGGER.debug("extract format from siegfried response"); } final List<FormatIdentifierResponse> matchesFormats = new ArrayList<>(); final ArrayNode files = (ArrayNode) siegfriedResponse.get("files"); if (files == null || files.size() != 1) { throw new FormatIdentifierBadRequestException("The given path is not link to an unique file"); } final JsonNode file = files.get(0); final ArrayNode matches = (ArrayNode) file.get("matches"); for (final JsonNode match : matches) { LOGGER.debug("Check match {}", match); final String formatId = match.get("id").asText(); final String namespace = match.get("ns").asText(); if (formatResolved(formatId, namespace)) { if (LOGGER.isDebugEnabled()) { LOGGER.debug("Find a format " + formatId + " for " + namespace); } final String mimetype = match.get("mime").asText(); final String format = match.get("format").asText(); final FormatIdentifierResponse formatIdentifier = new FormatIdentifierResponse(format, mimetype, formatId, namespace); matchesFormats.add(formatIdentifier); } else if (PRONOM_NAMESPACE.equals(namespace)) { final JsonNode warnNode = match.get("warning"); if (warnNode != null) { final String warn = warnNode.asText(); final int pos = warn.indexOf("fmt/"); final int xpos = warn.indexOf("x-fmt/"); int start = -1; if (pos > 0 && xpos > 0) { start = pos < xpos ? pos : xpos; } else if (pos > 0) { start = pos; } else { start = xpos; } if (start > 0) { int end = warn.indexOf(',', start); if (end == -1) { end = warn.length(); } if (end > start) { final String newFormatId = warn.substring(start, end); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Find a format " + formatId + " for " + namespace); } final String mimetype = MediaType.APPLICATION_OCTET_STREAM; final String format = "Approximative format: " + newFormatId; final FormatIdentifierResponse formatIdentifier = new FormatIdentifierResponse(format, mimetype, newFormatId, namespace); matchesFormats.add(formatIdentifier); } } } } } if (matchesFormats.isEmpty()) { LOGGER.warn("No format match found for file " + path); throw new FileFormatNotFoundException("No match found"); } return matchesFormats; } private boolean formatResolved(String formatId, String nameSpace) { if (PRONOM_NAMESPACE.equals(nameSpace) && UNKNOW_NAMESPACE.equals(formatId)) { return false; } return true; } @Override public void close() { client.close(); } }