/* * Copyright Bosch Software Innovations GmbH, 2016. * Copyright Siemens AG, 2016-2017. * Part of the SW360 Portal Project. * * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html */ package org.eclipse.sw360.licenseinfo.parsers; import com.google.common.collect.ImmutableList; import org.apache.log4j.Logger; import org.apache.thrift.TException; import org.eclipse.sw360.datahandler.couchdb.AttachmentConnector; import org.eclipse.sw360.datahandler.thrift.SW360Exception; import org.eclipse.sw360.datahandler.thrift.attachments.Attachment; import org.eclipse.sw360.datahandler.thrift.attachments.AttachmentContent; import org.eclipse.sw360.datahandler.thrift.licenseinfo.LicenseInfo; import org.eclipse.sw360.datahandler.thrift.licenseinfo.LicenseInfoParsingResult; import org.eclipse.sw360.datahandler.thrift.licenseinfo.LicenseInfoRequestStatus; import org.eclipse.sw360.datahandler.thrift.licenseinfo.LicenseNameWithText; import org.spdx.rdfparser.InvalidSPDXAnalysisException; import org.spdx.rdfparser.SPDXDocumentFactory; import org.spdx.rdfparser.license.*; import org.spdx.rdfparser.model.SpdxDocument; import java.io.File; import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; import static org.eclipse.sw360.datahandler.common.CommonUtils.closeQuietly; import static org.eclipse.sw360.datahandler.common.CommonUtils.isNullEmptyOrWhitespace; /** * @author: alex.borodin@evosoft.com * @author: maximilian.huber@tngtech.com */ public class SPDXParser extends LicenseInfoParser { protected static final String FILETYPE_SPDX_INTERNAL = "RDF/XML"; protected static final List<String> ACCEPTABLE_ATTACHMENT_FILE_EXTENSIONS = ImmutableList.of( "rdf", "spdx" // usually used for tag:value format ); private static final Logger log = Logger.getLogger(CLIParser.class); public SPDXParser(AttachmentConnector attachmentConnector, AttachmentContentProvider attachmentContentProvider) { super(attachmentConnector, attachmentContentProvider); } @Override public boolean isApplicableTo(Attachment attachment) throws TException { boolean isAcceptable = true; AttachmentContent attachmentContent = attachmentContentProvider.getAttachmentContent(attachment); String lowerFileName = attachmentContent.getFilename().toLowerCase(); isAcceptable &= ACCEPTABLE_ATTACHMENT_FILE_EXTENSIONS.stream() .map(extension -> lowerFileName.endsWith(extension)) .reduce(false, (b1, b2) -> b1 || b2); // TODO: test for namespace `spdx` in rdf file (maybe to much overhead? Better try parsing and die?) return isAcceptable; } @Override public List<LicenseInfoParsingResult> getLicenseInfos(Attachment attachment) throws TException { AttachmentContent attachmentContent = attachmentContentProvider.getAttachmentContent(attachment); LicenseInfo emptyResult = new LicenseInfo() .setFilenames(Arrays.asList(attachmentContent.getFilename())); Optional<LicenseInfo> licenseInfo = parseAsSpdx(attachmentContent) .flatMap(d -> addSpdxContentToCLI(emptyResult, d)); if(licenseInfo.isPresent()){ return Collections.singletonList(new LicenseInfoParsingResult() .setLicenseInfo(licenseInfo.get()) .setStatus(LicenseInfoRequestStatus.SUCCESS)); }else{ return Collections.singletonList(new LicenseInfoParsingResult() .setStatus(LicenseInfoRequestStatus.FAILURE)); } } protected String getUriOfAttachment(AttachmentContent attachmentContent) throws URISyntaxException { String filename = attachmentContent.getFilename(); String filePath = "///" + new File(filename).getAbsoluteFile().toString().replace('\\', '/'); return new URI("file", filePath, null).toString(); } protected Stream<LicenseNameWithText> getAllLicenseTextsFromInfo(AnyLicenseInfo spdxLicenseInfo) { if (spdxLicenseInfo instanceof LicenseSet) { LicenseSet LicenseSet = (LicenseSet) spdxLicenseInfo; return Arrays.stream(LicenseSet.getMembers()) .flatMap(this::getAllLicenseTextsFromInfo); } else if (spdxLicenseInfo instanceof ExtractedLicenseInfo) { ExtractedLicenseInfo extractedLicenseInfo = (ExtractedLicenseInfo) spdxLicenseInfo; return Stream.of(new LicenseNameWithText() .setLicenseName(extractLicenseName(extractedLicenseInfo)) .setLicenseText(extractedLicenseInfo.getExtractedText())); } else if (spdxLicenseInfo instanceof License) { License license = (License) spdxLicenseInfo; return Stream.of(new LicenseNameWithText() .setLicenseName(extractLicenseName(license)) .setLicenseText(license.getLicenseText())); } else if (spdxLicenseInfo instanceof OrLaterOperator) { OrLaterOperator orLaterOperator = (OrLaterOperator) spdxLicenseInfo; return getAllLicenseTextsFromInfo(orLaterOperator.getLicense()); } else if (spdxLicenseInfo instanceof WithExceptionOperator) { WithExceptionOperator withExceptionOperator = (WithExceptionOperator) spdxLicenseInfo; String licenseExceptionText = withExceptionOperator.getException() .getLicenseExceptionText(); return getAllLicenseTextsFromInfo(withExceptionOperator.getLicense()) .map(licenseNWT -> licenseNWT .setLicenseText(licenseNWT.getLicenseText() + "\n\n" + licenseExceptionText) .setLicenseName(licenseNWT.getLicenseName() + " with " + withExceptionOperator.getException().getName())); } return Stream.empty(); } protected Set<LicenseNameWithText> getAllLicenseTexts(SpdxDocument spdxDocument) throws InvalidSPDXAnalysisException { Stream<LicenseNameWithText> licenseTexts = Arrays.stream(spdxDocument.getDocumentDescribes()) .flatMap(spdxItem -> Stream.concat( getAllLicenseTextsFromInfo(spdxItem.getLicenseConcluded()), Arrays.stream(spdxItem.getLicenseInfoFromFiles()) .flatMap(this::getAllLicenseTextsFromInfo))); Stream<LicenseNameWithText> extractedLicenseTexts = Arrays.stream(spdxDocument.getExtractedLicenseInfos()) .flatMap(this::getAllLicenseTextsFromInfo); return Stream.concat(licenseTexts, extractedLicenseTexts) .collect(Collectors.toSet()); } protected Optional<LicenseInfo> addSpdxContentToCLI(LicenseInfo result, SpdxDocument doc) { if(! result.isSetLicenseNamesWithTexts()){ result.setLicenseNamesWithTexts(new HashSet<>()); } try { result.setLicenseNamesWithTexts(getAllLicenseTexts(doc)); Arrays.stream(doc.getDocumentDescribes()).forEach( spdxItem -> result.addToCopyrights(spdxItem.getCopyrightText()) ); } catch (InvalidSPDXAnalysisException e) { e.printStackTrace(); } return Optional.of(result); } private String extractLicenseName(AnyLicenseInfo licenseConcluded) { return licenseConcluded.getResource().getLocalName(); } private String extractLicenseName(ExtractedLicenseInfo extractedLicenseInfo){ return ! isNullEmptyOrWhitespace(extractedLicenseInfo.getName()) ? extractedLicenseInfo.getName() : extractedLicenseInfo.getLicenseId(); } protected Optional<SpdxDocument> parseAsSpdx(AttachmentContent attachmentContent){ InputStream attachmentStream = null; try { attachmentStream = attachmentConnector.getAttachmentStream(attachmentContent); SpdxDocument doc = SPDXDocumentFactory.createSpdxDocument(attachmentStream, getUriOfAttachment(attachmentContent), FILETYPE_SPDX_INTERNAL); return Optional.of(doc); } catch (SW360Exception e) { log.error("Unable to get attachment stream for attachment=" + attachmentContent.getFilename() + " with id=" + attachmentContent.getId(), e); } catch (InvalidSPDXAnalysisException e) { log.error("Unable to parse SPDX for attachment=" + attachmentContent.getFilename() + " with id=" + attachmentContent.getId(), e); } catch (URISyntaxException e) { log.error("Invalid URI syntax for attachment=" + attachmentContent.getFilename() + " with id=" + attachmentContent.getId(), e); } finally { closeQuietly(attachmentStream, log); } return Optional.empty(); } }