package org.molgenis.data.importer.emx; import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.molgenis.data.*; import org.molgenis.data.i18n.model.I18nString; import org.molgenis.data.i18n.model.Language; import org.molgenis.data.importer.EntityImportReport; import org.molgenis.data.importer.ParsedMetaData; import org.molgenis.data.meta.AttributeType; import org.molgenis.data.meta.EntityTypeDependencyResolver; import org.molgenis.data.meta.model.*; import org.molgenis.data.meta.model.Package; import org.molgenis.data.support.QueryImpl; import org.molgenis.data.validation.ConstraintViolation; import org.molgenis.data.validation.MolgenisValidationException; import org.molgenis.security.core.MolgenisPermissionService; import org.molgenis.security.core.Permission; import org.molgenis.security.core.utils.SecurityUtils; import org.molgenis.security.permission.PermissionSystemService; import org.molgenis.util.HugeSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.security.core.context.SecurityContextHolder; import org.springframework.transaction.annotation.Transactional; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import static com.google.common.collect.Iterables.concat; import static com.google.common.collect.Lists.newArrayList; import static java.lang.String.format; import static java.util.Collections.emptyList; import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.*; import static java.util.stream.StreamSupport.stream; import static org.molgenis.data.EntityManager.CreationMode.POPULATE; import static org.molgenis.data.i18n.model.I18nStringMetaData.I18N_STRING; import static org.molgenis.data.i18n.model.LanguageMetadata.LANGUAGE; import static org.molgenis.data.meta.model.EntityTypeMetadata.ENTITY_TYPE_META_DATA; import static org.molgenis.data.meta.model.Package.PACKAGE_SEPARATOR; import static org.molgenis.security.core.runas.RunAsSystemProxy.runAsSystem; /** * Writes the imported metadata and data to target {@link RepositoryCollection}. */ public class ImportWriter { private static final Logger LOG = LoggerFactory.getLogger(ImportWriter.class); private final DataService dataService; private final PermissionSystemService permissionSystemService; private final MolgenisPermissionService molgenisPermissionService; private final EntityManager entityManager; private final EntityTypeDependencyResolver entityTypeDependencyResolver; /** * Creates the ImportWriter * * @param dataService {@link DataService} to query existing repositories and transform entities * @param permissionSystemService {@link PermissionSystemService} to give permissions on uploaded entities * @param entityManager entity manager to create new entities * @param entityTypeDependencyResolver entity type dependency resolver */ public ImportWriter(DataService dataService, PermissionSystemService permissionSystemService, MolgenisPermissionService molgenisPermissionService, EntityManager entityManager, EntityTypeDependencyResolver entityTypeDependencyResolver) { this.dataService = requireNonNull(dataService); this.permissionSystemService = requireNonNull(permissionSystemService); this.molgenisPermissionService = requireNonNull(molgenisPermissionService); this.entityManager = requireNonNull(entityManager); this.entityTypeDependencyResolver = requireNonNull(entityTypeDependencyResolver); } @Transactional public EntityImportReport doImport(EmxImportJob job) { // languages first importLanguages(job.report, job.parsedMetaData.getLanguages(), job.dbAction, job.metaDataChanges); runAsSystem(() -> { importTags(job.parsedMetaData); importPackages(job.parsedMetaData); }); importEntityTypes(job.parsedMetaData.getEntities(), job.report); List<EntityType> resolvedEntityTypes = entityTypeDependencyResolver.resolve(job.parsedMetaData.getEntities()); importData(job.report, resolvedEntityTypes, job.source, job.dbAction, job.defaultPackage); importI18nStrings(job.report, job.parsedMetaData.getI18nStrings(), job.dbAction); return job.report; } private void importEntityTypes(ImmutableCollection<EntityType> entityTypes, EntityImportReport importReport) { GroupedEntityTypes groupedEntityTypes = groupEntityTypes(entityTypes); if (!SecurityUtils.currentUserIsSu()) { validateEntityTypePermissions(groupedEntityTypes.getUpdatedEntityTypes()); createEntityTypePermissions(groupedEntityTypes.getNewEntityTypes()); } upsertEntityTypes(groupedEntityTypes); groupedEntityTypes.getNewEntityTypes().stream().map(EntityType::getName).forEach(importReport::addNewEntity); } private void validateEntityTypePermissions(ImmutableCollection<EntityType> entityTypes) { entityTypes.forEach(this::validateEntityTypePermission); } private void validateEntityTypePermission(EntityType entityType) { String entityTypeName = entityType.getName(); if (!molgenisPermissionService.hasPermissionOnEntity(entityTypeName, Permission.READ)) { throw new MolgenisValidationException( new ConstraintViolation(format("Permission denied on existing entity type [%s]", entityTypeName))); } } private void createEntityTypePermissions(ImmutableCollection<EntityType> entityTypes) { List<String> entityTypeNames = entityTypes.stream().map(EntityType::getName).collect(toList()); permissionSystemService.giveUserEntityPermissions(SecurityContextHolder.getContext(), entityTypeNames); } private GroupedEntityTypes groupEntityTypes(ImmutableCollection<EntityType> entities) { return runAsSystem(() -> { Map<String, EntityType> existingEntityTypeMap = dataService .findAll(EntityTypeMetadata.ENTITY_TYPE_META_DATA, entities.stream().map(EntityType::getName), new Fetch().field(EntityTypeMetadata.FULL_NAME), EntityType.class) .collect(toMap(EntityType::getName, Function.identity())); ImmutableCollection<EntityType> newEntityTypes = entities.stream() .filter(entityType -> !existingEntityTypeMap.containsKey(entityType.getName())) .collect(collectingAndThen(toList(), ImmutableList::copyOf)); ImmutableCollection<EntityType> existingEntityTypes = entities.stream() .filter(entityType -> existingEntityTypeMap.containsKey(entityType.getName())) .collect(collectingAndThen(toList(), ImmutableList::copyOf)); return new GroupedEntityTypes(newEntityTypes, existingEntityTypes); }); } private static class GroupedEntityTypes { private final ImmutableCollection<EntityType> newEntityTypes; private final ImmutableCollection<EntityType> updatedEntityTypes; public GroupedEntityTypes(ImmutableCollection<EntityType> newEntityTypes, ImmutableCollection<EntityType> updatedEntityTypes) { this.newEntityTypes = requireNonNull(newEntityTypes); this.updatedEntityTypes = requireNonNull(updatedEntityTypes); } public ImmutableCollection<EntityType> getNewEntityTypes() { return newEntityTypes; } public ImmutableCollection<EntityType> getUpdatedEntityTypes() { return updatedEntityTypes; } } private void importLanguages(EntityImportReport report, Map<String, Language> languages, DatabaseAction dbAction, MetaDataChanges metaDataChanges) { if (!languages.isEmpty()) { Repository<Language> repo = dataService.getRepository(LANGUAGE, Language.class); // Find new ones languages.values().stream().map(Entity::getIdValue).forEach(id -> { if (repo.findOneById(id) == null) { metaDataChanges.addLanguage(languages.get(id)); } }); int count = update(repo, languages.values(), dbAction); report.addEntityCount(LANGUAGE, count); } } private void importI18nStrings(EntityImportReport report, Map<String, I18nString> i18nStrings, DatabaseAction dbAction) { if (!i18nStrings.isEmpty()) { Repository<I18nString> repo = dataService.getRepository(I18N_STRING, I18nString.class); int count = update(repo, i18nStrings.values(), dbAction); report.addEntityCount(I18N_STRING, count); } } /** * Imports entity data for all entities in resolved from source */ private void importData(EntityImportReport report, Iterable<EntityType> resolved, RepositoryCollection source, DatabaseAction dbAction, String defaultPackage) { for (final EntityType entityType : resolved) { String name = entityType.getName(); // Languages and i18nstrings are already done if (!name.equalsIgnoreCase(LANGUAGE) && !name.equalsIgnoreCase(I18N_STRING) && dataService .hasRepository(name)) { Repository<Entity> repository = dataService.getRepository(name); Repository<Entity> emxEntityRepo = source.getRepository(entityType.getName()); // Try without default package if ((emxEntityRepo == null) && (defaultPackage != null) && entityType.getName().toLowerCase() .startsWith(defaultPackage.toLowerCase() + PACKAGE_SEPARATOR)) { emxEntityRepo = source.getRepository(entityType.getName().substring(defaultPackage.length() + 1)); } // check to prevent nullpointer when importing metadata only if (emxEntityRepo != null) { // transforms entities so that they match the entity meta data of the output repository Iterable<Entity> entities = Iterables .transform(emxEntityRepo, emxEntity -> toEntity(entityType, emxEntity)); int count = update(repository, entities, dbAction); report.addEntityCount(name, count); } } } } /** * Create an entity from the EMX entity * * @param entityType entity meta data * @param emxEntity EMX entity * @return MOLGENIS entity */ private Entity toEntity(EntityType entityType, Entity emxEntity) { Entity entity = entityManager.create(entityType, POPULATE); for (Attribute attr : entityType.getAtomicAttributes()) { if (attr.getExpression() == null && !attr.isMappedBy()) { String attrName = attr.getName(); Object emxValue = emxEntity.get(attrName); AttributeType attrType = attr.getDataType(); switch (attrType) { case BOOL: case DATE: case DATE_TIME: case DECIMAL: case EMAIL: case ENUM: case HTML: case HYPERLINK: case INT: case LONG: case SCRIPT: case STRING: case TEXT: Object value = emxValue != null ? DataConverter.convert(emxValue, attr) : null; if ((!attr.isAuto() || value != null) && (!attr.hasDefaultValue() || value != null)) { entity.set(attrName, value); } break; case CATEGORICAL: case FILE: case XREF: // DataConverter.convert performs no conversion for reference types Entity refEntity; if (emxValue != null) { if (emxValue instanceof Entity) { refEntity = toEntity(attr.getRefEntity(), (Entity) emxValue); } else { EntityType xrefEntity = attr.getRefEntity(); Object entityId = DataConverter.convert(emxValue, xrefEntity.getIdAttribute()); refEntity = entityManager.getReference(xrefEntity, entityId); } } else { refEntity = null; } // do not set generated auto refEntities to null if ((!attr.isAuto() || refEntity != null) && (!attr.hasDefaultValue() || refEntity != null)) { entity.set(attrName, refEntity); } break; case CATEGORICAL_MREF: case MREF: List<Entity> refEntities; // DataConverter.convert performs no conversion for reference types if (emxValue != null) { if (emxValue instanceof Iterable<?>) { List<Entity> mrefEntities = new ArrayList<>(); for (Object emxValueItem : (Iterable<?>) emxValue) { Entity entityValue; if (emxValueItem instanceof Entity) { entityValue = toEntity(attr.getRefEntity(), (Entity) emxValueItem); } else { EntityType xrefEntity = attr.getRefEntity(); Object entityId = DataConverter .convert(emxValueItem, xrefEntity.getIdAttribute()); entityValue = entityManager.getReference(xrefEntity, entityId); } mrefEntities.add(entityValue); } refEntities = mrefEntities; } else { EntityType mrefEntity = attr.getRefEntity(); Attribute refIdAttr = mrefEntity.getIdAttribute(); String[] tokens = StringUtils.split(emxValue.toString(), ','); List<Entity> mrefEntities = new ArrayList<>(); for (String token : tokens) { Object entityId = DataConverter.convert(token.trim(), refIdAttr); mrefEntities.add(entityManager.getReference(mrefEntity, entityId)); } refEntities = mrefEntities; } } else { refEntities = emptyList(); } // do not set generated auto refEntities to null if ((!attr.isAuto() || !refEntities.isEmpty()) && (!attr.hasDefaultValue() || refEntities .isEmpty())) { entity.set(attrName, refEntities); } break; case COMPOUND: throw new RuntimeException(format("Illegal attribute type [%s]", attrType.toString())); default: throw new RuntimeException(format("Unknown attribute type [%s]", attrType.toString())); } } } return entity; } private void upsertEntityTypes(GroupedEntityTypes groupedEntityTypes) { // retrieve existing entity types Fetch entityTypeFetch = createEntityTypeWithAttributesFetch(); ImmutableCollection<EntityType> updatedEntityTypes = groupedEntityTypes.getUpdatedEntityTypes(); Map<String, EntityType> existingEntityTypeMap = dataService .findAll(ENTITY_TYPE_META_DATA, updatedEntityTypes.stream().map(EntityType::getName), entityTypeFetch, EntityType.class).collect(toMap(EntityType::getName, Function.identity())); // inject attribute identifiers in entity types to import updatedEntityTypes.forEach(entityType -> { EntityType existingEntityType = existingEntityTypeMap.get(entityType.getName()); entityType.getOwnAllAttributes().forEach(ownAttr -> { Attribute existingAttr = existingEntityType.getAttribute(ownAttr.getName()); if (existingAttr != null) { ownAttr.setIdentifier(existingAttr.getIdentifier()); } }); }); // add or update entity types List<EntityType> entityTypes = newArrayList(concat(updatedEntityTypes, groupedEntityTypes.getNewEntityTypes())); runAsSystem(() -> dataService.getMeta().upsertEntityTypes(entityTypes)); } private static Fetch createEntityTypeWithAttributesFetch() { return new Fetch().field(EntityTypeMetadata.FULL_NAME).field(EntityTypeMetadata.ATTRIBUTES, new Fetch().field(AttributeMetadata.ID).field(AttributeMetadata.NAME)); } /** * Adds the packages from the packages sheet to the {@link org.molgenis.data.meta.MetaDataService}. */ private void importPackages(ParsedMetaData parsedMetaData) { ImmutableCollection<Package> packages = parsedMetaData.getPackages().values(); dataService.getMeta().upsertPackages(packages.stream().filter(Objects::nonNull)); } /** * Imports the tags from the tag sheet. */ // FIXME: can everybody always update a tag? private void importTags(ParsedMetaData parsedMetaData) { ImmutableCollection<Tag> tags = parsedMetaData.getTags().values(); dataService.getMeta().upsertTags(tags); } /** * Updates a repository with entities. * * @param repo the {@link Repository} to update * @param entities the entities to * @param dbAction {@link DatabaseAction} describing how to merge the existing entities * @return number of updated entities */ private <E extends Entity> int update(Repository<E> repo, Iterable<E> entities, DatabaseAction dbAction) { if (entities == null) return 0; if (!molgenisPermissionService.hasPermissionOnEntity(repo.getName(), Permission.WRITE)) { throw new MolgenisDataAccessException("No WRITE permission on entity '" + repo.getName() + "'. Is this entity already imported by another user who did not grant you WRITE permission?"); } int count = 0; switch (dbAction) { case ADD: count = repo.add(stream(entities.spliterator(), false)); break; case ADD_IGNORE_EXISTING: { HugeSet<Object> existingIds = getExistingEntityIds(repo, entities); try { String idAttributeName = repo.getEntityType().getIdAttribute().getName(); int batchSize = 1000; List<E> newEntities = newArrayList(); Iterator<E> it = entities.iterator(); while (it.hasNext()) { E entity = it.next(); count++; Object id = entity.get(idAttributeName); if (!existingIds.contains(id)) { newEntities.add(entity); if (newEntities.size() == batchSize) { repo.add(newEntities.stream()); newEntities.clear(); } } } if (!newEntities.isEmpty()) { repo.add(newEntities.stream()); } } finally { IOUtils.closeQuietly(existingIds); } break; } case ADD_UPDATE_EXISTING: { HugeSet<Object> existingIds = getExistingEntityIds(repo, entities); try { String idAttributeName = repo.getEntityType().getIdAttribute().getName(); int batchSize = 1000; List<E> existingEntities = new ArrayList<>(batchSize); List<Integer> existingEntitiesRowIndex = new ArrayList<>(batchSize); List<E> newEntities = new ArrayList<>(batchSize); List<Integer> newEntitiesRowIndex = new ArrayList<>(batchSize); Iterator<E> it = entities.iterator(); while (it.hasNext()) { E entity = it.next(); count++; Object id = entity.get(idAttributeName); if (existingIds.contains(id)) { existingEntitiesRowIndex.add(count); existingEntities.add(entity); if (existingEntities.size() == batchSize) { updateInRepo(repo, existingEntities, existingEntitiesRowIndex); } } else { newEntitiesRowIndex.add(count); newEntities.add(entity); if (newEntities.size() == batchSize) { insertIntoRepo(repo, newEntities, newEntitiesRowIndex); } } } if (!existingEntities.isEmpty()) { updateInRepo(repo, existingEntities, existingEntitiesRowIndex); } if (!newEntities.isEmpty()) { insertIntoRepo(repo, newEntities, newEntitiesRowIndex); } } finally { IOUtils.closeQuietly(existingIds); } break; } case UPDATE: AtomicInteger atomicCount = new AtomicInteger(0); repo.update(stream(entities.spliterator(), false).filter(entity -> { atomicCount.incrementAndGet(); return true; })); count = atomicCount.get(); break; default: throw new RuntimeException(format("Unknown database action [%s]", dbAction.toString())); } return count; } private static <E extends Entity> HugeSet<Object> getExistingEntityIds(Repository<E> repo, Iterable<E> entities) { String idAttributeName = repo.getEntityType().getIdAttribute().getName(); HugeSet<Object> ids = new HugeSet<>(); HugeSet<Object> existingIds = new HugeSet<>(); try { for (Entity entity : entities) { Object id = entity.get(idAttributeName); if (id != null) { ids.add(id); } } if (!ids.isEmpty()) { // Check if the ids already exist if (repo.count() > 0) { int batchSize = 100; Query<E> q = new QueryImpl<>(); Iterator<Object> it = ids.iterator(); int batchCount = 0; while (it.hasNext()) { Object id = it.next(); q.eq(idAttributeName, id); batchCount++; if (batchCount == batchSize || !it.hasNext()) { repo.findAll(q).forEach(existing -> existingIds.add(existing.getIdValue())); q = new QueryImpl<>(); batchCount = 0; } else { q.or(); } } } } } catch (RuntimeException e) { IOUtils.closeQuietly(existingIds); throw e; } finally { IOUtils.closeQuietly(ids); } return existingIds; } private <E extends Entity> void updateInRepo(Repository<E> repo, List<E> existingEntities, List<Integer> existingEntitiesRowIndex) { try { repo.update(existingEntities.stream()); } catch (MolgenisValidationException mve) { mve.renumberViolationRowIndices(existingEntitiesRowIndex); throw mve; } existingEntities.clear(); existingEntitiesRowIndex.clear(); } private <E extends Entity> void insertIntoRepo(Repository<E> repo, List<E> newEntities, List<Integer> newEntitiesRowIndex) { try { repo.add(newEntities.stream()); } catch (MolgenisValidationException mve) { mve.renumberViolationRowIndices(newEntitiesRowIndex); throw mve; } newEntities.clear(); newEntitiesRowIndex.clear(); } }