package com.constellio.sdk; import com.constellio.app.modules.rm.services.RMSchemasRecordsServices; import com.constellio.app.modules.rm.wrappers.Document; import com.constellio.app.services.factories.AppLayerFactory; import com.constellio.app.ui.entities.ContentVersionVO; import com.constellio.app.ui.framework.builders.ContentVersionToVOBuilder; import com.constellio.app.ui.framework.components.converters.RecordIdToCaptionConverter; import com.constellio.model.entities.records.Content; import com.constellio.model.entities.records.ContentVersion; import com.constellio.model.entities.records.Record; import com.constellio.model.entities.records.wrappers.User; import com.constellio.model.entities.schemas.MetadataSchemaType; import com.constellio.model.entities.schemas.MetadataSchemaTypes; import com.constellio.model.entities.schemas.MetadataSchemasRuntimeException; import com.constellio.model.services.factories.ModelLayerFactory; import com.constellio.model.services.records.extractions.RecordPopulateServices; import com.constellio.model.services.schemas.MetadataSchemasManager; import com.constellio.model.services.search.SPEQueryResponse; import com.constellio.model.services.search.SearchServices; import com.constellio.model.services.search.query.logical.LogicalSearchQuery; import com.constellio.model.services.users.UserServices; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.util.*; import static com.constellio.model.services.search.query.logical.LogicalSearchQueryOperators.from; /** * Created by Majid on 2016-07-07. */ public class ExportToJSonFiles { final int BATCH_SIZE = 1000; private final AppLayerFactory appLayerFactory; private final ModelLayerFactory modelLayerFactory; private final UserServices userServices; private final RecordIdToCaptionConverter recordCaptionConverter = new RecordIdToCaptionConverter();; private RMSchemasRecordsServices rmSchemasRecordsServices ; private final Gson gson; private final File contentDir; //stats private int nullContentDocCnt; private int multiVersionDocs; private int totalDocs; private int fileNotFound; private List<Map<String, Object>> docsMetadatas = new ArrayList<>(); private static Logger LOGGER = LoggerFactory.getLogger(ExportToJSonFiles.class); public ExportToJSonFiles(File contentDir) { RecordPopulateServices.LOG_CONTENT_MISSING = true; appLayerFactory = SDKScriptUtils.startApplicationWithBatchProcesses(); modelLayerFactory = appLayerFactory.getModelLayerFactory(); userServices = new UserServices(appLayerFactory.getModelLayerFactory()); gson = new GsonBuilder().setPrettyPrinting().create(); this.contentDir = contentDir; } public void exportTo(File outputDir, boolean toMove) throws FileNotFoundException { if (!outputDir.exists()) outputDir.mkdir(); nullContentDocCnt = 0; multiVersionDocs = 0; totalDocs = 0; fileNotFound = 0; docsMetadatas.clear(); for (String collection: modelLayerFactory.getCollectionsListManager().getCollections()) { System.out.println(String.format("Exporting document of the collection '%s'", collection)); final List<User> allUsersInCollection = userServices.getAllUsersInCollection(collection); if (allUsersInCollection.size() == 0) { System.out.println(String.format("Ignore the collection '%s'. No user has been found in the collection.", collection)); continue; } rmSchemasRecordsServices = new RMSchemasRecordsServices(collection, appLayerFactory); MetadataSchemaType documentSchemaType; try { documentSchemaType = rmSchemasRecordsServices.documentSchemaType(); } catch (MetadataSchemasRuntimeException e) { System.out.println(String.format("Ignore the collection '%s'. The Document schema has not been found.", collection)); continue; } final SearchServices searchServices = modelLayerFactory.newSearchServices(); MetadataSchemasManager metadataSchemasManager = modelLayerFactory.getMetadataSchemasManager(); MetadataSchemaTypes types = metadataSchemasManager.getSchemaTypes(collection); // final SessionContext sessionContext = FakeSessionContext.forRealUserIncollection(allUsersInCollection.get(0)); LogicalSearchQuery query = new LogicalSearchQuery(from(documentSchemaType).returnAll()); exportDocuments(outputDir, searchServices, types, query); } System.out.println(String.format("Total Documents=%d\tDocuments with multiple versions=%d" + "\tDocument with null content=%d\tFile not found=%d", totalDocs, multiVersionDocs, nullContentDocCnt, fileNotFound)); File multiVersionDocsInfoFile = new File(outputDir, "multiVersionDocs.json"); PrintStream multiVersionDocsInfoStream = new PrintStream(new BufferedOutputStream(new FileOutputStream(multiVersionDocsInfoFile))); gson.toJson(docsMetadatas, multiVersionDocsInfoStream); multiVersionDocsInfoStream.close(); File statFile = new File(outputDir, "stat.json"); Map<String, Integer> stat = new TreeMap<>(); stat.put("Total docs", totalDocs); stat.put("Multi version docs", multiVersionDocs); stat.put("Null content docs", nullContentDocCnt); stat.put("Not found docs", fileNotFound); PrintStream statStream = new PrintStream(new BufferedOutputStream(new FileOutputStream(statFile))); gson.toJson(stat, statStream); statStream.close(); } protected void exportDocuments(final File outputDir, final SearchServices searchServices, final MetadataSchemaTypes types, final LogicalSearchQuery query) throws FileNotFoundException { // final DocumentToVOBuilder builder = new DocumentToVOBuilder(appLayerFactory.getModelLayerFactory()); query.setStartRow(0); query.setNumberOfRows(0); final SPEQueryResponse response = searchServices.query(query); final long numFound = response.getNumFound(); query.setNumberOfRows(BATCH_SIZE); for (int start = 0; start < numFound; start += BATCH_SIZE) { System.out.println(String.format("%d / %d", start, numFound)); query.setStartRow(start); final List<Record> documents = searchServices.search(query); int idx = totalDocs + start; for (Record record : documents) { // final DocumentVO documentVO = builder.build(record, RecordVO.VIEW_MODE.DISPLAY, sessionContext); File documentDir = new File(outputDir, "" + idx); documentDir.mkdir(); final Map<String, Object> docMetadata = extractMetatdatas(types, record, documentDir); if (docMetadata != null) { docMetadata.put("dir", "" + idx); File outputFile = new File(documentDir, "metadata.json"); PrintStream output = new PrintStream(new BufferedOutputStream(new FileOutputStream(outputFile))); gson.toJson(docMetadata, output); output.close(); if (((List<Map<String, String>>) docMetadata.get("versions")).size() > 1) docsMetadatas.add(docMetadata); ++idx; } else { documentDir.delete(); } } totalDocs = idx; } } private Map<String, Object> extractMetatdatas(MetadataSchemaTypes types, Record record, File documentDir) { Document document = new Document(record, types); Content content = document.getContent(); if (content != null) { Map<String, Object> documentMetadatas = extractMetadatas(document, documentDir); return documentMetadatas; } else { String type = "null"; if (document.getType() != null) type = rmSchemasRecordsServices.getDocumentType(document.getType()).getTitle(); ++nullContentDocCnt; System.err.println(String.format("%d- Document with no content! Document type = '%s'", nullContentDocCnt, type)); return null; } } private Map<String, Object> extractMetadatas(Document document, File documentDir) { Map<String, Object> documentMetadatas = new TreeMap<>(); documentMetadatas.put("id", document.getId()); documentMetadatas.put("title", document.getTitle()); documentMetadatas.put("author", document.getAuthor()); final String folderID = document.getFolder(); documentMetadatas.put("folder", rmSchemasRecordsServices.getFolder(folderID).getTitle()); documentMetadatas.put("folderId", document.getFolder()); List<Map<String, String>> versions = new ArrayList<>(); int versionIdx = 0; versions.add(getContentMetadata(document.getContent(), document.getContent().getCurrentVersion(), new File(documentDir, "" + versionIdx), versionIdx)); for (ContentVersion version : document.getContent().getHistoryVersions()) { versionIdx++; versions.add(getContentMetadata(document.getContent(), version, new File(documentDir, "" + versionIdx), versionIdx)); } documentMetadatas.put("versions", versions); if (versions.size() > 1) multiVersionDocs++; return documentMetadatas; } private Map<String, String> getContentMetadata(Content content, ContentVersion contentVersion, File versionFile, int versionIdx){ ContentVersionToVOBuilder builder = new ContentVersionToVOBuilder(modelLayerFactory); ContentVersionVO contentVersionVO = builder.build(content, contentVersion); Map<String, String> contentMetadata = new TreeMap<>(); contentMetadata.put("fileIndex", "" + versionIdx); contentMetadata.put("fileName", contentVersionVO.getFileName()); contentMetadata.put("mimeType", contentVersionVO.getMimeType()); contentMetadata.put("id", contentVersionVO.getHash()); contentMetadata.put("modificationBy", contentVersionVO.getLastModifiedBy()); contentMetadata.put("modificationByCaption", recordCaptionConverter.convertToPresentation( contentVersionVO.getLastModifiedBy(), String.class, Locale.ENGLISH)); contentMetadata.put("lastModificationDate", contentVersionVO.getLastModificationDateTime().toString()); archive(versionFile, contentVersionVO); return contentMetadata; } private void archive(File versionFile, ContentVersionVO contentVersionVO) { final File toMove = getFileOf(contentVersionVO.getHash()); if (toMove.exists()) { toMove.renameTo(versionFile); for (String ext: new String[]{"__parsed", ".preview"}){ new File(toMove.getAbsolutePath() + ext).renameTo(new File(versionFile.getAbsolutePath() + ext)); } } else { ++fileNotFound; System.err.println(String.format("%d- File <%s> is not found.", fileNotFound, toMove.getAbsolutePath())); } } private File getFileOf(String contentId) { if (contentId.contains("/")) { return new File(contentDir, contentId.replace("/", File.separator)); } else { String folderName = contentId.substring(0, 2); File folder = new File(contentDir, folderName); return new File(folder, contentId); } } public static void main(String[] args) throws FileNotFoundException { final File zeFolder = new File("/Volumes/encrypted/constellio/zeFolder"); final File contentDir = new File(zeFolder, "contents"); final File outputDir = new File(zeFolder, "json"); final ExportToJSonFiles exportToJSonFiles = new ExportToJSonFiles(contentDir); exportToJSonFiles.exportTo(outputDir, true); } }