/* * Copyright (C) 2000 - 2011 Silverpeas * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * As a special exception to the terms and conditions of version 3.0 of * the GPL, you may redistribute this Program in connection withWriter Free/Libre * Open Source Software ("FLOSS") applications as described in Silverpeas's * FLOSS exception. You should have recieved a copy of the text describing * the FLOSS exception, and it is also available here: * "http://www.silverpeas.org/legal/licensing" * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.silverpeas.migration.contentmanagement; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.text.MessageFormat; import java.util.ArrayList; import java.util.List; import org.silverpeas.dbbuilder.Console; import org.silverpeas.dbbuilder.dbbuilder_dl.DbBuilderDynamicPart; /** * DB migration to remove any duplicate content instances in the database underlying at Silverpeas. * The table sb_contentmanager_content table can contain duplicate rows with the same * contentInstanceId and internalContentId columns value but with a different silverContentId column * value. This means some contents in Silverpeas are linked with more than one content object and * this shouldn't occur. */ public class DuplicateContentRemoving extends DbBuilderDynamicPart { /** * SQL statement for querying the count of duplicated contents that are classified on the PdC. * Among the classified contents, one or more redundant instances of theses contents can be not * classified on the PdC. The Silverpeas contents are persisted into the sb_contentmanager_content * table. A content belongs to a Silverpeas component instance that is refered by the column * contentInstanceId in the sb_contentmanager_content table. The classification of the Silverpeas * contents are persisted into the sb_classifyengine_classify table. */ private static final String DUPLICATE_CLASSIFIED_CONTENT_COUNT_QUERY = "select count(distinct c1.internalContentId) from sb_contentmanager_content c1 where " + "1 < (select count(c2.internalContentId) from sb_contentmanager_content c2 where " + "c2.contentInstanceId=c1.contentInstanceId and c2.internalContentId = c1.internalContentId) " + "and c1.silverContentId in (select objectid from sb_classifyengine_classify)"; /** * SQL statement for querying the count of all duplicated contents. A content can be made up of * two or more redundant instances. The Silverpeas contents are persisted into the * sb_contentmanager_content table. A content belongs to a Silverpeas component instance that is * refered by the column contentInstanceId in the sb_contentmanager_content table. */ private static final String DUPLICATE_CONTENT_COUNT_QUERY = "select count(distinct c1.internalContentId) from sb_contentmanager_content c1 where " + "1 < (select count(c2.internalContentId) from sb_contentmanager_content c2 where " + "c2.contentInstanceId=c1.contentInstanceId and c2.internalContentId = c1.internalContentId) "; /** * SQL statement for querying the redundant instances of duplicated contents. A content can be * made up of two or more redundant instances. The Silverpeas contents are persisted into the * sb_contentmanager_content table. A content belongs to a Silverpeas component instance that is * refered by the column contentInstanceId in the sb_contentmanager_content table. This statement * is used for querying redundant instances that weren't deleted by the two below SQL statements. */ private static final String REDUNDANT_INSTANCE_OF_DUPLICATE_CONTENT_QUERY = "select c1.silverContentId, c1.internalContentId, c1.contentInstanceId from sb_contentmanager_content c1 where " + "1 < (select count(c2.internalContentId) from sb_contentmanager_content c2 where " + "c2.contentInstanceId=c1.contentInstanceId and c2.internalContentId = c1.internalContentId) " + "and c1.silverContentId < (select max(c3.silverContentId) from sb_contentmanager_content c3 where c1.contentInstanceId=c3.contentInstanceId and c1.internalContentId=c3.internalContentId and c1.silverContentId != c3.silverContentId)"; /** * SQL statement for querying in the sb_contentmanager_content table all the unclassified * redundant instances of duplicate contents. Theses contents can have one instance that is * classified on the PdC; theses aren't fetched. Only the instance of duplicate content with the * higher silver content identifier (thus the more recent silver object registered) are kept as * the single valid content instance. Theses queried contents is for their deletion. */ private static final String UNCLASSIFIED_REDUNDANT_CONTENT_INSTANCES_TO_DELETE = "select c1.silverContentId from " + "sb_contentmanager_content c1 where " + "1 < (select count(c2.internalContentId) from sb_contentmanager_content c2 where c2.contentInstanceId=c1.contentInstanceId and c2.internalContentId = c1.internalContentId) " + "and c1.silverContentId not in (select objectid from sb_classifyengine_classify)" + "and c1.silverContentId < (select max(c3.silverContentId) from sb_contentmanager_content c3 where c1.contentInstanceId=c3.contentInstanceId and c1.internalContentId=c3.internalContentId)"; /** * SQL statement for querying in the sb_contentmanager_content table the unclassified redundant * instances of the duplicate contents that were not taken into account by the previous request. * Theses one are classified redundant instances with a lower silver content identifier. Theses * queried contents is for their deletion. */ private static final String UNCLASSIFIED_REDUNDANT_CONTENT_INSTANCE_WITH_HIGHER_ID_TO_DELETE = "select c1.silverContentId from " + "sb_contentmanager_content c1 where " + "1 < (select count(c2.internalContentId) from sb_contentmanager_content c2 where c2.contentInstanceId=c1.contentInstanceId and c2.internalContentId = c1.internalContentId) " + "and c1.silverContentId not in (select objectid from sb_classifyengine_classify)"; /** * SQL statement for deleting explicitly some given contents in the sb_contentmanager_content * table. */ private static final String CONTENT_INSTANCE_DELETION = "delete from " + "sb_contentmanager_content where silverContentId in ({0})"; /** * SQL statement for deleting explicitly the classification of some given contents in the * sb_classifyengine_classify table. This statement will be use in the exceptional case where two * instances of a duplicate content are classified; in this case, the redundant instance isn't * deleted by the above statement and it is then necessary to delete its classification before * deleting it. */ private static final String CONTENT_INSTANCE_CLASSIFICATION_DELETION = "delete from " + "sb_classifyengine_classify where objectId in ({0})"; /** * Migrates the sb_contentmanager_content table by removing all duplicated Silverpeas contents * @throws Exception if an error occurs while migrating the sb_contentmanager_content table. */ public void migrate() throws Exception { Console console = getConsole(); if (console == null) { console = new Console(); } Connection connection = getConnection(); boolean autocommit = connection.getAutoCommit(); if (autocommit) { connection.setAutoCommit(false); } int duplicateContentCount = executeQuery(DUPLICATE_CONTENT_COUNT_QUERY).get(0); String duplicateContents = "Number of duplicate content: " + duplicateContentCount; console.printMessageln(duplicateContents); System.out.println(); System.out.println(duplicateContents); int classifiedContents = executeQuery(DUPLICATE_CLASSIFIED_CONTENT_COUNT_QUERY).get(0); console.printMessageln("Number of duplicate content that are classified on the PdC: " + classifiedContents); console.printMessageln( "Delete the unclassified redundant instances of duplicate contents"); List<Integer> contentsToDelete = executeQuery(UNCLASSIFIED_REDUNDANT_CONTENT_INSTANCES_TO_DELETE); int deletedContents1 = executeDeletion(CONTENT_INSTANCE_DELETION, contentsToDelete); assertEquals(contentsToDelete.size(), deletedContents1); console.printMessageln("-> number of redundant instances deleted: " + deletedContents1); console.printMessageln( "Delete the rest of unclassified redundant instances of duplicate contents"); contentsToDelete = executeQuery(UNCLASSIFIED_REDUNDANT_CONTENT_INSTANCE_WITH_HIGHER_ID_TO_DELETE); int deletedContents2 = executeDeletion(CONTENT_INSTANCE_DELETION, contentsToDelete); assertEquals(contentsToDelete.size(), deletedContents2); console.printMessageln("-> number of redundant instances deleted: " + deletedContents2); console.printMessageln( "Delete the exceptional redundant instances of duplicate classified content"); int deletedContents3 = deleteRedundantClassifiedInstances(); console.printMessageln("-> number of redundant instances deleted: " + deletedContents3); String deletedContents = "Total number of deleted redundant instances: " + (deletedContents1 + deletedContents2 + deletedContents3); console.printMessageln(deletedContents); System.out.println(); System.out.println(deletedContents); connection.commit(); connection.setAutoCommit(autocommit); } // private int executeQuery(String query) throws SQLException { // Connection connection = getConnection(); // Statement statement = connection.createStatement(); // ResultSet resultSet = statement.executeQuery(query); // resultSet.next(); // return resultSet.getInt(1); // } private List<Integer> executeQuery(String query) throws SQLException { List<Integer> result = new ArrayList<Integer>(); Connection connection = getConnection(); Statement statement = connection.createStatement(); ResultSet resultSet = statement.executeQuery(query); while (resultSet.next()) { result.add(resultSet.getInt(1)); } return result; } private int executeDeletion(String query, final List<Integer> objectsToDelete) throws SQLException { if (objectsToDelete.isEmpty()) { return 0; } Connection connection = getConnection(); Statement statement = connection.createStatement(); StringBuilder parameterBuilder = new StringBuilder(); for (Integer anObjectToDelete : objectsToDelete) { parameterBuilder.append(anObjectToDelete).append(','); } String sqlRequest = MessageFormat.format(query, parameterBuilder.toString().substring(0, parameterBuilder.length() - 1)); return statement.executeUpdate(sqlRequest); } private int deleteRedundantClassifiedInstances() throws SQLException { Connection connection = getConnection(); Statement statement = connection.createStatement(); ResultSet rs = statement.executeQuery(REDUNDANT_INSTANCE_OF_DUPLICATE_CONTENT_QUERY); // As it should have only a few (or no) results from the query above, we can execute the // deletion // for each of the retrieved result. List<Integer> contentsToDelete = new ArrayList<Integer>(); while (rs.next()) { int silverContentId = rs.getInt("silverContentId"); contentsToDelete.add(silverContentId); } executeDeletion(CONTENT_INSTANCE_CLASSIFICATION_DELETION, contentsToDelete); int deletedCount = executeDeletion(CONTENT_INSTANCE_DELETION, contentsToDelete); assertEquals(contentsToDelete.size(), deletedCount); return deletedCount; } private static void assertEquals(int expected, int actual) { if (expected != actual) { throw new AssertionError("Expected deletion: " + expected + ", actual deletion: " + actual); } } }