/* * Copyright 2013 Research Studios Austria Forschungsgesellschaft mBH * * This file is part of easyrec. * * easyrec is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * easyrec is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with easyrec. If not, see <http://www.gnu.org/licenses/>. */ package org.easyrec.plugin.profileduke; import no.priv.garshol.duke.Column; import no.priv.garshol.duke.Configuration; import no.priv.garshol.duke.Processor; import no.priv.garshol.duke.Property; import no.priv.garshol.duke.ConfigLoader; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.easyrec.model.core.ItemVO; import org.easyrec.plugin.model.Version; import org.easyrec.plugin.profileduke.duke.datasource.EasyrecXMLFormatDataSource; import org.easyrec.plugin.profileduke.duke.matchers.EasyrecProfileMatcher; import org.easyrec.plugin.support.GeneratorPluginSupport; import org.easyrec.service.core.ItemAssocService; import org.easyrec.service.core.ProfileService; import org.easyrec.service.domain.TypeMappingService; import org.easyrec.store.dao.core.ItemAssocDAO; import org.xml.sax.SAXException; import java.io.File; import java.io.IOException; import java.net.URI; import java.util.*; // TODO: Description /** * @author Soheil Khosravipour * @author Fabian Salcher */ public class ProfileDukeGenerator extends GeneratorPluginSupport<ProfileDukeConfiguration, ProfileDukeStats> { // ------------------------------ FIELDS ------------------------------ // the display name is the name of the generator that will show up in the admin tool when the plugin has been loaded. public static final String DISPLAY_NAME = "Profile Similarity Calculator (Duke)"; // version of the generator, should be ascending for each new release // you might increment the versioning components (major,minor,misc) like this: // major - complete reworks of your generator, major new features // minor - small feature improvements // misc - bugfix releases or anything else public static final Version VERSION = new Version("0.98"); // The URI that uniquely identifies the plugin. While any valid URI is technically ok here, implementors // should choose their URIs wisely, ideally the URI should be 'cool' // (@see <a href="http://www.dfki.uni-kl.de/~sauermann/2006/11/cooluris/#cooluris">Cool URIs for the // Semantic Web</a>) If unsure, use an all-lowercase http URI pointing to a host/path that you control, // ending with '#[plugin-name]'. public static final URI ID = URI.create("http://www.easyrec.org/plugins/profileSimilarity"); public static final Log logger = LogFactory.getLog(org.easyrec.plugin.profileduke.ProfileDukeGenerator.class); // the service will be auto-wired when the plugin is loaded, see {@link #setActionService(ActionService)}. private ProfileService profileService; private int numberOfAssociationsCreated = 0; private double lastThreshold = 0; private int tenantId; private int itemType; // --------------------------- CONSTRUCTORS --------------------------- public ProfileDukeGenerator() { // we need to call the constructor of GeneratorPluginSupport to provide the name, id and version //additionally, we have to pass the class objects of config and stats classes. super(DISPLAY_NAME, ID, VERSION, ProfileDukeConfiguration.class, ProfileDukeStats.class); } // --------------------- GETTER / SETTER METHODS --------------------- // this method will be called when the plugin is being loaded and Spring injects the service, you need to make sure // that everything after the "set" part of the method name is named exactly like the Spring-bean. // For all beans that can be injected look in the wiki. public void setProfileService(ProfileService profileService) { this.profileService = profileService; } public void setNumberOfAssociationsCreated(int numberOfAssociationsCreated) { this.numberOfAssociationsCreated = numberOfAssociationsCreated; } public int getItemType(){ return itemType; } public int getTenantId() { return tenantId; } // ------------------------ INTERFACE METHODS ------------------------ @Override public String getPluginDescription() { return "<p>Generator plugin that generates rules based on the item profiles.<p/>\n" + "<p>The record linkage engine Duke is used to calculate the similarities between the item profiles.</p>"; } // -------------------------- MAIN METHODS -------------------------- @Override protected void doCleanup() throws Exception { logger.info("The plugin is now being uninstalled."); // remove all tables/files/resources you created in {@link #doInitialize()}. // optional - you don't have to implement this method } @Override protected void doExecute(ExecutionControl executionControl, ProfileDukeStats stats) throws Exception { // when doExecute() is called, the generator has been initialized with the configuration we should use ProfileDukeConfiguration config = getConfiguration(); TypeMappingService typeMappingService = (TypeMappingService) super.getTypeMappingService(); int sourceType = typeMappingService.getIdOfSourceType(config.getTenantId(), this.getId().toString()); itemType = typeMappingService.getIdOfItemType(config.getTenantId(), config.getItemType()); tenantId = config.getTenantId(); ItemAssocDAO itemAssocDAO = getItemAssocDAO(); // the generator needs to check periodically if abort was requested and stop operation in a clean manner if (executionControl.isAbortRequested()) return; List<ItemVO<Integer, Integer>> itemList = profileService.getItemsByItemType( config.getTenantId(), config.getItemType(), 0); stats.setNumberOfItems(itemList.size()); logger.info("BlockMode: " + config.getBlockCalculationMode()); logger.info("BlockModeBlockSize: " + config.getBlockCalculationBlockSize()); logger.info("ItemListSize: " + itemList.size()); logger.info("Duke Configuration: \n" + config.getDukeConfiguration()); if ("true".equals(config.getBlockCalculationMode())) { LinkedList<ItemVO<Integer, Integer>> itemPot = new LinkedList<ItemVO<Integer, Integer>>(); for (ItemVO<Integer, Integer> item : itemList) { itemPot.add(item); } Random random = new Random(); int blockSize = config.getBlockCalculationBlockSize(); List<ItemVO<Integer, Integer>> itemTempList = new Vector<ItemVO<Integer, Integer>>(); while (itemPot.size() > 0) { itemTempList.add(itemPot.remove(random.nextInt(itemPot.size()))); if (itemPot.size() == 0 || (itemTempList.size() >= blockSize && itemPot.size() > ((float) blockSize * 0.1))) { executionControl.updateProgress(itemList.size() - itemPot.size(), itemList.size() * 2, "calculating item similarity"); if (!prepareAndStartDuke(itemTempList)) { executionControl.updateProgress(1, 1, "Due to an error execution has been aborted!"); return; } itemTempList.clear(); } } } else { executionControl.updateProgress(1, 2, "calculating item similarity"); if (!prepareAndStartDuke(itemList)) { executionControl.updateProgress(1, 1, "Due to an error execution has been aborted!"); return; } } stats.setNumberOfRulesCreated(numberOfAssociationsCreated); // delete associations below the threshold int associationType = typeMappingService.getIdOfAssocType(tenantId, config.getAssociationType()); itemAssocDAO.removeItemAssocByTenantAndThreshold(config.getTenantId(), associationType, sourceType, stats.getStartDate(), lastThreshold); } @Override protected void doInitialize() throws Exception { // This method will be run each time easyrec starts-up and you can do some preinitialization of your plugin here. logger.info("The plugin is now being initialized by easyrec."); // optional - you don't have to implement this method } @Override protected void doInstall() throws Exception { // This method will only be called once when the plugin is uploaded to easyrec. // You can set-up your database here or do some other run-once tasks. logger.info("The plugin is now being installed."); // optional - you don't have to implement this method } @Override protected void doUninstall() throws Exception { // This method will only be called once when the plugin is deleted from easyrec. // You need to remove all resources created by your plugin (including entries in easyrec database tables.) logger.info("The plugin is now being uninstalled."); // optional - you don't have to implement this method } /** * This procedure calculates loads the duke configuration, * reads the item profiles, starts duke to calculate the * similarities and writes them as associations in the DB. * * @param items a list of items which should contain all users of your tenant * @return <code>true</code> if the calculation succeeds and <code>false</code> if an * error occurs * @throws IOException */ private boolean prepareAndStartDuke(List<ItemVO<Integer, Integer>> items) throws IOException { ProfileDukeConfiguration config = getConfiguration(); String dukeConfigurationString = config.getDukeConfiguration(); TypeMappingService typeMappingService = (TypeMappingService) super.getTypeMappingService(); Integer associationType = typeMappingService.getIdOfAssocType( config.getTenantId(), config.getAssociationType()); Integer sourceType = typeMappingService.getIdOfSourceType( config.getTenantId(), this.getId().toString()); Integer viewType = typeMappingService.getIdOfViewType( config.getTenantId(), config.getViewType()); Configuration dukeConfig = null; try { dukeConfig = ConfigLoader.loadFromString(dukeConfigurationString); } catch (SAXException e) { logger.error("An error occurred while parsing Duke configuration: " + e.getMessage()); logger.debug(e.getStackTrace()); return false; } catch (IOException e) { logger.error("An error occurred while parsing Duke configuration: " + e.getMessage()); logger.debug(e.getStackTrace()); return false; } List<Property> propertyList = dukeConfig.getProperties(); propertyList.add(new Property("ID")); Property itemIDProperty = new Property("ItemID"); itemIDProperty.setIgnoreProperty(true); propertyList.add(itemIDProperty); dukeConfig.setProperties(propertyList); lastThreshold = dukeConfig.getThreshold(); //read properties List<Property> props = dukeConfig.getProperties(); EasyrecXMLFormatDataSource dataSource = new EasyrecXMLFormatDataSource(); dataSource.setItems(items); dataSource.setProfileService(profileService); for (Property prop : props) { if (prop.getName().equals("ID")) { dataSource.addColumn(new Column("?uri", "ID", null, null)); } else { dataSource.addColumn(new Column(prop.getName(), prop.getName(), null, null)); } } dataSource.setProps(props); dukeConfig.addDataSource(0, dataSource); Processor proc = new Processor(dukeConfig); EasyrecProfileMatcher easyrecProfileMatcher = new EasyrecProfileMatcher(false, true, this); ItemAssocService itemAssocService = getItemAssocService(); easyrecProfileMatcher.setItemAssocService(itemAssocService); easyrecProfileMatcher.setAssocType(associationType); easyrecProfileMatcher.setConfTanantId(config.getTenantId()); easyrecProfileMatcher.setSourceType(sourceType); easyrecProfileMatcher.setViewType(viewType); proc.addMatchListener(easyrecProfileMatcher); proc.deduplicate(); proc.close(); return true; } }