/*
* version 1.0 - MUSES prototype software
* Copyright MUSES project (European Commission FP7) - 2013
*
*/
package eu.musesproject.server.dataminer;
/*
* #%L
* MUSES Server
* %%
* Copyright (C) 2013 - 2014 UGR
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.Random;
import java.sql.*;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.math.BigInteger;
import eu.musesproject.server.scheduler.ModuleType;
import org.apache.log4j.Logger;
import weka.associations.Apriori;
import weka.attributeSelection.AttributeSelection;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Utils;
import weka.core.converters.ArffSaver;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NumericToNominal;
import weka.filters.unsupervised.attribute.Remove;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.attributeSelection.CfsSubsetEval;
import weka.attributeSelection.GreedyStepwise;
import weka.classifiers.rules.JRip;
import weka.classifiers.rules.PART;
import weka.classifiers.trees.*;
import weka.classifiers.evaluation.Evaluation;
import eu.musesproject.server.db.handler.DBManager;
import eu.musesproject.server.entity.AccessRequest;
import eu.musesproject.server.entity.Applications;
import eu.musesproject.server.entity.Assets;
import eu.musesproject.server.entity.Decision;
import eu.musesproject.server.entity.DecisionTrustvalues;
import eu.musesproject.server.entity.Devices;
import eu.musesproject.server.entity.EventType;
import eu.musesproject.server.entity.PatternsKrs;
import eu.musesproject.server.entity.Roles;
import eu.musesproject.server.entity.SecurityRules;
import eu.musesproject.server.entity.SecurityViolation;
import eu.musesproject.server.entity.SimpleEvents;
import eu.musesproject.server.entity.SystemLogKrs;
import eu.musesproject.server.entity.Users;
import eu.musesproject.server.eventprocessor.impl.EventProcessorImpl;
import eu.musesproject.server.eventprocessor.util.Constants;
/**
* The Class DataMiner.
*
* @author Sergio Zamarripa (S2) & Paloma de las Cuevas (UGR)
* @version Sep 30, 2013
*/
public class DataMiner {
/** Database access handler shared by all DataMiner instances; created for the KRS module type. */
private static DBManager dbManager = new DBManager(ModuleType.KRS);
/** Tag constant; it is not referenced by any of the methods of this class. */
private static final String MUSES_TAG = "MUSES_TAG";
/** Parser used to turn classifier output and stored rules into comparable rule strings. */
private static ParsingUtils parser = new ParsingUtils();
/** Helper that extracts the individual pattern fields from events, users and access requests. */
private static DataMiningUtils dmUtils = new DataMiningUtils();
/** log4j logger of this class (one per instance). */
private Logger logger = Logger.getLogger(DataMiner.class);
/**
 * Compares the rules obtained by the best classifier (see {@code dataClassification}) with the
 * rules already known to the system and stores every new one as a draft security rule.
 * This is the main output of the Data Miner.
 *
 * <p>Steps: load all patterns, build the Weka instances, run feature selection, classify,
 * run the association algorithm, parse the classifier output into rules, and store each proposed
 * rule that is alike neither any rule returned by {@code parser.DBRulesParser()} nor any rule
 * already stored with status {@code Constants.DRAFT}. Finally the classification is run again on
 * the attributes chosen by feature selection (its result is only logged).
 *
 * <p>Problems (no patterns, instances not built, no classifier output) are logged and end the
 * run; nothing is thrown for them.
 */
public void ruleComparison() {
    List<PatternsKrs> patternList = dbManager.getPatternsKRS();
    if (patternList == null || patternList.isEmpty()) {
        logger.error("There are no patterns in the table.");
        return;
    }

    logger.info("Initialising Data Miner rule generation...");
    Instances data = this.buildInstancesFromPatterns(patternList);
    if (data == null) {
        logger.error("Instances not being properly built.");
        return;
    }

    // Keep feature selection BEFORE classification, as in the original flow.
    // NOTE(review): Weka's AttributeSelection appears to default the class index of the
    // instances to the last attribute when it is unset, which the classifiers rely on - confirm.
    int[] selectedIndexes = this.featureSelection(data);

    // Hand-picked attribute positions (user_role, device_type, device_OS, device_owned_by, label).
    // The array is sized to the attribute count, so the remaining slots stay 0.
    // NOTE(review): that also lists attribute 0 (decision_cause) - confirm this is intended.
    int[] indexesReview = new int[data.numAttributes()];
    indexesReview[0] = 11;
    indexesReview[1] = 13;
    indexesReview[2] = 14;
    indexesReview[3] = 18;
    indexesReview[4] = 38;

    logger.info("Classifying...");
    String notParsedClassifierRules = this.dataClassification(data, indexesReview);

    logger.info("Obtaining rules from association algorithm...");
    // The association rules are computed but not used by the comparison below.
    this.associationRules(data, indexesReview);

    logger.info("Obtaining rules from best classifier...");
    List<String> classifierRules = parseClassifierOutput(notParsedClassifierRules);

    logger.info("Obtaining rules from DB...");
    List<String> droolsRules = parser.DBRulesParser();

    logger.info("Comparing...");
    if (classifierRules != null && droolsRules != null) {
        List<SecurityRules> alreadyDraftRules = dbManager.getSecurityRulesByStatus(Constants.DRAFT);
        // Each proposed rule is checked against ALL known rules before it is stored.
        for (String proposedRule : classifierRules) {
            if (!isKnownRule(proposedRule, droolsRules, alreadyDraftRules)) {
                storeDraftRule(proposedRule);
            }
        }
    }

    // featureSelection returns null when it fails, so test its result and not a helper array.
    if (selectedIndexes != null && selectedIndexes.length > 0) {
        logger.info("=== Results after feature selection ===");
        this.dataClassification(data, selectedIndexes);
    } else {
        logger.error("Feature selection not being properly performed");
    }
}

/**
 * Picks the parser matching the classifier named in the first line of the classifier output.
 *
 * @param classifierOutput textual output of the best classifier, may be {@code null}
 * @return the parsed rules, or {@code null} when there is no output or no parser matches
 */
private List<String> parseClassifierOutput(String classifierOutput) {
    if (classifierOutput == null) {
        logger.error("No classifier produced any output, no rules can be proposed.");
        return null;
    }
    // Trim before splitting: String.split keeps a leading empty element, so an output that
    // starts with a line break would otherwise hide its header.
    // NOTE(review): REPTree output appears to start with a blank line - confirm.
    String header = classifierOutput.trim().split("\\n+")[0];
    if (header.contains("JRIP")) {
        return parser.JRipParser(classifierOutput);
    }
    if (header.contains("PART")) {
        return parser.PARTParser(classifierOutput);
    }
    if (header.contains("J48")) {
        return parser.J48Parser(classifierOutput);
    }
    if (header.contains("REPTree")) {
        return parser.REPTreeParser(classifierOutput);
    }
    logger.warn("Classifier output not recognised, no rules can be proposed.");
    return null;
}

/**
 * Tells whether a proposed rule is alike any rule of the database or any stored draft rule.
 *
 * @param proposedRule rule obtained from the classifier
 * @param dbRules rules returned by the DB rule parser
 * @param draftRules rules already stored as drafts, may be {@code null}
 * @return {@code true} when {@code parser.isAlike} matches the rule against any of them
 */
private boolean isKnownRule(String proposedRule, List<String> dbRules, List<SecurityRules> draftRules) {
    for (String dbRule : dbRules) {
        if (parser.isAlike(dbRule, proposedRule)) {
            return true;
        }
    }
    if (draftRules != null) {
        for (SecurityRules draftRule : draftRules) {
            if (parser.isAlike(proposedRule, draftRule.getDescription())) {
                return true;
            }
        }
    }
    return false;
}

/**
 * Stores a proposed rule as a draft security rule (status DRAFT, not refined, modified now).
 *
 * @param proposedRule textual description of the rule to store
 */
private void storeDraftRule(String proposedRule) {
    SecurityRules finalRule = new SecurityRules();
    finalRule.setDescription(proposedRule);
    finalRule.setStatus(Constants.DRAFT);
    finalRule.setModification(new Date());
    finalRule.setName("Proposed Rule by Data Miner");
    finalRule.setRefined(new byte[] {0});
    dbManager.setSecurityRule(finalRule);
}
/**
 * Builds one system_log_krs entry per given simple event and hands the whole list to
 * {@code dbManager.setSystemLogKRS}. The table gives the CSO an overview of the system status.
 *
 * <p>Fields filled per entry: previous_event_id, current_event_id, decision_id,
 * user_behaviour_id, security_incident_id, device_security_state, risk_treatment, start_time
 * and finish_time. Identifiers that cannot be determined are stored as 0.
 *
 * <p>{@code null} elements are skipped with a warning. An empty or {@code null} list is logged
 * as an error and an empty list is still passed to the database handler.
 *
 * @param events simple events to summarise, as stored in the simple_events table
 */
public void retrievePendingEvents(List<SimpleEvents> events) {
    List<SystemLogKrs> list = new ArrayList<SystemLogKrs>();
    if (events == null || events.isEmpty()) {
        logger.error("There are not simple events in the database, system_log_krs cannot be filled.");
    } else {
        for (SimpleEvents event : events) {
            if (event == null) {
                logger.warn("Null simple event found, no system_log_krs entry can be built for it.");
                continue;
            }
            list.add(buildLogEntry(event));
        }
    }
    dbManager.setSystemLogKRS(list);
}

/**
 * Builds the system_log_krs entry of a single, non-null event.
 *
 * @param event the event to summarise
 * @return the filled log entry
 */
private SystemLogKrs buildLogEntry(SimpleEvents event) {
    SystemLogKrs logEntry = new SystemLogKrs();

    BigInteger eventID = null;
    String user = null;
    if (event.getEventId() != null) {
        eventID = new BigInteger(event.getEventId());
        if (event.getUser() != null) {
            user = event.getUser().getUserId();
        }
    }
    logEntry.setCurrentEventId(eventID);

    // The neighbouring events of the user can only be looked up when the event has a timestamp.
    Date day = event.getDate();
    boolean hasTimestamp = (day != null) && (event.getTime() != null);
    String dayText = hasTimestamp ? day.toString() : null;
    String time = hasTimestamp ? event.getTime().toString() : null;

    /* Previous event is the last event the user made */
    BigInteger previousEventId = BigInteger.ZERO;
    if (hasTimestamp) {
        SimpleEvents userLastEvent = dbManager.findEventsByUserId(user, dayText, time, Boolean.TRUE);
        if (userLastEvent != null) {
            previousEventId = new BigInteger(userLastEvent.getEventId());
        }
    }
    logEntry.setPreviousEventId(previousEventId);

    /* Looking for decision_id in table access_request; events without id are looked up as 0 */
    String lookupId = (eventID == null) ? BigInteger.ZERO.toString() : eventID.toString();
    List<AccessRequest> accessRequests = dbManager.findAccessRequestByEventId(lookupId);
    if (accessRequests != null && accessRequests.size() == 1) {
        logEntry.setDecisionId(accessRequests.get(0).getDecisionId());
    } else {
        logEntry.setDecisionId(BigInteger.ZERO);
    }

    /* User behaviour as next event_id */
    BigInteger nextEventId = BigInteger.ZERO;
    if (hasTimestamp) {
        SimpleEvents userNextEvent = dbManager.findEventsByUserId(user, dayText, time, Boolean.FALSE);
        if (userNextEvent != null) {
            nextEventId = new BigInteger(userNextEvent.getEventId());
        }
    }
    logEntry.setUserBehaviourId(nextEventId);

    /* Looking if that event caused a security violation; only the first one is recorded */
    List<SecurityViolation> securityViolations = dbManager.findSecurityViolationByEventId(event.getEventId());
    boolean hasViolation = (securityViolations != null) && (securityViolations.size() > 0);
    if (hasViolation) {
        logEntry.setSecurityIncidentId(new BigInteger(securityViolations.get(0).getSecurityViolationId()));
    } else {
        logEntry.setSecurityIncidentId(BigInteger.ZERO);
    }

    /* Device security state is the trust value of the device, truncated to a whole number */
    Devices device = event.getDevice();
    if (device != null) {
        logEntry.setDeviceSecurityState(BigInteger.valueOf((long) device.getTrustValue()));
    } else {
        logger.warn("Event " + event.getEventId() + " has no device, assigning 0 as device security state.");
        logEntry.setDeviceSecurityState(BigInteger.ZERO);
    }

    /* Risk treatment is the message of the security violation, or null when there is none */
    logEntry.setRiskTreatment(hasViolation ? securityViolations.get(0).getMessage() : null);

    /* Start and finish time are both set to the date of the event */
    logEntry.setStartTime(day);
    logEntry.setFinishTime(day);
    return logEntry;
}
/**
 * Builds the patterns to be stored in the patterns_krs table for one event. Each pattern
 * gathers the information related to the event: decision, user, device, application, asset,
 * mail and connectivity data.
 *
 * <p>One pattern is built per access request found for the event. When the event has no access
 * request, a single pattern is built using {@code "0"} as access request id.
 *
 * @param event The simple event over which the data mining is going to be performed
 *
 * @return the built patterns (always at least one), not yet stored in the database
 */
@SuppressWarnings("null")
public List<PatternsKrs> minePatterns(SimpleEvents event) {
    List<PatternsKrs> patternList = new ArrayList<PatternsKrs>();
    logger.info(event.getEventId());

    String eventID = event.getEventId();
    Users user = event.getUser();

    /* 1 event -> * decisions/access requests: one pattern per access request */
    List<AccessRequest> accessRequests = dbManager.findAccessRequestByEventId(eventID);
    int requestCount = (accessRequests == null) ? 0 : accessRequests.size();

    int i = 0;
    do {
        PatternsKrs pattern = new PatternsKrs();
        String accessRequestId = (requestCount > 0) ? accessRequests.get(i).getAccessRequestId() : "0";

        pattern.setLabel(dmUtils.obtainLabel(accessRequestId));
        pattern.setDecisionCause(dmUtils.obtainDecisionCause(accessRequestId, eventID));
        // Each value is fetched once; unknown values (null or NaN) are left unset.
        Double userTrust = dmUtils.obtainingUserTrust(accessRequestId);
        if (userTrust != null && !userTrust.isNaN()) {
            pattern.setUserTrustValue(userTrust);
        }
        Double deviceTrust = dmUtils.obtainingDeviceTrust(accessRequestId);
        if (deviceTrust != null && !deviceTrust.isNaN()) {
            pattern.setDeviceTrustValue(deviceTrust);
        }

        pattern.setEventType(dmUtils.obtainEventType(event));
        pattern.setEventLevel(dmUtils.obtainEventLevel(event));
        pattern.setUsername(dmUtils.obtainUsername(user));
        pattern.setPasswordLength(dmUtils.passwdLength(user));
        pattern.setNumbersInPassword(dmUtils.passwdDigits(user));
        pattern.setLettersInPassword(dmUtils.passwdLetters(user));
        pattern.setPasswdHasCapitalLetters(dmUtils.passwdCapLetters(user));
        pattern.setActivatedAccount(user.getEnabled());
        pattern.setUserRole(dmUtils.obtainUserRole(user));
        pattern.setEventTime(dmUtils.obtainTimestamp(event));
        // NOTE(review): the second call overwrites the value set by the first one - confirm
        // whether both trials are meant to be kept.
        pattern.setSilentMode(dmUtils.silentModeTrials1(event));
        pattern.setSilentMode(dmUtils.silentModeTrials2(event));
        pattern.setDeviceType(dmUtils.obtainDeviceModel(event));
        pattern.setDeviceOS(dmUtils.obtainDeviceOS(event));
        pattern.setDeviceOwnedBy(dmUtils.obtainDeviceOwner(event));
        pattern.setDeviceHasCertificate(dmUtils.obtainDeviceCertificate(event));
        pattern.setAppName(dmUtils.obtainAppName(event));
        pattern.setAppVendor(dmUtils.obtainAppVendor(event));
        pattern.setAppMUSESAware(dmUtils.obtainMusesAwareness(event));
        pattern.setAssetName(dmUtils.obtainAssetName(event));
        Double assetValue = dmUtils.obtainAssetValue(event);
        if (assetValue != null && !assetValue.isNaN()) {
            pattern.setAssetValue(assetValue);
        }
        pattern.setAssetConfidentialLevel(dmUtils.obtainAssetConfidentiality(event));
        pattern.setAssetLocation(dmUtils.obtainAssetLocation(event));

        /* Device configuration: password, screen timeout, rooted, accessibility, antivirus */
        List<Integer> configList = dmUtils.readConfigurationJSON(event);
        if (configList != null && configList.size() >= 5) {
            pattern.setDeviceHasPassword(configList.get(0));
            pattern.setDeviceScreenTimeout(BigInteger.valueOf(configList.get(1).intValue()));
            pattern.setDeviceIsRooted(configList.get(2));
            pattern.setDeviceHasAccessibility(configList.get(3));
            pattern.setDeviceHasAntivirus(configList.get(4));
        }

        /* Mail properties: BCC, CC, recipient allowed, attachment */
        List<Integer> mailList = dmUtils.readMailJSON(event);
        if (mailList != null && mailList.size() >= 4) {
            pattern.setMailContainsBCC(mailList.get(0));
            pattern.setMailContainsCC(mailList.get(1));
            pattern.setMailRecipientAllowed(mailList.get(2));
            pattern.setMailHasAttachment(mailList.get(3));
        }

        /* Connectivity: wifi encryption, bluetooth connected, wifi enabled, wifi connected */
        List<String> wifiList = dmUtils.readAssetJSON(event);
        if (wifiList != null && wifiList.size() >= 4) {
            pattern.setWifiEncryption(wifiList.get(0));
            try {
                // Parse everything first so that the flags are set all together or not at all.
                int bluetoothConnected = Integer.parseInt(wifiList.get(1));
                int wifiEnabled = Integer.parseInt(wifiList.get(2));
                int wifiConnected = Integer.parseInt(wifiList.get(3));
                pattern.setBluetoothConnected(bluetoothConnected);
                pattern.setWifiEnabled(wifiEnabled);
                pattern.setWifiConnected(wifiConnected);
            } catch (NumberFormatException e) {
                logger.warn("Connectivity flags of event " + eventID + " are not numeric, leaving them unset.", e);
            }
        }

        i++;
        patternList.add(pattern);
    } while (i < requestCount);

    return patternList;
}
/**
 * Transforms the rows of the patterns_krs table into a Weka {@code Instances} object.
 *
 * <p>The data set has 39 attributes, the last one being {@code label}. The values of the
 * nominal attributes are the distinct values read from the database; a pattern value that is
 * {@code null} or not among them is stored as a missing value. Patterns whose event type is
 * {@code null} or not in the list of mined event types are skipped. At the end the
 * {@code ReplaceMissingValues} filter is applied to the whole set.
 *
 * @param dbPatterns List with all rows in patterns_krs table.
 *
 * @return the instances after replacing missing values, or {@code null} if that filter fails
 */
public Instances buildInstancesFromPatterns(List<PatternsKrs> dbPatterns) {
    List<String> decisionCauses = dbManager.getDistinctDecisionCauses();
    List<String> eventTypes = dbManager.getDistinctEventTypes();
    List<String> eventLevels = dbManager.getDistinctEventLevels();
    List<String> usernames = dbManager.getDistinctUsernames();
    List<String> userRoles = dbManager.getDistinctUserRoles();
    List<String> deviceTypes = dbManager.getDistinctDeviceType();
    List<String> deviceOSs = dbManager.getDistinctDeviceOS();
    List<String> deviceOwners = dbManager.getDistinctDeviceOwnedBy();
    List<String> appNames = dbManager.getDistinctAppName();
    List<String> appVendors = dbManager.getDistinctAppVendor();
    List<String> assetNames = dbManager.getDistinctAssetName();
    List<String> assetConfidentialLevels = dbManager.getDistinctAssetConfidentialLevel();
    List<String> assetLocations = dbManager.getDistinctAssetLocation();
    List<String> allLabels = dbManager.getDistinctLabels();
    List<String> wifiEncryptions = dbManager.getDistinctWifiEncryptions();

    /* The position of each attribute is the index used for the vals array below */
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    atts.add(new Attribute("decision_cause", decisionCauses));
    atts.add(new Attribute("silent_mode"));
    atts.add(new Attribute("event_type", eventTypes));
    atts.add(new Attribute("event_level", eventLevels));
    atts.add(new Attribute("username", usernames));
    atts.add(new Attribute("password_length"));
    atts.add(new Attribute("letters_in_password"));
    atts.add(new Attribute("numbers_in_password"));
    atts.add(new Attribute("passwd_has_capital_letters"));
    atts.add(new Attribute("user_trust_value"));
    atts.add(new Attribute("activated_account"));
    atts.add(new Attribute("user_role", userRoles));
    atts.add(new Attribute("event_detection", "yyyy-MM-dd HH:mm:ss"));
    atts.add(new Attribute("device_type", deviceTypes));
    atts.add(new Attribute("device_OS", deviceOSs));
    atts.add(new Attribute("device_has_antivirus"));
    atts.add(new Attribute("device_has_certificate"));
    atts.add(new Attribute("device_trust_value"));
    atts.add(new Attribute("device_owned_by", deviceOwners));
    atts.add(new Attribute("device_has_password"));
    atts.add(new Attribute("device_screen_timeout"));
    atts.add(new Attribute("device_has_accessibility"));
    atts.add(new Attribute("device_is_rooted"));
    atts.add(new Attribute("app_name", appNames));
    atts.add(new Attribute("app_vendor", appVendors));
    atts.add(new Attribute("app_is_MUSES_aware"));
    atts.add(new Attribute("asset_name", assetNames));
    atts.add(new Attribute("asset_value"));
    atts.add(new Attribute("asset_confidential_level", assetConfidentialLevels));
    atts.add(new Attribute("asset_location", assetLocations));
    atts.add(new Attribute("mail_recipient_allowed"));
    atts.add(new Attribute("mail_contains_cc_allowed"));
    atts.add(new Attribute("mail_contains_bcc_allowed"));
    atts.add(new Attribute("mail_has_attachment"));
    atts.add(new Attribute("wifiEncryption", wifiEncryptions));
    atts.add(new Attribute("wifiEnabled"));
    atts.add(new Attribute("wifiConnected"));
    atts.add(new Attribute("bluetoothConnected"));
    atts.add(new Attribute("label", allLabels));
    Instances data = new Instances("patternsData", atts, 0);

    /* Only patterns of these event types are turned into instances */
    List<String> minedEventTypes = java.util.Arrays.asList(
            "SECURITY_PROPERTY_CHANGED",
            "ACTION_REMOTE_FILE_ACCESS",
            "ACTION_APP_OPEN",
            "ACTION_SEND_MAIL",
            "SAVE_ASSET",
            "VIRUS_FOUND",
            "CONTEXT_SENSOR_PACKAGE",
            "CONTEXT_SENSOR_CONNECTIVITY",
            "CONTEXT_SENSOR_PERIPHERAL",
            "CONTEXT_SENSOR_DEVICE_PROTECTION",
            "CONFIGURATION_CHANGE",
            "SECURITY_INCIDENT",
            "user_entered_password_field",
            "USER_BEHAVIOR",
            "CONTEXT_SENSOR_APP");

    // Same pattern as the event_detection attribute; created once instead of once per row.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    if (dbPatterns != null) {
        for (PatternsKrs pattern : dbPatterns) {
            String eventType = pattern.getEventType();
            if (eventType == null || !minedEventTypes.contains(eventType)) {
                continue;
            }
            double[] vals = new double[data.numAttributes()];

            vals[0] = nominalValue(decisionCauses, pattern.getDecisionCause());
            vals[1] = pattern.getSilentMode();
            vals[2] = nominalValue(eventTypes, eventType);
            vals[3] = nominalValue(eventLevels, pattern.getEventLevel());
            vals[4] = nominalValue(usernames, pattern.getUsername());
            vals[5] = pattern.getPasswordLength();
            vals[6] = pattern.getLettersInPassword();
            vals[7] = pattern.getNumbersInPassword();
            vals[8] = pattern.getPasswdHasCapitalLetters();
            Double userTrust = pattern.getUserTrustValue();
            vals[9] = (userTrust == null || userTrust.isNaN()) ? Utils.missingValue() : userTrust;
            vals[10] = pattern.getActivatedAccount();
            vals[11] = nominalValue(userRoles, pattern.getUserRole());

            /* The event time is formatted and parsed back by the date attribute (index 12) */
            Object eventTime = pattern.getEventTime();
            if (eventTime == null) {
                vals[12] = Utils.missingValue();
            } else {
                try {
                    vals[12] = data.attribute(12).parseDate(sdf.format(eventTime));
                } catch (ParseException e) {
                    logger.warn("Event time of a pattern could not be parsed, treating it as missing.", e);
                    vals[12] = Utils.missingValue();
                }
            }

            /* Some device models and OS names are treated as missing (presumably test data) */
            String deviceModel = pattern.getDeviceType();
            if (deviceModel != null
                    && (deviceModel.equalsIgnoreCase("domemodel") || deviceModel.equalsIgnoreCase("1222"))) {
                deviceModel = null;
            }
            vals[13] = nominalValue(deviceTypes, deviceModel);
            String deviceOS = pattern.getDeviceOS();
            if (deviceOS != null && deviceOS.equalsIgnoreCase("a0")) {
                deviceOS = null;
            }
            vals[14] = nominalValue(deviceOSs, deviceOS);
            vals[15] = pattern.getDeviceHasAntivirus();
            vals[16] = pattern.getDeviceHasCertificate();
            Double deviceTrust = pattern.getDeviceTrustValue();
            vals[17] = (deviceTrust == null || deviceTrust.isNaN()) ? Utils.missingValue() : deviceTrust;
            vals[18] = nominalValue(deviceOwners, pattern.getDeviceOwnedBy());
            vals[19] = pattern.getDeviceHasPassword();
            BigInteger screenTimeout = pattern.getDeviceScreenTimeout();
            vals[20] = (screenTimeout == null) ? Utils.missingValue() : screenTimeout.doubleValue();
            vals[21] = pattern.getDeviceHasAccessibility();
            vals[22] = pattern.getDeviceIsRooted();
            vals[23] = nominalValue(appNames, pattern.getAppName());
            vals[24] = nominalValue(appVendors, pattern.getAppVendor());
            vals[25] = pattern.getAppMUSESAware();

            /* Empty asset names and locations are treated as missing */
            String assetName = pattern.getAssetName();
            if (assetName != null && assetName.equalsIgnoreCase("")) {
                assetName = null;
            }
            vals[26] = nominalValue(assetNames, assetName);
            vals[27] = pattern.getAssetValue();
            vals[28] = nominalValue(assetConfidentialLevels, pattern.getAssetConfidentialLevel());
            String assetLocation = pattern.getAssetLocation();
            if (assetLocation != null && assetLocation.equalsIgnoreCase("")) {
                assetLocation = null;
            }
            vals[29] = nominalValue(assetLocations, assetLocation);
            vals[30] = pattern.getMailRecipientAllowed();
            vals[31] = pattern.getMailContainsCC();
            vals[32] = pattern.getMailContainsBCC();
            vals[33] = pattern.getMailHasAttachment();
            vals[34] = nominalValue(wifiEncryptions, pattern.getWifiEncryption());
            vals[35] = pattern.getWifiEnabled();
            vals[36] = pattern.getWifiConnected();
            vals[37] = pattern.getBluetoothConnected();
            vals[38] = nominalValue(allLabels, pattern.getLabel());

            data.add(new DenseInstance(1.0, vals));
        }
    }

    /* As there will be missing data, it is important to deal with it before working with the instances */
    ReplaceMissingValues replaceMissingValues = new ReplaceMissingValues();
    Instances newData = null;
    try {
        replaceMissingValues.setInputFormat(data);
        newData = Filter.useFilter(data, replaceMissingValues);
    } catch (Exception e) {
        logger.error("Missing values could not be replaced, no instances are returned.", e);
    }
    return newData;
}

/**
 * Encodes a nominal value as its position in the list of values of its attribute.
 *
 * @param values the values of the nominal attribute
 * @param value the value to encode, may be {@code null}
 * @return the position of the value, or the Weka missing value when the value is {@code null}
 *         or not in the list (a negative position is not a valid nominal value)
 */
private static double nominalValue(List<String> values, String value) {
    if (values == null || value == null) {
        return Utils.missingValue();
    }
    int index = values.indexOf(value);
    return (index < 0) ? Utils.missingValue() : index;
}
/**
 * Selects the most representative attributes of the given instances, using a
 * {@code CfsSubsetEval} evaluator with a {@code GreedyStepwise} search.
 *
 * <p>NOTE(review): Weka's attribute selection appears to set the class index of {@code data}
 * to its last attribute when none is set; {@code ruleComparison} runs this method before
 * classifying - confirm before reordering those calls.
 *
 * @param data The instances from patterns_krs table
 *
 * @return the indexes of the selected attributes, or {@code null} when the selection fails
 */
public int[] featureSelection(Instances data) {
    AttributeSelection attsel = new AttributeSelection();
    attsel.setEvaluator(new CfsSubsetEval());
    attsel.setSearch(new GreedyStepwise());
    try {
        attsel.SelectAttributes(data);
        int[] indexes = attsel.selectedAttributes();
        logger.info("Selected Features: " + Utils.arrayToString(indexes));
        return indexes;
    } catch (Exception e) {
        // Reported through the class logger instead of the standard error stream.
        logger.error("Feature selection failed.", e);
        return null;
    }
}
/**
 * Filters the attributes of the data with a {@code Remove} filter and then trains and
 * cross-validates (10 folds, seed 1) four classifiers: J48, JRip, PART and REPTree. The
 * percentage of correctly classified instances of each one is logged.
 *
 * <p>NOTE(review): the filter is first configured with {@code indexes} (inverted selection)
 * and then with the options {@code -R 1}; the options probably override the index array, so
 * that only the first attribute is removed - confirm against the Weka {@code Remove} filter.
 * The configuration is kept exactly as it was.
 *
 * <p>If the filter fails, the unfiltered data is classified. If the data to classify has no
 * class attribute set, the attribute named {@code label} is used when present.
 *
 * @param data The original set of instances
 * @param indexes The selected indexes by the feature selection algorithm
 *
 * @return the textual model of the classifier with the highest percentage of correctly
 *         classified instances (J48 wins ties, later classifiers must be strictly better),
 *         or {@code null} when no classifier could be evaluated
 */
public String dataClassification(Instances data, int[] indexes) {
    Instances newData = data;
    String[] options = new String[2];
    options[0] = "-R";
    options[1] = "1";
    try {
        Remove remove = new Remove();
        remove.setAttributeIndicesArray(indexes);
        remove.setInvertSelection(true);
        remove.setOptions(options);
        remove.setInputFormat(data);
        newData = Filter.useFilter(data, remove);
    } catch (Exception e) {
        logger.error("Attributes could not be removed, classifying the unfiltered instances.", e);
    }

    // The classifiers need a class attribute; fall back to "label" when the caller set none.
    if (newData != null && newData.classIndex() < 0) {
        Attribute label = newData.attribute("label");
        if (label != null) {
            newData.setClass(label);
        }
    }

    String classifierRules = null;
    double percentageCorrect = 0;

    /* (1) J48, default options: its result is taken whenever it could be evaluated */
    J48 treeJ48 = new J48();
    double pct = crossValidate(treeJ48, "J48", null, newData);
    if (!Double.isNaN(pct)) {
        percentageCorrect = pct;
        classifierRules = treeJ48.toString();
    }

    /* (2) JRip with option -P */
    JRip treeJRip = new JRip();
    pct = crossValidate(treeJRip, "JRip", new String[] {"-P"}, newData);
    if (pct > percentageCorrect) {
        percentageCorrect = pct;
        classifierRules = treeJRip.toString();
    }

    /* (3) PART with option -U */
    PART treePART = new PART();
    pct = crossValidate(treePART, "PART", new String[] {"-U"}, newData);
    if (pct > percentageCorrect) {
        percentageCorrect = pct;
        classifierRules = treePART.toString();
    }

    /* (4) REPTree, default options */
    REPTree treeREPTree = new REPTree();
    pct = crossValidate(treeREPTree, "REPTree", null, newData);
    if (pct > percentageCorrect) {
        percentageCorrect = pct;
        classifierRules = treeREPTree.toString();
    }

    return classifierRules;
}

/**
 * Builds a classifier on the data and evaluates it with a 10-fold cross-validation (seed 1).
 *
 * @param classifier the classifier to build; it is left trained on the whole data
 * @param name name of the classifier, used in the log messages
 * @param options options to set on the classifier before building it, or {@code null}
 * @param data the instances to train and evaluate on
 * @return the percentage of correctly classified instances, or {@code NaN} on failure
 */
private double crossValidate(weka.classifiers.Classifier classifier, String name, String[] options,
        Instances data) {
    try {
        if (options != null) {
            ((weka.core.OptionHandler) classifier).setOptions(options);
        }
        classifier.buildClassifier(data);
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(classifier, data, 10, new Random(1));
        double pct = eval.pctCorrect();
        logger.info("Percentage of correctly classified instances for " + name + " classifier: " + pct);
        return pct;
    } catch (Exception e) {
        logger.error("Classifier " + name + " could not be built or evaluated.", e);
        return Double.NaN;
    }
}
/**
 * Filters the attributes of the data with a {@code Remove} filter, converts the result with a
 * {@code NumericToNominal} filter and obtains up to 500 rules with the Apriori algorithm.
 *
 * <p>NOTE(review): the {@code Remove} filter is configured with {@code indexes} (inverted
 * selection) and then with the options {@code -R 1}, which probably override the index array -
 * confirm against the Weka {@code Remove} filter. The same options array is afterwards given
 * to the {@code NumericToNominal} filter; Weka option parsing appears to consume the entries
 * it reads, so the second filter may see an empty array - confirm before passing a fresh one.
 *
 * <p>A failing filter is logged and the data obtained so far is used.
 *
 * @param data The original set of instances
 * @param indexes The selected indexes by the feature selection algorithm
 *
 * @return the textual output of the Apriori algorithm, or {@code null} when the rules could
 *         not be built
 */
public String associationRules(Instances data, int[] indexes) {
    Instances newData = data;
    String[] options = new String[2];
    options[0] = "-R";
    options[1] = "1";
    try {
        Remove remove = new Remove();
        remove.setAttributeIndicesArray(indexes);
        remove.setInvertSelection(true);
        remove.setOptions(options);
        remove.setInputFormat(data);
        newData = Filter.useFilter(data, remove);
    } catch (Exception e) {
        logger.error("Attributes could not be removed, using the unfiltered instances.", e);
    }

    Instances filteredData = newData;
    try {
        NumericToNominal filter = new NumericToNominal();
        // Deliberately the same array object that was given to the Remove filter above.
        filter.setOptions(options);
        filter.setInputFormat(filteredData);
        filteredData = Filter.useFilter(newData, filter);
    } catch (Exception e) {
        logger.error("Numeric attributes could not be converted to nominal.", e);
    }

    Apriori aprioriObj = new Apriori();
    aprioriObj.setNumRules(500);
    try {
        aprioriObj.buildAssociations(filteredData);
    } catch (Exception e) {
        // Without a built model there is nothing meaningful to print.
        logger.error("Association rules could not be built.", e);
        return null;
    }
    return aprioriObj.toString();
}
}