/**
* File ./src/main/java/de/lemo/dms/connectors/chemgapedia/fizHelper/LogReader.java
* Lemo-Data-Management-Server for learning analytics.
* Copyright (C) 2013
* Leonard Kappe, Andreas Pursian, Sebastian Schwarzrock, Boris Wenzlaff
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
**/
/**
* File ./main/java/de/lemo/dms/connectors/chemgapedia/fizHelper/LogReader.java
* Date 2013-01-24
* Project Lemo Learning Analytics
*/
package de.lemo.dms.connectors.chemgapedia.fizHelper;
import java.io.BufferedReader;
import java.io.FileReader;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.hibernate.Criteria;
import org.hibernate.Query;
import org.hibernate.Session;
import org.hibernate.criterion.Restrictions;
import de.lemo.dms.connectors.IConnector;
import de.lemo.dms.core.Clock;
import de.lemo.dms.core.config.ServerConfiguration;
import de.lemo.dms.db.IDBHandler;
import de.lemo.dms.db.mapping.CourseMining;
import de.lemo.dms.db.mapping.CourseResourceMining;
import de.lemo.dms.db.mapping.CourseUserMining;
import de.lemo.dms.db.mapping.IDMappingMining;
import de.lemo.dms.db.mapping.ResourceLogMining;
import de.lemo.dms.db.mapping.ResourceMining;
import de.lemo.dms.db.mapping.RoleMining;
import de.lemo.dms.db.mapping.UserMining;
/**
* The Class LogReader. Reads Chemgapedia's server-logs and saves the found objects into the database.
*/
public class LogReader {
/**
* User-objects of previous connector-runs
*/
private final Map<String, UserMining> oldUsers = new HashMap<String, UserMining>();
/**
* Role-objects of previous connector-runs
*/
private final Map<Long, RoleMining> oldRoles = new HashMap<Long, RoleMining>();
/**
* CourseUser-objects of previous connector-runs
*/
private final Map<String, CourseUserMining> oldCourseUsers = new HashMap<String, CourseUserMining>();
/**
* User-objects of current connector-run
*/
private Map<String, UserMining> newUsers = new HashMap<String, UserMining>();
/**
* Resource-objects of previous connector-runs
*/
private final Map<String, ResourceMining> oldResources = new HashMap<String, ResourceMining>();
/**
* Resource-objects of current connector-run
*/
private final Map<String, ResourceMining> newResources = new HashMap<String, ResourceMining>();
/**
* CourseResource-objects found in database
*/
private final Map<String, CourseResourceMining> courseResources = new HashMap<String, CourseResourceMining>();
/**
* IDMapping-objects of previous connector-runs
*/
private Map<String, IDMappingMining> idMapping = new HashMap<String, IDMappingMining>();
/**
* IDMapping-objects of current connector-run
*/
private Map<String, IDMappingMining> newIdMapping = new HashMap<String, IDMappingMining>();
/**
* HashMap storing all logged accesses, with according user-login as key
*/
private final Map<String, ArrayList<LogObject>> userHistories = new HashMap<String, ArrayList<LogObject>>();
/**
* Course-objects of previous connector-runs
*/
private final Map<String, CourseMining> oldCourses = new HashMap<String, CourseMining>();
/**
* Internal clock-object for statistics
*/
private final Clock clock = new Clock();
/**
* DBHandler-object, for connection to Mining-Database
*/
private final IDBHandler dbHandler = ServerConfiguration.getInstance().getMiningDbHandler();
/**
* Database's largest id used in ResourceLogMining
*/
private Long resLogId;
/**
* Database's largest id used in UserMining
*/
private Long userIdCount;
/**
* Database's largest id used in CourseUserMining
*/
private Long courseUserIdCount;
/**
* Database's largest id used in ResourceMining
*/
private Long resIdCount;
private Long startTime;
/**
* Database's largest timestamp used in ResourceLogMining
*/
private final IConnector connector;
private Logger logger = Logger.getLogger(this.getClass());
private RoleMining standardRole;
/**
* Creates a new LogReader-object, imports necessary objects from Mining-Database and sets counters.
*
* @param connector
* @param session
*/
@SuppressWarnings("unchecked")
public LogReader(final IConnector connector, final Session session, List<Long> courses)
{
this.connector = connector;
final long platformId = connector.getPlatformId();
try {
this.startTime = (Long) session.createQuery("Select max(latestTimestamp) from ConfigMining where platform=" + this.connector.getPlatformId()).uniqueResult();
if(this.startTime == null)
{
this.startTime = 0L;
}
this.newIdMapping = new HashMap<String, IDMappingMining>();
Criteria c = session.createCriteria(IDMappingMining.class, "idmap");
c.add(Restrictions.eq("idmap.platform", platformId));
final List<IDMappingMining> ids = c.list();
// Load previously saved idMappingMining, used to identify resources
this.idMapping = new HashMap<String, IDMappingMining>();
for (int i = 0; i < ids.size(); i++)
{
ids.get(i).setPlatform(connector.getPlatformId());
this.idMapping.put(ids.get(i).getHash(), ids.get(i));
}
logger.info("Read " + ids.size() + " IDMappings from database.");
// Load previously saved UserMining-objects
c = session.createCriteria(UserMining.class, "users");
c.add(Restrictions.eq("users.platform", platformId));
final List<UserMining> us = c.list();
for (int i = 0; i < us.size(); i++) {
this.oldUsers.put(us.get(i).getLogin(), us.get(i));
}
logger.info("Read " + us.size() + " UserMinings from database.");
// Load previously saved ResourceMining-objects
c = session.createCriteria(ResourceMining.class, "resources");
c.add(Restrictions.eq("resources.platform", platformId));
final List<ResourceMining> rt = c.list();
for (final ResourceMining res : rt) {
this.oldResources.put(res.getUrl(), res);
}
logger.info("Read " + rt.size() + " ResourceMinings from database.");
// Load previously saved CourseUserMining-objects
c = session.createCriteria(CourseUserMining.class, "courseUsers");
c.add(Restrictions.eq("courseUsers.platform", platformId));
final List<CourseUserMining> cu = c.list();
for (final CourseUserMining res : cu) {
this.oldCourseUsers.put(res.getCourse().getId() + "" + res.getUser().getId(), res);
}
logger.info("Read " + cu.size() + " CourseUserMinings from database.");
// Load previously saved CourseMining-objects
c = session.createCriteria(CourseMining.class, "courses");
c.add(Restrictions.eq("courses.platform", platformId));
final List<CourseMining> cm = c.list();
for (int i = 0; i < cm.size(); i++)
{
this.oldCourses.put(cm.get(i).getTitle(), cm.get(i));
}
logger.info("Read " + cm.size() + " CourseMinings from database.");
// Load previously saved CourseMining-objects
c = session.createCriteria(RoleMining.class, "roles");
c.add(Restrictions.eq("roles.platform", platformId));
final List<RoleMining> roles = c.list();
for (int i = 0; i < roles.size(); i++)
{
this.oldRoles.put(roles.get(i).getId(), roles.get(i));
}
logger.info("Read " + cm.size() + " RoleMinings from database.");
if(this.oldRoles.size() == 0)
{
RoleMining role = new RoleMining();
role.setId(Long.valueOf(connector.getPrefix() + "" + 0));
role.setDescription("Standard Chemgapedia-Nutzer");
role.setName("Student");
role.setPlatform(connector.getPlatformId());
role.setShortname("STD");
role.setType(2);
this.standardRole = role;
this.oldRoles.put(role.getId(), role);
}
// Load previously saved CourseResourceMining-objects
c = session.createCriteria(CourseResourceMining.class, "coursesResources");
c.add(Restrictions.eq("coursesResources.platform", platformId));
final List<CourseResourceMining> courseResource = c.list();
for (int i = 0; i < courseResource.size(); i++) {
this.courseResources.put(courseResource.get(i).getResource().getUrl(), courseResource.get(i));
}
logger.info("Read " + courseResource.size() + " CourseResourceMinings from database.");
final Query resCount = session.createQuery("select max(res.id) from ResourceMining res where res.platform="
+ platformId + "");
if (resCount.list().size() > 0) {
this.resIdCount = ((ArrayList<Long>) resCount.list()).get(0);
}
if ((this.resIdCount != null) && (this.resIdCount != 0)) {
this.resIdCount = Long.valueOf(this.resIdCount.toString().substring(
connector.getPrefix().toString().length()));
} else {
this.resIdCount = 0L;
}
final Query userCount = session.createQuery("select max(user.id) from UserMining user where user.platform="
+ platformId + "");
if (userCount.list().size() > 0) {
this.userIdCount = ((ArrayList<Long>) userCount.list()).get(0);
}
if ((this.userIdCount != null) && (this.userIdCount != 0)) {
this.userIdCount = Long.valueOf(this.userIdCount.toString().substring(
connector.getPrefix().toString().length()));
} else {
this.userIdCount = 0L;
}
final Query courseUserCount = session.createQuery("select max(courseUser.id) from CourseUserMining courseUser where courseUser.platform="
+ platformId + "");
if (courseUserCount.list().size() > 0) {
this.courseUserIdCount = ((ArrayList<Long>) courseUserCount.list()).get(0);
}
if ((this.courseUserIdCount != null) && (this.courseUserIdCount != 0)) {
this.courseUserIdCount = Long.valueOf(this.courseUserIdCount.toString().substring(
connector.getPrefix().toString().length()));
} else {
this.courseUserIdCount = 0L;
}
final Query logCount = session
.createQuery("select max(log.id) from ResourceLogMining log");
if (logCount.list().size() > 0) {
this.resLogId = ((ArrayList<Long>) logCount.list()).get(0);
}
if (this.resLogId == null) {
this.resLogId = 0L;
}
session.close();
} catch (final Exception e)
{
logger.error(e.getMessage());
}
}
/**
* Updates the data.
*
* @param inFile
* @return
*/
private void update(final boolean filterLog, final Session session)
{
if (filterLog) {
this.filterServerLogFile();
}
this.save(session);
for (final ResourceMining resource : this.newResources.values()) {
this.oldResources.put(resource.getUrl(), resource);
}
for (final UserMining user : this.newUsers.values()) {
this.oldUsers.put(user.getLogin(), user);
}
for (final IDMappingMining mapping : this.newIdMapping.values()) {
this.idMapping.put(mapping.getHash(), mapping);
}
this.clearMaps();
}
public void clearMaps()
{
this.newResources.clear();
this.newUsers.clear();
this.userHistories.clear();
this.newIdMapping.clear();
}
/**
* Filters irrelevant lines (bots, spiders) out of the log file and writes a new one.
*
* @param outFile
* filename of the new filtered log file.
*/
private void filterServerLogFile()
{
try {
this.clock.reset();
ArrayList<LogObject> a;
final Object[] user = this.newUsers.values().toArray();
final HashMap<String, UserMining> tempUsers = new HashMap<String, UserMining>();
final double old = Long.parseLong(this.newUsers.size() + "");
int totalLines = 0;
int linesDeleted = 0;
final BotFinder bf = new BotFinder();
for (int i = 0; i < user.length; i++)
{
int susp1 = 0;
int susp2 = 0;
int susp3 = 0;
a = this.userHistories.get(((UserMining) user[i]).getLogin());
Collections.sort(a);
if ((a != null) && (a.size() > 0))
{
totalLines += a.size();
susp1 = bf.checkFastOnes(a, 1).size();
susp2 = bf.checkPeriods(a, 5);
susp3 = bf.checkForRepetitions(a, 10);
if ((susp1 < 1) && (susp2 == 0) && (susp3 == 0)) {
tempUsers.put(((UserMining) user[i]).getLogin(), (UserMining) user[i]);
} else
{
linesDeleted += a.size();
this.userHistories.remove(((UserMining) user[i]).getLogin());
}
}
}
final double cutUsePerc = (old - Long.valueOf("" + tempUsers.size())) / (old / 100);
final double tmp = totalLines / 100.0d;
final double cutLinPerc = linesDeleted / tmp;
logger.info("Filtered " + (old - tempUsers.size()) + " suspicious users out of " + old + " ("
+ new DecimalFormat("0.00").format(cutUsePerc) + "%), eliminating " + linesDeleted + " log lines ("
+ new DecimalFormat("0.00").format(cutLinPerc) + "%).");
this.newUsers = tempUsers;
} catch (final Exception e)
{
logger.error("While filtering log-file:");
}
}
/**
* Loads the data from the server-log-file.
*
* @param inFile
* the path of the server log file
* @param linesPerRun
* Number of log-lines that are read before data is stored to database
* @param filterLog
* Determines whether suspicious logs are ignored or not
*/
public void loadServerLogData(final String inFile, final Long linesPerRun, final boolean filterLog,
final Session session)
{
try
{
logger.info("Reading server log " + inFile);
final BufferedReader input = new BufferedReader(new FileReader(inFile));
int count = 0;
try
{
String line = null;
this.clock.reset();
Long i = 0L;
while ((line = input.readLine()) != null)
{
i++;
if ((linesPerRun != 0) && (i > 0) && ((i % linesPerRun) == 0))
{
this.update(filterLog, session);
}
count++;
boolean newRes = false;
final LogLine logLine = new LogLine(line);
// The line is only processed, if it is readable and not older the the line before
if (logLine.isValid() && this.startTime < logLine.getTimestamp())
{
final LogObject lo = new LogObject();
String name;
name = logLine.getId();
long id = -1;
// Set user-id
if (this.idMapping.get(name) != null)
{
id = this.idMapping.get(name).getId();
lo.setId(id);
}
if (id == -1)
{
id = this.userIdCount + 1;
this.userIdCount = id;
lo.setId(Long.valueOf(this.connector.getPrefix() + "" + id));
}
// Set timestamp
lo.setTime(logLine.getTimestamp());
// Set url
lo.setUrl(logLine.getUrl());
// Set HTTP-status
lo.setStatus(logLine.getHttpStatus());
// Set referrer
lo.setReferrer(logLine.getReferrer());
// Set duration to standard (will be calculated later on)
lo.setDuration(-1);
// Check if resource is already known. If yes, set course. Else create new resource later on.
if ((this.oldResources.get(lo.getUrl()) == null)
&& (this.newResources.get(lo.getUrl()) == null))
{
newRes = true;
lo.setCourse(null);
}
else
{
final CourseResourceMining c = this.courseResources.get(lo.getUrl());
if (this.courseResources.get(lo.getUrl()) != null) {
lo.setCourse(c.getCourse());
}
}
// Check if users is known
if ((this.oldUsers.get(logLine.getId()) != null)
|| (this.newUsers.get(logLine.getId()) != null))
{
UserMining u;
if (this.oldUsers.get(logLine.getId()) != null) {
u = this.oldUsers.get(logLine.getId());
} else {
u = this.newUsers.get(logLine.getId());
}
// Check if the user is known and if he has 'logged out' since last request
if (!lo.getReferrer().equals("-") && !lo.getReferrer().contains("www.google."))
{
final ArrayList<LogObject> tlo = this.userHistories.get(logLine.getId());
if ((tlo != null) && tlo.get(tlo.size() - 1).getReferrer().equals(lo.getUrl()))
{
this.userHistories.get(logLine.getId())
.get(this.userHistories.get(logLine.getId()).size() - 1)
.setDuration(lo.getTime() - u.getLastAccess());
}
if (lo.getTime() > u.getLastAccess()) {
u.setLastAccess(lo.getTime());
}
}
else
{
u.setLastLogin(u.getCurrentLogin());
if (u.getCurrentLogin() < lo.getTime()) {
u.setCurrentLogin(lo.getTime());
}
if (u.getLastAccess() < lo.getTime()) {
u.setLastAccess(lo.getTime());
}
}
u.setPlatform(this.connector.getPlatformId());
this.newUsers.put(u.getLogin(), u);
lo.setUser(u);
}
else
{
// If the user is unknown, create new user-object
final UserMining u = new UserMining();
u.setId(lo.getId());
u.setGender(0);
u.setFirstAccess(lo.getTime());
u.setLastAccess(lo.getTime());
// google-referrers aren't replaced with "-" although they are external
if (lo.getReferrer().equals("-") || lo.getReferrer().startsWith("www.google.")) {
u.setCurrentLogin(lo.getTime());
} else {
u.setCurrentLogin(0);
}
u.setPlatform(this.connector.getPlatformId());
u.setLogin(logLine.getId());
this.newUsers.put(logLine.getId(), u);
lo.setUser(u);
}
// Save viewed glossary entries to the resource-list because they aren't registered within
// XML-packages
if ((newRes && lo.getUrl().endsWith(".html")))
{
final ResourceMining r = new ResourceMining();
long resourceId = -1;
if (this.idMapping.get(lo.getUrl()) != null)
{
resourceId = this.idMapping.get(lo.getUrl()).getId();
lo.setId(resourceId);
}
if (resourceId == -1)
{
resourceId = this.resIdCount + 1;
this.resIdCount = resourceId;
this.idMapping.put(
lo.getUrl(),
new IDMappingMining(
Long.valueOf(this.connector.getPrefix() + "" + resourceId), lo
.getUrl(), this.connector.getPlatformId()));
this.newIdMapping.put(
lo.getUrl(),
new IDMappingMining(
Long.valueOf(this.connector.getPrefix() + "" + resourceId), lo
.getUrl(), this.connector.getPlatformId()));
resourceId = Long.valueOf(this.connector.getPrefix() + "" + resourceId);
lo.setId(resourceId);
}
r.setId(resourceId);
r.setUrl(lo.getUrl());
// Regex used to prevent the inclusion of assignment-pages
if (newRes && !r.getUrl().matches("[0-9a-z]{32}[-]{1}[0-9]++"))
{
if (lo.getUrl().endsWith("/index.html"))
{
final int inPos = lo.getUrl().substring(0, lo.getUrl().lastIndexOf("/") - 1)
.lastIndexOf("/");
String urlCut = lo.getUrl().substring(0, inPos);
urlCut = urlCut.substring(urlCut.lastIndexOf('/') + 1, urlCut.length());
r.setType("Index");
} else {
r.setType("Unknown");
}
r.setPosition(-1);
}
else
{
r.setType("GlossaryEntry");
r.setPosition(-5);
}
// Construct resource title from URL
String h = lo.getUrl().substring(lo.getUrl().lastIndexOf("/") + 1, lo.getUrl().length());
h = h.substring(0, h.indexOf('.'));
String f = "";
if (h.length() > 0) {
f = Character.toUpperCase(h.charAt(0)) + "";
}
if (h.length() > 0) {
h = f + h.substring(1);
} else {
logger.debug("URL doesn't match pattern: " + lo.getUrl());
}
r.setTitle(h);
// cutting out supplements
if (r.getUrl().contains("/vsengine/supplement/")) {
r.setType("Supplement");
}
if (r.getUrl().contains("/mindmap/")) {
r.setType("Mindmap");
}
r.setPlatform(this.connector.getPlatformId());
this.newResources.put(r.getUrl(), r);
}
if (this.userHistories.get(logLine.getId()) == null)
{
final ArrayList<LogObject> a = new ArrayList<LogObject>();
a.add(lo);
this.userHistories.put(logLine.getId(), a);
}
else
{
this.userHistories.get(logLine.getId()).add(lo);
}
}
else if (!logLine.isValid()) {
logger.debug("Line doesn't match pattern.");
} else {
logger.debug("Line's timestamp is to old.");
}
}
if (filterLog) {
this.filterServerLogFile();
}
}
finally
{
logger.info("Read " + count + " lines.");
input.close();
}
} catch (final Exception ex)
{
logger.error(ex.getMessage());
}
}
/**
* Writes the data to the database.
*/
public Long save(Session session)
{
final List<Collection<?>> l = new ArrayList<Collection<?>>();
final ArrayList<ResourceLogMining> resourceLogMining = new ArrayList<ResourceLogMining>();
ArrayList<CourseUserMining> courseUserMining = new ArrayList<CourseUserMining>();
final Collection<UserMining> users = this.newUsers.values();
final Collection<IDMappingMining> idmap = this.newIdMapping.values();
logger.info("Found " + users.size() + " users.");
l.add(users);
logger.info("Found " + idmap.size() + " IDMappings.");
l.add(idmap);
for (final ArrayList<LogObject> loadedItem : this.userHistories.values())
{
final HashMap<Long, CourseUserMining> courseUserSingle = new HashMap<Long, CourseUserMining>();
for (int i = 0; i < loadedItem.size(); i++)
{
final ResourceLogMining rl = new ResourceLogMining();
// Set Url for resource-object
if (this.newResources.get(loadedItem.get(i).getUrl()) != null) {
rl.setResource(this.newResources.get(loadedItem.get(i).getUrl()));
} else {
rl.setResource(this.oldResources.get(loadedItem.get(i).getUrl()));
}
rl.setCourse(loadedItem.get(i).getCourse());
rl.setUser(loadedItem.get(i).getUser());
rl.setTimestamp(loadedItem.get(i).getTime());
rl.setDuration(loadedItem.get(i).getDuration());
rl.setAction("View");
rl.setPlatform(this.connector.getPlatformId());
if(rl.getCourse() != null)
{
rl.setId(this.resLogId + 1);
this.resLogId++;
CourseUserMining cu = courseUserSingle.get(rl.getCourse().getId());
if(cu == null)
{
cu = new CourseUserMining();
cu.setCourse(rl.getCourse());
cu.setUser(rl.getUser());
cu.setEnrolend(rl.getTimestamp());
cu.setEnrolstart(rl.getTimestamp());
cu.setRole(this.standardRole);
cu.setPlatform(connector.getPlatformId());
Long id = this.courseUserIdCount + 1;
this.courseUserIdCount = id;
cu.setId(Long.valueOf(connector.getPrefix() + "" + id));
courseUserSingle.put(rl.getCourse().getId(), cu);
}
else
{
if(cu.getEnrolend() < rl.getTimestamp())
cu.setEnrolend(rl.getTimestamp());
if(cu.getEnrolstart() > rl.getTimestamp())
cu.setEnrolstart(rl.getTimestamp());
}
resourceLogMining.add(rl);
}
}
courseUserMining.addAll(courseUserSingle.values());
}
Collections.sort(resourceLogMining);
Long maxLog = 0L;
if(resourceLogMining.size() > 0)
{
maxLog = resourceLogMining.get(resourceLogMining.size() -1 ).getTimestamp();
}
logger.info("Found " + newResources.values().size() + " resources.");
l.add(this.newResources.values());
logger.info("Found " + oldRoles.values().size() + " roles.");
l.add(this.oldRoles.values());
logger.info("Found " + courseUserMining.size() + " courseUsers.");
l.add(courseUserMining);logger.info("Found " + resourceLogMining.size() + " resourceLogs.");
l.add(resourceLogMining);
logger.info("Writing to database...");
if (session.isOpen()) {
this.dbHandler.saveCollectionToDB(session, l);
} else
{
session = this.dbHandler.getMiningSession();
this.dbHandler.saveCollectionToDB(session, l);
}
return maxLog;
}
}