/*
* #!
* Ontopia Classify
* #-
* Copyright (C) 2001 - 2013 The Ontopia Project
* #-
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* !#
*/
package net.ontopia.topicmaps.classify;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.ServletContext;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;
import net.ontopia.topicmaps.core.AssociationIF;
import net.ontopia.topicmaps.core.TopicIF;
import net.ontopia.topicmaps.core.TopicMapBuilderIF;
import net.ontopia.topicmaps.core.TopicMapIF;
import net.ontopia.topicmaps.core.TopicMapStoreIF;
import net.ontopia.topicmaps.nav2.utils.ContextUtils;
import net.ontopia.topicmaps.nav2.utils.NavigatorUtils;
import net.ontopia.topicmaps.query.core.ParsedQueryIF;
import net.ontopia.topicmaps.query.core.QueryProcessorIF;
import net.ontopia.topicmaps.query.core.QueryResultIF;
import net.ontopia.topicmaps.query.utils.QueryUtils;
import net.ontopia.topicmaps.utils.DuplicateSuppressionUtils;
import net.ontopia.topicmaps.utils.TopicStringifiers;
import net.ontopia.utils.ObjectUtils;
import net.ontopia.utils.OntopiaRuntimeException;
import net.ontopia.utils.StringUtils;
/**
 * INTERNAL: Request-scoped helper behind the web classification
 * pages. It processes the submitted classification form (black
 * listing terms, removing associations, creating new associations)
 * and exposes the classification of the document topic identified by
 * the <code>tm</code> and <code>id</code> request parameters.
 */
public class WebChew {

  HttpServletRequest request;
  HttpServletResponse response;
  int visibleRows = Integer.MAX_VALUE;
  String redirectURI; // link back to topic page

  /**
   * INTERNAL: Creates a helper bound to the given request/response pair.
   */
  public WebChew(HttpServletRequest request, HttpServletResponse response) {
    this.request = request;
    this.response = response;
  }

  /**
   * INTERNAL: Sets the maximum number of terms exposed by
   * {@link WebClassification#getTerms()}. Defaults to no limit.
   */
  public void setVisibleRows(int visibleRows) {
    this.visibleRows = visibleRows;
  }

  /**
   * INTERNAL: Sets the URI that the response is redirected to once the
   * form has been completed with "ok" or "cancel".
   */
  public void setRedirectURI(String redirectURI) {
    this.redirectURI = redirectURI;
  }

  /**
   * INTERNAL: Processes the classification form. Depending on which
   * request parameters are present this will, in order:
   * <ul>
   * <li><code>reclassify</code>: drop the cached classification from
   * the session,</li>
   * <li><code>blacklisted</code>: add the term to the black list and
   * save it,</li>
   * <li><code>removeAssociation</code>: remove the association with
   * that object id and commit,</li>
   * <li><code>ok</code>: create associations for the
   * <code>selected</code> terms and commit,</li>
   * <li><code>ok</code> or <code>cancel</code>: drop the cached
   * classification and redirect to the redirect URI.</li>
   * </ul>
   * If "ok" was pressed but no classification is cached in the session
   * the method returns without creating anything or redirecting.
   *
   * @throws OntopiaRuntimeException wrapping any failure while
   * updating the topic map or redirecting
   */
  public void processForm() {
    HttpSession session = request.getSession(true);
    String tmckey = getClassificationKey();

    // reclassify
    if (request.getParameter("reclassify") != null) {
      session.removeAttribute(tmckey);
    }

    // black list selected terms
    String blacklisted = request.getParameter("blacklisted");
    if (blacklisted != null && blacklisted.length() > 0) {
      BlackList bl = getBlackList();
      bl.addStopWord(blacklisted);
      bl.save();
    }

    // remove selected association
    String associationId = request.getParameter("removeAssociation");
    if (associationId != null) {
      removeAssociationById(session, associationId);
    }

    boolean okPressed = request.getParameter("ok") != null;
    boolean cancelPressed = request.getParameter("cancel") != null;
    if (okPressed || cancelPressed) {
      try {
        if (okPressed) {
          // associations are only created for a document that has
          // already been classified in this session
          if (session.getAttribute(tmckey) == null) {
            return;
          }
          createAssociations(session, request.getParameterValues("selected"));
        }

        // clear classification
        session.removeAttribute(tmckey);

        // redirect back to instance page
        response.sendRedirect(redirectURI);
      } catch (Exception e) {
        throw new OntopiaRuntimeException(e);
      }
    }
  }

  /**
   * Creates a store for the topic map reference keyed by the
   * <code>tm</code> request parameter. The caller is responsible for
   * closing the returned store.
   */
  private TopicMapStoreIF openStore(HttpSession session) throws Exception {
    return NavigatorUtils.getTopicMapRepository(session.getServletContext())
      .getReferenceByKey(request.getParameter("tm"))
      .createStore(false);
  }

  /**
   * Removes the association with the given object id (if it exists)
   * and commits the change.
   */
  private void removeAssociationById(HttpSession session, String associationId) {
    try {
      TopicMapStoreIF store = openStore(session);
      try {
        TopicMapIF topicmap = store.getTopicMap();
        AssociationIF assoc = (AssociationIF)topicmap.getObjectById(associationId);
        if (assoc != null) {
          assoc.remove();
        }
        store.commit();
      } finally {
        store.close();
      }
    } catch (Exception e) {
      throw new OntopiaRuntimeException(e);
    }
  }

  /**
   * Creates one association between the document topic (request
   * parameter <code>id</code>) and a candidate topic for every
   * selected term, then removes duplicate associations and commits.
   * Nothing is written when no terms are selected.
   *
   * For each term id the form fields <code>at-ID</code> (association
   * type, formatted as "atype:documentRoleType:conceptRoleType"),
   * <code>ct-ID</code> (candidate topic id, or "new:TYPEID" to create
   * a topic) and <code>cn-ID</code> (name of a newly created topic)
   * are read. Terms whose association type or candidate topic is
   * missing, "-" or malformed are skipped.
   */
  private void createAssociations(HttpSession session, String[] selected) throws Exception {
    TopicMapStoreIF store = openStore(session);
    try {
      TopicMapIF topicmap = store.getTopicMap();
      TopicMapBuilderIF builder = topicmap.getBuilder();

      if (selected != null && selected.length > 0) {
        // get document topic
        String documentId = request.getParameter("id");
        TopicIF dtopic = findTopic(topicmap, documentId,
                                   "Cannot find document topic: " + documentId);

        for (String termid : selected) {
          String at = request.getParameter("at-" + termid);
          if (at == null || at.equals("-")) {
            continue;
          }
          String cn = request.getParameter("cn-" + termid);
          String ct = request.getParameter("ct-" + termid);
          if (ct == null || ct.equals("-")) {
            continue;
          }

          // validate the association type before touching the topic
          // map, so that a skipped term never leaves a newly created
          // candidate topic behind
          String[] at_data = StringUtils.split(at, ":");
          if (at_data.length != 3) {
            continue;
          }
          TopicIF atype = findTopic(topicmap, at_data[0],
                                    "Cannot find association type: " + at);
          TopicIF drtype = findTopic(topicmap, at_data[1],
                                     "Cannot find document roletype: " + at_data[1]);
          TopicIF crtype = findTopic(topicmap, at_data[2],
                                     "Cannot find concept roletype: " + at_data[2]);

          // look up or create the candidate topic
          TopicIF ctopic;
          if (ct.startsWith("new:")) {
            String ctoid = ct.substring("new:".length());
            TopicIF ctype = findTopic(topicmap, ctoid,
                                      "Cannot find topic type: " + ct + " " + ctoid);
            ctopic = builder.makeTopic(ctype);
            builder.makeTopicName(ctopic, cn);
          } else {
            ctopic = findTopic(topicmap, ct, "Cannot find candidate topic: " + ct);
          }

          // create association
          AssociationIF assoc = builder.makeAssociation(atype);
          builder.makeAssociationRole(assoc, drtype, dtopic);
          builder.makeAssociationRole(assoc, crtype, ctopic);
        }

        // remove duplicate associations
        DuplicateSuppressionUtils.removeDuplicateAssociations(dtopic);
        store.commit();
      }
    } finally {
      store.close();
    }
  }

  /**
   * Looks up a topic by object id, failing with the given message if
   * no object with that id exists.
   */
  private static TopicIF findTopic(TopicMapIF topicmap, String objectId, String errorMessage) {
    TopicIF topic = (TopicIF)topicmap.getObjectById(objectId);
    if (topic == null) {
      throw new OntopiaRuntimeException(errorMessage);
    }
    return topic;
  }

  /**
   * Returns the black list for the current topic map, creating it and
   * caching it in the session on first use.
   */
  private BlackList getBlackList() {
    HttpSession session = request.getSession(true);
    String tmid = request.getParameter("tm");
    String blkey = "webchew-blacklist-" + tmid;
    synchronized (session) {
      BlackList bl = (BlackList)session.getAttribute(blkey);
      if (bl == null) {
        // NOTE(review): the "tm" request parameter ends up in a file
        // name unvalidated; consider restricting it to known topic map
        // reference keys.
        bl = new BlackList(new File(System.getProperty("user.home") + "/.oks/classify/blacklist." + tmid));
        session.setAttribute(blkey, bl);
      }
      return bl;
    }
  }

  /**
   * Returns the session attribute name under which the classification
   * of the current document topic is cached.
   */
  private String getClassificationKey() {
    return "webchew-" + request.getParameter("tm") + ":" + request.getParameter("id");
  }

  /**
   * INTERNAL: Returns the classification of the current document
   * topic. A classification cached in the session is reused;
   * otherwise the content is fetched from the classify plug-in,
   * falling back to file upload content, classified and cached.
   *
   * @return the classification, or null if there was no content to
   * classify
   * @throws OntopiaRuntimeException wrapping any failure
   */
  public WebClassification getClassification() {
    try {
      // look up existing classified document in session
      HttpSession session = request.getSession(true);
      String tmckey = getClassificationKey();
      TopicMapClassification tmc = (TopicMapClassification)session.getAttribute(tmckey);

      if (tmc == null) {
        // use document repository
        TopicMapIF topicmap = ContextUtils.getTopicMap(request);
        TopicIF topic = (TopicIF)topicmap.getObjectById(request.getParameter("id"));

        // get content via plug-in
        ClassifyPluginIF cp = WebChew.getPlugin(request);
        ClassifiableContentIF cc = cp.getClassifiableContent(topic);

        // if no plug-in content then delegate to file upload
        if (cc == null) {
          cc = ClassifyUtils.getFileUploadContent(request);
        }

        // classify content
        if (cc != null) {
          tmc = classifyContent(cc, topicmap);
          session.setAttribute(tmckey, tmc);
        }
      }
      return (tmc == null ? null : new WebClassification(tmc));
    } catch (Exception e) {
      throw new OntopiaRuntimeException(e);
    }
  }

  /**
   * INTERNAL: Returns the plug-in class instance used by the ontopoly
   * plugin. Used by classify/plugin.jsp. The class name is read from
   * the <code>classify_plugin</code> servlet context init parameter
   * and defaults to
   * <code>net.ontopia.topicmaps.classify.DefaultPlugin</code>. A new
   * instance is created on every call.
   */
  public static ClassifyPluginIF getPlugin(HttpServletRequest request) {
    // create plugin by dynamically instantiating plugin class
    HttpSession session = request.getSession(true);
    ServletContext scontext = session.getServletContext();
    String pclass = scontext.getInitParameter("classify_plugin");
    if (pclass == null) {
      pclass = "net.ontopia.topicmaps.classify.DefaultPlugin";
    }
    ClassifyPluginIF cp = (ClassifyPluginIF)ObjectUtils.newInstance(pclass);
    if (cp instanceof HttpServletRequestAwareIF) {
      ((HttpServletRequestAwareIF)cp).setRequest(request);
    }
    return cp;
  }

  /**
   * Classifies the given content against the topic map, using the
   * black list as custom term analyzer.
   */
  private TopicMapClassification classifyContent(ClassifiableContentIF cc, TopicMapIF topicmap) {
    try {
      TopicMapClassification tmc = new TopicMapClassification(topicmap);
      BlackList bl = getBlackList();
      if (bl != null) {
        tmc.setCustomTermAnalyzer(bl);
      }
      tmc.classify(cc);
      return tmc;
    } catch (Exception e) {
      throw new OntopiaRuntimeException(e);
    }
  }

  /**
   * INTERNAL: View of a classification: the top ranked terms that are
   * not black listed (at most <code>visibleRows</code> of them) plus
   * the candidate types, association types and existing associations.
   */
  public class WebClassification {

    TopicMapClassification tmc;
    List<WebTerm> topterms;

    WebClassification(TopicMapClassification tmc) {
      this.tmc = tmc;

      // get top terms
      Term[] terms = tmc.getTermDatabase().getTermsByRank();

      // visibleRows defaults to Integer.MAX_VALUE and may be negative;
      // never pre-size the list beyond the number of available terms
      int capacity = Math.max(0, Math.min(visibleRows, terms.length));
      topterms = new ArrayList<WebTerm>(capacity);

      // ignore black listed terms
      BlackList bl = getBlackList();
      for (int i = 0; i < terms.length && topterms.size() < visibleRows; i++) {
        Term term = terms[i];
        if (bl == null || !bl.isStopWord(term.getPreferredName())) {
          // the sequence id is the term's index in the ranked term array
          topterms.add(new WebTerm(this, term, i));
        }
      }
    }

    /**
     * INTERNAL: Returns the visible terms in rank order.
     */
    public List<WebTerm> getTerms() {
      return topterms;
    }

    public Collection<TopicIF> getCandidateTypes() {
      return tmc.getCandidateTypes();
    }

    public Collection<TopicMapAnalyzer.AssociationType> getAssociationTypes() {
      return tmc.getAssociationTypes();
    }

    /**
     * INTERNAL: Returns, for every association type of this
     * classification, the existing associations of that type between
     * the current document topic (request parameter <code>id</code>)
     * and other topics.
     *
     * @throws OntopiaRuntimeException wrapping any query failure
     */
    public Collection<ExistingAssociation> getExistingAssociations() {
      Collection<ExistingAssociation> result = new ArrayList<ExistingAssociation>();
      try {
        TopicMapIF topicmap = ContextUtils.getTopicMap(request);
        QueryProcessorIF qp = QueryUtils.getQueryProcessor(topicmap);
        // parsed once, executed once per association type
        ParsedQueryIF pq = qp.parse("select $A, $O from role-player($R1, %TOPIC%), type($R1, %CRTYPE%), association-role($A, $R1), type($A, %ATYPE%), association-role($A, $R2), $R1 /= $R2, type($R2, %PRTYPE%), role-player($R2, $O)?");

        Map<String, Object> params = new HashMap<String, Object>();
        params.put("TOPIC", topicmap.getObjectById(request.getParameter("id")));

        for (TopicMapAnalyzer.AssociationType _atype : getAssociationTypes()) {
          TopicIF atype = (TopicIF)topicmap.getObjectById(_atype.getAssociationTypeId());
          TopicIF crtype = (TopicIF)topicmap.getObjectById(_atype.getContentRoleTypeId());
          TopicIF prtype = (TopicIF)topicmap.getObjectById(_atype.getTopicRoleTypeId());
          params.put("ATYPE", atype);
          params.put("CRTYPE", crtype);
          params.put("PRTYPE", prtype);

          QueryResultIF qr = pq.execute(params);
          try {
            while (qr.next()) {
              AssociationIF assoc = (AssociationIF)qr.getValue(0);
              TopicIF player = (TopicIF)qr.getValue(1);
              ExistingAssociation x = new ExistingAssociation();
              x.associationId = assoc.getObjectId();
              x.associationName = TopicStringifiers.toString(atype, crtype);
              x.associatedTopicName = TopicStringifiers.toString(player);
              result.add(x);
            }
          } finally {
            // release the query result even if reading it fails
            qr.close();
          }
        }
      } catch (Exception e) {
        throw new OntopiaRuntimeException(e);
      }
      return result;
    }
  }

  /**
   * INTERNAL: Display data for an association that already exists
   * between the document topic and another topic.
   */
  public class ExistingAssociation {

    protected String associationId;
    protected String associationName;
    protected String associatedTopicName;

    public String getAssociationId() {
      return associationId;
    }

    public String getAssociationName() {
      return associationName;
    }

    public String getAssociatedTopicName() {
      return associatedTopicName;
    }
  }

  /**
   * INTERNAL: View of a single term: its form field names and values,
   * score, and the candidate topics found for its variants.
   */
  public class WebTerm {

    WebClassification wc;
    Term term;
    int sequenceId;
    List<TopicIF> candidates;

    WebTerm(WebClassification wc, Term term, int sequenceId) {
      this.wc = wc;
      this.term = term;
      this.sequenceId = sequenceId;

      // collect the distinct topics found for the variants, in variant
      // rank order
      this.candidates = new ArrayList<TopicIF>();
      Variant[] variants = term.getVariantsByRank();
      for (int i = 0; i < variants.length; i++) {
        for (TopicIF c : wc.tmc.getTopics(variants[i])) {
          if (!candidates.contains(c)) {
            candidates.add(c);
          }
        }
      }
    }

    /**
     * INTERNAL: Returns the term id used in form field names, which is
     * the term's stem.
     */
    public String getId() {
      return term.getStem();
    }

    public int getSequenceId() {
      return sequenceId;
    }

    /**
     * INTERNAL: Returns whether the term should be shown as selected.
     * If the request carries <code>selected</code> values the term is
     * selected only when its id is among them; otherwise it is
     * selected when it has candidate topics and a default association
     * type.
     */
    public boolean getSelected() {
      String[] selected = request.getParameterValues("selected");
      if (selected == null || selected.length == 0) {
        return getHasCandidateTopics() && getDefaultAssociationType() != null;
      }
      String id = getId();
      for (String selectedId : selected) {
        if (id.equals(selectedId)) {
          return true;
        }
      }
      return false;
    }

    public String getNameField() {
      return "cn-" + getId();
    }

    /**
     * INTERNAL: Returns the submitted name for this term if present,
     * otherwise the term's preferred name.
     */
    public String getNameValue() {
      String value = request.getParameter(getNameField());
      return (value != null ? value : term.getPreferredName());
    }

    public String getNameTitle() {
      return term.getStem() + ": " + StringUtils.join(term.getVariants(), " | ");
    }

    public double getScore() {
      return term.getScore();
    }

    public double getScorePercent() {
      return (100d * term.getScore());
    }

    public int getOccurrences() {
      return term.getOccurrences();
    }

    public String getCandidateTopicField() {
      return "ct-" + getId();
    }

    public boolean getHasCandidateTopics() {
      return !candidates.isEmpty();
    }

    public Collection<TopicIF> getCandidateTopics() {
      return candidates;
    }

    public String getAssociationTypeField() {
      return "at-" + getId();
    }

    /**
     * INTERNAL: Returns the association type to preselect for this
     * term: the one with the highest non-negative score threshold
     * that is strictly below the term's score. On equal thresholds the
     * first association type wins.
     *
     * @return the default association type, or null if no threshold
     * qualifies
     */
    public TopicMapAnalyzer.AssociationType getDefaultAssociationType() {
      boolean hasCandidates = getHasCandidateTopics();
      double tscore = getScore();

      TopicMapAnalyzer.AssociationType ttype = null;
      double ttscore = -1.0d;
      for (TopicMapAnalyzer.AssociationType xtype : wc.getAssociationTypes()) {
        double xscore = xtype.getScoreThreshold(hasCandidates);
        if (xscore >= 0 && tscore > xscore &&
            (ttype == null || ttscore < xscore)) {
          ttype = xtype;
          ttscore = xscore;
        }
      }
      return ttype;
    }
  }
}