/*******************************************************************************
* Copyright 2013
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.csniper.webapp.search.cqp;
import static org.apache.commons.io.IOUtils.closeQuietly;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.dao.DataAccessResourceFailureException;
import de.tudarmstadt.ukp.csniper.webapp.search.CorpusService;
import de.tudarmstadt.ukp.csniper.webapp.search.SearchEngine;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;
/**
 * {@link SearchEngine} implementation that creates {@link CqpQuery} instances and exposes
 * information read from the corpus registry and from a macro definition file.
 */
public class CqpEngine
    implements SearchEngine, Serializable
{
    private static final long serialVersionUID = -4853538198064093163L;

    /** Name of the registry folder below the repository path of the corpus service. */
    private static final String REGISTRY = "registry";

    /**
     * Header line of a macro definition: {@code MACRO name(paramCount)}. The keyword is matched
     * case-sensitively; group 1 is the macro name, group 2 the parameter count.
     */
    private static final Pattern MACRO_HEADER = Pattern
            .compile("MACRO\\s+(\\w+)\\s*\\((\\d+)\\)");

    private String name;
    private File cqpExecutable;

    /**
     * Location of the macro definition file. The field is static because the static
     * {@link #getMacros()} reads it; consequently the value is shared by all instances.
     */
    private static String macrosLocation;

    private CorpusService corpusService;

    /**
     * Sets the CQP executable.
     *
     * @param aCqpExecutable
     *            the executable file.
     */
    @Required
    public void setCqpExecutable(File aCqpExecutable)
    {
        cqpExecutable = aCqpExecutable;
    }

    /**
     * @return the CQP executable set via {@link #setCqpExecutable(File)}.
     */
    public File getCqpExecutable()
    {
        return cqpExecutable;
    }

    /**
     * Sets the location of the macro definition file. The value is stored in a static field and
     * therefore applies to all instances of this class.
     *
     * @param aMacrosLocation
     *            the location, later passed to {@code ResourceUtils.resolveLocation}.
     */
    public void setMacrosLocation(String aMacrosLocation)
    {
        macrosLocation = aMacrosLocation;
    }

    /**
     * @return the location of the macro definition file.
     */
    public String getMacrosLocation()
    {
        return macrosLocation;
    }

    /**
     * Stores the bean name, which is what {@link #getName()} returns.
     */
    @Override
    public void setBeanName(String aName)
    {
        name = aName;
    }

    @Override
    public String getName()
    {
        return name;
    }

    @Override
    public void setCorpusService(CorpusService aCorpusService)
    {
        corpusService = aCorpusService;
    }

    /**
     * Creates a {@link CqpQuery} for the given type and collection, configures it with a context
     * of one sentence on either side and with the macro file location, and runs the query.
     *
     * @param aType
     *            the type passed on to the {@link CqpQuery} constructor.
     * @param aCollection
     *            the collection passed on to the {@link CqpQuery} constructor.
     * @param aQuery
     *            the query string to run.
     * @return the query after it has been run.
     * @throws RuntimeException
     *             any runtime exception raised while creating, configuring or running the query
     *             is rethrown after the query (if it was created) has been closed.
     */
    @Override
    public CqpQuery createQuery(String aType, String aCollection, String aQuery)
    {
        CqpQuery query = null;
        try {
            query = new CqpQuery(this, aType, aCollection);
            query.setContext(1, 1, ContextUnit.SENTENCE);
            query.setMacrosLocation(macrosLocation);
            query.runQuery(aQuery);
            return query;
        }
        catch (RuntimeException e) {
            // Do not leave a half-initialised query open when setup or execution fails.
            if (query != null) {
                IOUtils.closeQuietly(query);
            }
            throw e;
        }
    }

    /**
     * @return the {@value #REGISTRY} folder below the repository path of the corpus service.
     */
    public File getRegistryPath()
    {
        return new File(corpusService.getRepositoryPath(), REGISTRY);
    }

    /**
     * Determines the encoding of a collection from its registry file. The registry file is the
     * file in {@link #getRegistryPath()} named after the lower-cased collection id. Only the
     * first line starting with {@code ##:: charset} (compared case-insensitively) is inspected.
     *
     * @param aCollectionId
     *            the collection id.
     * @return {@code "ISO-8859-1"} if the charset line mentions {@code iso-8859-1} or
     *         {@code latin1}; {@code "UTF-8"} in every other case, including when the file has
     *         no charset line.
     * @throws DataAccessResourceFailureException
     *             if the registry file cannot be read.
     */
    public String getEncoding(String aCollectionId)
    {
        try {
            // Locale.ROOT keeps the lower-casing independent of the default locale (e.g. a
            // Turkish locale would turn "I" into a dotless "ı" and break the comparisons below
            // as well as the file name lookup).
            List<String> lines = FileUtils.readLines(new File(getRegistryPath(),
                    aCollectionId.toLowerCase(Locale.ROOT)), "UTF-8");
            for (String line : lines) {
                line = line.toLowerCase(Locale.ROOT);
                if (line.startsWith("##:: charset")) {
                    if (line.contains("iso-8859-1") || line.contains("latin1")) {
                        return "ISO-8859-1";
                    }
                    break;
                }
            }
            return "UTF-8";
        }
        catch (IOException e) {
            throw new DataAccessResourceFailureException("Unable to read registry file", e);
        }
    }

    /**
     * Parses the macro definition file at the configured macros location. The file is read
     * line by line as UTF-8 and interpreted as follows:
     * <ul>
     * <li>outside a macro body, a line starting with {@code #} is remembered as the comment of
     * the next macro header;</li>
     * <li>outside a macro body, a line starting with {@code macro} (any case) is a macro header;
     * it must have the form {@code MACRO name(paramCount)}, otherwise the macro is skipped;</li>
     * <li>a line starting with {@code (} opens the body;</li>
     * <li>inside a body, a line starting with {@code );}, or a line starting with {@code )}
     * that is directly followed by a line starting with {@code ;}, ends the macro;</li>
     * <li>every other line inside a body is added, trimmed, to the body of the macro.</li>
     * </ul>
     * Reading is best effort: if an {@link IOException} occurs, its stack trace is printed and
     * the macros collected up to that point are returned.
     *
     * @return the macros found, in file order; never {@code null}.
     */
    public static List<CqpMacro> getMacros()
    {
        List<CqpMacro> macros = new ArrayList<CqpMacro>();
        boolean open = false;
        CqpMacro currentMacro = null;
        String lastComment = "";
        // Line that was read ahead while looking for a ";" terminator but turned out to be a
        // regular line; it is processed in the next iteration instead of being dropped.
        String pending = null;

        InputStream is = null;
        try {
            is = ResourceUtils.resolveLocation(macrosLocation, null, null).openStream();
            LineIterator li = IOUtils.lineIterator(is, "UTF-8");
            while (pending != null || li.hasNext()) {
                String line;
                if (pending != null) {
                    line = pending;
                    pending = null;
                }
                else {
                    line = li.next();
                }
                String n = line.toLowerCase(Locale.ROOT).trim();

                // comment preceding a macro header
                if (n.startsWith("#") && !open) {
                    lastComment = line;
                    continue;
                }

                // macro header
                if (n.startsWith("macro") && !open) {
                    Matcher m = MACRO_HEADER.matcher(line.trim());
                    if (m.matches()) {
                        currentMacro = new CqpMacro();
                        currentMacro.setName(m.group(1));
                        currentMacro.setParamCount(Integer.parseInt(m.group(2)));
                        currentMacro.setComment(lastComment);
                        currentMacro.setBody(new ArrayList<String>());
                    }
                    else {
                        // Unsupported header: skip this macro (its body is still consumed
                        // below) instead of collecting a half-initialised entry.
                        currentMacro = null;
                    }
                    // A comment belongs to one header only.
                    lastComment = "";
                    continue;
                }

                // start of the macro body
                if (n.startsWith("(") && !open) {
                    open = true;
                    continue;
                }

                // possible end of the macro body
                if (n.startsWith(")") && open) {
                    boolean terminated = n.startsWith(");");
                    if (!terminated && li.hasNext()) {
                        String next = li.next();
                        if (next.trim().startsWith(";")) {
                            terminated = true;
                        }
                        else {
                            pending = next;
                        }
                    }
                    if (terminated) {
                        open = false;
                        if (currentMacro != null) {
                            macros.add(currentMacro);
                        }
                        // Prevent a later block without header from extending this macro.
                        currentMacro = null;
                        continue;
                    }
                    // Not a terminator: the ")" line is part of the body (handled below).
                }

                if (open && currentMacro != null) {
                    currentMacro.getBody().add(line.trim());
                }
            }
        }
        catch (IOException e) {
            // Best effort: report the problem and return what has been parsed so far.
            e.printStackTrace();
        }
        finally {
            closeQuietly(is);
        }
        return macros;
    }
}