/*******************************************************************************
 * Copyright 2013
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.csniper.webapp.search.cqp;

import static org.apache.commons.io.IOUtils.closeQuietly;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.dao.DataAccessResourceFailureException;

import de.tudarmstadt.ukp.csniper.webapp.search.CorpusService;
import de.tudarmstadt.ukp.csniper.webapp.search.SearchEngine;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;

/**
 * {@link SearchEngine} implementation that runs queries through an external CQP executable and
 * gives access to the CQP macro definitions configured via {@link #setMacrosLocation(String)}.
 */
public class CqpEngine
    implements SearchEngine, Serializable
{
    private static final long serialVersionUID = -4853538198064093163L;

    /** Name of the registry folder below the corpus repository path. */
    private static final String REGISTRY = "registry";

    /**
     * Header line of a macro definition: {@code MACRO <name>(<parameter count>)}. Compiled once
     * instead of once per header line.
     */
    private static final Pattern MACRO_HEADER = Pattern
            .compile("MACRO\\s+(\\w+)\\s*\\((\\d+)\\)");

    private String name;
    private File cqpExecutable;

    /**
     * Location of the macro definitions. This is static because {@link #getMacros()} is static;
     * the value is therefore shared by all instances of this class even though it is set through
     * an instance setter.
     */
    private static String macrosLocation;

    private CorpusService corpusService;

    /**
     * Sets the CQP executable.
     *
     * @param aCqpExecutable
     *            the CQP executable file.
     */
    @Required
    public void setCqpExecutable(File aCqpExecutable)
    {
        cqpExecutable = aCqpExecutable;
    }

    /**
     * @return the CQP executable file.
     */
    public File getCqpExecutable()
    {
        return cqpExecutable;
    }

    /**
     * Sets the location of the macro definitions. The value is stored in a static field and is
     * thus shared by all instances.
     *
     * @param aMacrosLocation
     *            the location handed to {@code ResourceUtils.resolveLocation} by
     *            {@link #getMacros()}.
     */
    public void setMacrosLocation(String aMacrosLocation)
    {
        macrosLocation = aMacrosLocation;
    }

    /**
     * @return the location of the macro definitions, or {@code null} if none has been set.
     */
    public String getMacrosLocation()
    {
        return macrosLocation;
    }

    @Override
    public void setBeanName(String aName)
    {
        name = aName;
    }

    @Override
    public String getName()
    {
        return name;
    }

    @Override
    public void setCorpusService(CorpusService aCorpusService)
    {
        corpusService = aCorpusService;
    }

    /**
     * Creates a query with a context of one sentence on either side, hands it the configured
     * macros location and runs it. If setting up or running the query fails with a
     * {@link RuntimeException}, the query is closed before the exception is propagated.
     *
     * @param aType
     *            the type passed on to the {@link CqpQuery}.
     * @param aCollection
     *            the collection passed on to the {@link CqpQuery}.
     * @param aQuery
     *            the query string to run.
     * @return the query after it has been run.
     */
    @Override
    public CqpQuery createQuery(String aType, String aCollection, String aQuery)
    {
        CqpQuery query = null;
        try {
            query = new CqpQuery(this, aType, aCollection);
            query.setContext(1, 1, ContextUnit.SENTENCE);
            query.setMacrosLocation(macrosLocation);
            query.runQuery(aQuery);
            return query;
        }
        catch (RuntimeException e) {
            // closeQuietly is null-safe, so this also covers a failing constructor
            closeQuietly(query);
            throw e;
        }
    }

    /**
     * @return the {@value #REGISTRY} folder below the repository path of the corpus service.
     */
    public File getRegistryPath()
    {
        return new File(corpusService.getRepositoryPath(), REGISTRY);
    }

    /**
     * Determines the encoding of a collection from its registry file. The registry file is the
     * file in {@link #getRegistryPath()} named like the lower-cased collection id. The first
     * line starting with {@code ##:: charset} (compared case-insensitively) decides.
     *
     * @param aCollectionId
     *            the collection id.
     * @return {@code "ISO-8859-1"} if the charset line mentions {@code iso-8859-1} or
     *         {@code latin1}; {@code "UTF-8"} otherwise, including when there is no charset line.
     * @throws DataAccessResourceFailureException
     *             if the registry file cannot be read.
     */
    public String getEncoding(String aCollectionId)
    {
        // Locale.ROOT: the default locale must not influence file names or keyword matching
        // (e.g. the Turkish locale lower-cases "I" to a dotless "ı").
        File registryFile = new File(getRegistryPath(), aCollectionId.toLowerCase(Locale.ROOT));
        try {
            List<String> lines = FileUtils.readLines(registryFile, "UTF-8");
            for (String line : lines) {
                String normalized = line.toLowerCase(Locale.ROOT);
                if (normalized.startsWith("##:: charset")) {
                    if (normalized.contains("iso-8859-1") || normalized.contains("latin1")) {
                        return "ISO-8859-1";
                    }
                    // only the first charset line is considered
                    break;
                }
            }
            return "UTF-8";
        }
        catch (IOException e) {
            throw new DataAccessResourceFailureException("Unable to read registry file", e);
        }
    }

    /**
     * Reads the macro definitions from the configured macros location.
     * <p>
     * Outside of a macro body, lines starting with {@code #} are comments, lines starting with
     * {@code MACRO} are macro headers and a line starting with {@code (} opens the body. Inside a
     * body, a line starting with {@code );} - or a line starting with {@code )} directly followed
     * by a line starting with {@code ;} - closes the macro; every other line is added (trimmed)
     * to the body. A macro whose body is never closed is not returned.
     *
     * @return the macros in file order; an empty list if no macros location has been set.
     * @throws DataAccessResourceFailureException
     *             if the macros location cannot be opened.
     * @throws IllegalStateException
     *             if a macro header is malformed or a body is opened without a preceding header.
     */
    public static List<CqpMacro> getMacros()
    {
        if (macrosLocation == null) {
            // the macros location is optional; without it there is nothing to read
            return new ArrayList<CqpMacro>();
        }

        InputStream is = null;
        try {
            is = ResourceUtils.resolveLocation(macrosLocation, null, null).openStream();
            return parseMacros(IOUtils.lineIterator(is, "UTF-8"));
        }
        catch (IOException e) {
            throw new DataAccessResourceFailureException("Unable to read macros from ["
                    + macrosLocation + "]", e);
        }
        finally {
            closeQuietly(is);
        }
    }

    /**
     * Parses macro definitions from the given lines; see {@link #getMacros()} for the format.
     */
    private static List<CqpMacro> parseMacros(LineIterator aLines)
    {
        List<CqpMacro> macros = new ArrayList<CqpMacro>();
        boolean open = false;
        CqpMacro currentMacro = null;
        String lastComment = "";
        // A line that was read as look-ahead but turned out not to be a terminator. It has to be
        // processed as a regular line in the next iteration instead of being dropped.
        String pending = null;

        while (pending != null || aLines.hasNext()) {
            String line = nextLine(pending, aLines);
            pending = null;
            String n = line.toLowerCase(Locale.ROOT).trim();

            if (!open) {
                if (n.startsWith("#")) {
                    lastComment = line;
                }
                else if (n.startsWith("macro")) {
                    currentMacro = newMacro(line, lastComment);
                }
                else if (n.startsWith("(")) {
                    if (currentMacro == null) {
                        throw new IllegalStateException(
                                "Macro body without preceding MACRO header: [" + line + "]");
                    }
                    open = true;
                }
                // anything else outside of a macro body is ignored
                continue;
            }

            if (n.startsWith(")")) {
                boolean closed = n.startsWith(");");
                if (!closed && aLines.hasNext()) {
                    String next = aLines.next();
                    if (next.trim().startsWith(";")) {
                        closed = true;
                    }
                    else {
                        pending = next;
                    }
                }
                if (closed) {
                    open = false;
                    macros.add(currentMacro);
                    currentMacro = null;
                    continue;
                }
            }

            currentMacro.getBody().add(line.trim());
        }

        return macros;
    }

    /**
     * Returns the pushed-back line if there is one, otherwise the next line from the iterator.
     */
    private static String nextLine(String aPending, LineIterator aLines)
    {
        return aPending != null ? aPending : aLines.next();
    }

    /**
     * Creates a macro with an empty body from a header line and the comment preceding it.
     *
     * @throws IllegalStateException
     *             if the header line does not match {@link #MACRO_HEADER}.
     */
    private static CqpMacro newMacro(String aHeaderLine, String aComment)
    {
        Matcher m = MACRO_HEADER.matcher(aHeaderLine.trim());
        if (!m.matches()) {
            throw new IllegalStateException("Malformed macro definition: [" + aHeaderLine + "]");
        }

        CqpMacro macro = new CqpMacro();
        macro.setName(m.group(1));
        macro.setParamCount(Integer.parseInt(m.group(2)));
        macro.setComment(aComment);
        macro.setBody(new ArrayList<String>());
        return macro;
    }
}