/**
* Copyright 2014 Eediom Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.araqne.logdb.crawler.query;
import java.util.Arrays;
import java.util.Map;
import org.apache.felix.ipojo.annotations.Component;
import org.apache.felix.ipojo.annotations.Invalidate;
import org.apache.felix.ipojo.annotations.Requires;
import org.apache.felix.ipojo.annotations.Validate;
import org.araqne.logdb.AbstractQueryCommandParser;
import org.araqne.logdb.QueryCommand;
import org.araqne.logdb.QueryContext;
import org.araqne.logdb.QueryParseException;
import org.araqne.logdb.QueryParserService;
import org.araqne.logdb.query.parser.ParseResult;
import org.araqne.logdb.query.parser.QueryTokenizer;
/**
 * Parser for the {@code wget} query command.
 * <p>
 * The component registers itself with the injected {@link QueryParserService}
 * when it is validated and removes itself again when it is invalidated.
 * {@link #parse(QueryContext, String)} turns the option string of a
 * {@code wget} command into a {@link WgetQueryCommand}.
 *
 * @author xeraph@eediom.com
 */
@Component(name = "wget-query-command-parser")
public class WgetQueryCommandParser extends AbstractQueryCommandParser {
    /** Timeout used when the {@code timeout} option is absent (documented to users as seconds). */
    private static final int DEFAULT_TIMEOUT = 30;

    /** HTTP method used when the {@code method} option is absent. */
    private static final String DEFAULT_METHOD = "get";

    /** Character encoding used when the {@code encoding} option is absent. */
    private static final String DEFAULT_ENCODING = "utf-8";

    @Requires
    private QueryParserService parserService;

    /**
     * Registers the command description and the per-option help texts
     * (English and Korean) shown for this command.
     */
    public WgetQueryCommandParser() {
        setDescriptions("Fetch HTTP content and parse DOM elements.", "HTTP 통신을 통해 웹페이지를 다운로드하고 해석한 결과를 출력합니다.");
        setOptions("url", false, "Request URL, starts with http:// or https://", "HTTP 요청할 URL을 입력합니다.");
        setOptions("selector", false, "DOM selector", "CSS의 셀렉터와 동일한 문법으로 HTML DOM 트리에서 선택할 요소를 지정합니다.");
        setOptions("timeout", false, "HTTP timeout. Default is 30 seconds.", "HTTP 연결 타임아웃 시간을 초 단위로 지정합니다. 미지정 시 기본값은 30초입니다.");
        setOptions("encoding", false, "Character encoding. Default is `utf-8`.",
                "HTTP 응답 해석에 사용할 인코딩을 지정합니다. 미지정 시 기본값은 utf-8 입니다.");
    }

    /**
     * @return the command keyword handled by this parser, {@code "wget"}
     */
    @Override
    public String getCommandName() {
        return "wget";
    }

    /**
     * Lifecycle callback: adds this parser to the query parser service.
     */
    @Validate
    public void start() {
        parserService.addCommandParser(this);
    }

    /**
     * Lifecycle callback: removes this parser from the query parser service,
     * if the service reference is still present.
     */
    @Invalidate
    public void stop() {
        if (parserService != null)
            parserService.removeCommandParser(this);
    }

    /**
     * Parses a {@code wget} command string into a {@link WgetQueryCommand}.
     * <p>
     * Recognized options are {@code selector}, {@code timeout}, {@code method},
     * {@code encoding}, {@code url} and {@code auth}. Missing {@code timeout},
     * {@code method} and {@code encoding} fall back to {@code 30}, {@code "get"}
     * and {@code "utf-8"} respectively; the remaining options are passed on
     * as-is and may be {@code null}.
     *
     * @param context
     *            the query context handed to the option tokenizer
     * @param commandString
     *            the full command text, beginning with the command name
     * @return the configured wget command
     * @throws QueryParseException
     *             if {@code method} is neither {@code get} nor {@code post}, or
     *             if {@code timeout} is not a valid integer
     */
    @Override
    public QueryCommand parse(QueryContext context, String commandString) {
        // Options are tokenized starting right after the command name.
        ParseResult r = QueryTokenizer.parseOptions(context, commandString, getCommandName().length(),
                Arrays.asList("selector", "timeout", "method", "encoding", "url", "auth"), getFunctionRegistry());

        @SuppressWarnings("unchecked")
        Map<String, String> options = (Map<String, String>) r.value;

        String url = options.get("url");
        String selector = options.get("selector");
        int timeout = parseTimeout(options.get("timeout"));

        String method = options.get("method");
        if (method == null)
            method = DEFAULT_METHOD;
        else if (!method.equals("get") && !method.equals("post"))
            throw new QueryParseException("invalid-wget-method", -1);

        String encoding = options.get("encoding");
        if (encoding == null)
            encoding = DEFAULT_ENCODING;

        String auth = options.get("auth");

        return new WgetQueryCommand(url, selector, timeout, method, encoding, auth);
    }

    /**
     * Converts the raw {@code timeout} option into an int.
     *
     * @param raw
     *            the option text, or {@code null} when the option was not given
     * @return the parsed timeout, or the default of 30 when {@code raw} is
     *         {@code null}
     * @throws QueryParseException
     *             if {@code raw} is not a valid integer
     */
    private static int parseTimeout(String raw) {
        if (raw == null)
            return DEFAULT_TIMEOUT;

        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException e) {
            // Report malformed input as a parse error, in the same form as the
            // invalid-method check, instead of leaking NumberFormatException.
            throw new QueryParseException("invalid-wget-timeout", -1);
        }
    }
}