/** * License Agreement for OpenSearchServer * * Copyright (C) 2010-2013 Emmanuel Keller / Jaeksoft * * http://www.open-search-server.com * * This file is part of OpenSearchServer. * * OpenSearchServer is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * OpenSearchServer is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with OpenSearchServer. * If not, see <http://www.gnu.org/licenses/>. **/ package com.jaeksoft.searchlib.scheduler.task; import java.io.File; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.security.NoSuchAlgorithmException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.stream.StreamSource; import javax.xml.xpath.XPathExpressionException; import org.w3c.dom.Node; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import com.jaeksoft.searchlib.Client; import com.jaeksoft.searchlib.Logging; import com.jaeksoft.searchlib.SearchLibException; import com.jaeksoft.searchlib.config.Config; import com.jaeksoft.searchlib.crawler.web.database.CredentialItem; import com.jaeksoft.searchlib.crawler.web.database.CredentialItem.CredentialType; import com.jaeksoft.searchlib.crawler.web.spider.DownloadItem; import com.jaeksoft.searchlib.crawler.web.spider.HttpDownloader; import com.jaeksoft.searchlib.scheduler.TaskAbstract; import com.jaeksoft.searchlib.scheduler.TaskLog; import com.jaeksoft.searchlib.scheduler.TaskProperties; import com.jaeksoft.searchlib.scheduler.TaskPropertyDef; import com.jaeksoft.searchlib.scheduler.TaskPropertyType; import com.jaeksoft.searchlib.util.DomUtils; import com.jaeksoft.searchlib.util.Variables; public class TaskXmlLoad extends TaskAbstract { final private TaskPropertyDef propUri = new TaskPropertyDef(TaskPropertyType.textBox, "URI", "Uri", null, 100); final private TaskPropertyDef propLogin = new TaskPropertyDef(TaskPropertyType.textBox, "Login", "Login", null, 50); final private TaskPropertyDef propPassword = new TaskPropertyDef(TaskPropertyType.password, "Password", "Password", null, 20); final private TaskPropertyDef propUserAgent = new TaskPropertyDef(TaskPropertyType.textBox, "User agent", "UserAgent", null, 20); final private TaskPropertyDef propBuffersize = new TaskPropertyDef(TaskPropertyType.textBox, "Buffer size", "Buffer size", null, 10); final private TaskPropertyDef propXsl = new TaskPropertyDef(TaskPropertyType.multilineTextBox, "XSL", "XSL", null, 100, 30); final private TaskPropertyDef[] taskPropertyDefs = { propUri, propLogin, propPassword, propUserAgent, propBuffersize, propXsl }; @Override public String getName() { return "XML load"; } @Override public TaskPropertyDef[] getPropertyList() { return taskPropertyDefs; } @Override public String[] getPropertyValues(Config config, TaskPropertyDef propertyDef, TaskProperties taskProperties) throws SearchLibException { return null; } @Override public String getDefaultValue(Config config, TaskPropertyDef propertyDef) { if (propertyDef == propBuffersize) return "50"; else if (propertyDef == propUserAgent) try { return config.getWebPropertyManager().getUserAgent().getValue(); } catch (IOException e) { Logging.error(e); } return null; } @Override public void execute(Client client, TaskProperties properties, Variables variables, TaskLog taskLog) throws SearchLibException, IOException { String uriString = properties.getValue(propUri); String login = properties.getValue(propLogin); String password = properties.getValue(propPassword); String p = properties.getValue(propBuffersize); String xsl = properties.getValue(propXsl); String userAgent = properties.getValue(propUserAgent); File xmlTempResult = null; int bufferSize = 50; if (p != null && p.length() > 0) bufferSize = Integer.parseInt(p); HttpDownloader httpDownloader = client.getWebCrawlMaster().getNewHttpDownloader(true, userAgent, false); try { URI uri = new URI(uriString); CredentialItem credentialItem = null; if (login != null && password != null) credentialItem = new CredentialItem(CredentialType.BASIC_DIGEST, null, login, password, null, null); DownloadItem downloadItem = httpDownloader.get(uri, credentialItem); downloadItem.checkNoErrorList(200); Node xmlDoc = null; if (xsl != null && xsl.length() > 0) { xmlTempResult = File.createTempFile("ossupload", ".xml"); DomUtils.xslt(new StreamSource(downloadItem.getContentInputStream()), xsl, xmlTempResult); xmlDoc = DomUtils.readXml(new StreamSource(xmlTempResult), false); } else xmlDoc = DomUtils.readXml(new InputSource(downloadItem.getContentInputStream()), false); client.updateXmlDocuments(xmlDoc, bufferSize, credentialItem, httpDownloader, taskLog); client.deleteXmlDocuments(xmlDoc, bufferSize, taskLog); } catch (XPathExpressionException e) { throw new SearchLibException(e); } catch (NoSuchAlgorithmException e) { throw new SearchLibException(e); } catch (ParserConfigurationException e) { throw new SearchLibException(e); } catch (SAXException e) { throw new SearchLibException(e); } catch (IOException e) { throw new SearchLibException(e); } catch (URISyntaxException e) { throw new SearchLibException(e); } catch (InstantiationException e) { throw new SearchLibException(e); } catch (IllegalAccessException e) { throw new SearchLibException(e); } catch (ClassNotFoundException e) { throw new SearchLibException(e); } catch (TransformerException e) { throw new SearchLibException(e); } finally { if (xmlTempResult != null) xmlTempResult.delete(); httpDownloader.release(); } } }