/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.webui.service.impl; import java.lang.invoke.MethodHandles; import java.sql.SQLException; import java.util.List; import javax.annotation.Resource; import org.apache.nutch.webui.client.NutchClient; import org.apache.nutch.webui.client.NutchClientFactory; import org.apache.nutch.webui.client.impl.CrawlingCycle; import org.apache.nutch.webui.client.impl.RemoteCommandsBatchFactory; import org.apache.nutch.webui.client.impl.CrawlingCycleListener; import org.apache.nutch.webui.client.impl.RemoteCommand; import org.apache.nutch.webui.client.impl.RemoteCommandExecutor; import org.apache.nutch.webui.client.model.Crawl; import org.apache.nutch.webui.client.model.Crawl.CrawlStatus; import org.apache.nutch.webui.model.NutchInstance; import org.apache.nutch.webui.service.CrawlService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; import com.j256.ormlite.dao.Dao; @Service public class CrawlServiceImpl implements CrawlService, CrawlingCycleListener { private static final Logger LOG = LoggerFactory .getLogger(MethodHandles.lookup().lookupClass()); @Resource private Dao<Crawl, Long> crawlDao; @Resource private NutchClientFactory nutchClientFactory; @Resource private RemoteCommandsBatchFactory commandFactory; @Override @Async public void startCrawl(Long crawlId, NutchInstance instance) { Crawl crawl = null; try { crawl = crawlDao.queryForId(crawlId); if(crawl.getCrawlId()==null) { crawl.setCrawlId("crawl-" + crawlId.toString()); } NutchClient client = nutchClientFactory.getClient(instance); String seedDirectory = client.createSeed(crawl.getSeedList()); crawl.setSeedDirectory(seedDirectory); List<RemoteCommand> commands = commandFactory.createCommands(crawl); RemoteCommandExecutor executor = new RemoteCommandExecutor(client); CrawlingCycle cycle = new CrawlingCycle(this, executor, crawl, commands); cycle.executeCrawlCycle(); } catch (Exception e) { crawl.setStatus(CrawlStatus.ERROR); saveCrawl(crawl); LOG.error("exception occured", e); } } @Override public List<Crawl> getCrawls() { try { return crawlDao.queryForAll(); } catch (SQLException e) { throw new RuntimeException(e); } } @Override public void saveCrawl(Crawl crawl) { try { crawlDao.createOrUpdate(crawl); } catch (SQLException e) { throw new RuntimeException(e); } } @Override public void deleteCrawl(Long crawlId) { try { crawlDao.deleteById(crawlId); } catch (SQLException e) { throw new RuntimeException(e); } } @Override public void crawlingStarted(Crawl crawl) { crawl.setStatus(CrawlStatus.CRAWLING); crawl.setProgress(0); saveCrawl(crawl); } @Override public void onCrawlError(Crawl crawl, String msg) { crawl.setStatus(CrawlStatus.ERROR); saveCrawl(crawl); } @Override public void commandExecuted(Crawl crawl, RemoteCommand command, int progress) { crawl.setProgress(progress); saveCrawl(crawl); } @Override public void crawlingFinished(Crawl crawl) { crawl.setStatus(CrawlStatus.FINISHED); saveCrawl(crawl); } }