/**
 * @(#) CourseSearchParser.java
 *
 * This file is part of the Course Scheduler, an open source, cross platform
 * course scheduling tool, configurable for most universities.
 *
 * Copyright (C) 2010-2014 Devyse.io; All rights reserved.
 *
 * @license GNU General Public License version 3 (GPLv3)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/.
 */
package io.devyse.scheduler.parse.jsoup.banner;

import io.devyse.scheduler.parse.jsoup.AbstractParser;
import io.devyse.scheduler.retrieval.CoursePersister;

import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.ext.XLogger;
import org.slf4j.ext.XLoggerFactory;

/**
 * Process the course search results page into separate sub-documents for each course
 *
 * @author Mike Reinhold
 * @since 4.12.4
 */
public class CourseSearchParser extends AbstractParser<Void> {

    /**
     * Static logger
     */
    private static final XLogger logger = XLoggerFactory.getXLogger(CourseSearchParser.class);

    /**
     * Serial Version UID
     */
    private static final long serialVersionUID = 1L;

    /**
     * The course data persister which will store the course into the data model
     */
    private CoursePersister persister;

    /**
     * Course Search results page parser for retrieving Course Data
     *
     * @param document the document containing the course search results
     * @param timeout the socket connection timeout for the course search
     * @param persister the course persister callback which saves the data
     */
    public CourseSearchParser(Document document, int timeout, CoursePersister persister){
        super(document, timeout);

        this.persister = persister;
    }

    /**
     * Split the course search results into one sub-document per section, fork a
     * {@link CourseParser} for each sub-document, then join every parser and hand
     * its result to the persister.
     *
     * A section is made of two consecutive table rows: a "header" row followed by
     * a "detail" row. A trailing header row without a detail row is logged and
     * skipped instead of failing the whole parse.
     *
     * @param document the course search results document
     *
     * @see io.devyse.scheduler.parse.jsoup.AbstractParser#parse(org.jsoup.nodes.Document)
     */
    protected void parse(Document document){
        // NOTE: HashSet iteration order is unspecified, so the join/persist loop
        // below does not necessarily follow the order of the sections on the page.
        Set<CourseParser> courseParsers = new HashSet<>();

        logger.debug("\n=== Section Listing ==============================");
        Elements sectionRows = document.select("table.datadisplaytable > tbody > tr:has(th.ddtitle, td.dddefault span)");
        logger.debug("Found {} Sections ({} Rows)", sectionRows.size()/2, sectionRows.size());

        // Walk the sibling chain starting at the first matched row, consuming two
        // rows (header + detail) per section. first() yields null when nothing
        // matched, in which case the loop body never runs.
        Element headerRow = sectionRows.first();
        while(headerRow != null){
            Element detailRow = headerRow.nextElementSibling();
            if(detailRow == null){
                // Dangling header row (odd row count / truncated page): there is
                // nothing to pair it with, so stop instead of dereferencing null.
                logger.warn("Section header row has no matching detail row; skipping incomplete section");
                break;
            }

            // Clone the rows so they can be re-parented into a standalone document
            // without detaching them from the source document.
            Element section = headerRow.clone();
            Element sectionDetail = detailRow.clone();

            Document sectionDocument = new Document(document.baseUri());
            sectionDocument.appendChild(section);
            sectionDocument.appendChild(sectionDetail);

            CourseParser courseParser = new CourseParser(sectionDocument, this.getTimeout());
            courseParsers.add(courseParser);
            courseParser.fork();

            headerRow = detailRow.nextElementSibling();
        }

        //TODO evaluate moving this into the CourseParser instead of here
        //may improve performance a bit since we don't have to wait for threads to join,
        //but may limit our ability to track progress
        int section = 0;
        for(CourseParser parser : courseParsers){
            Map<String, String> result = parser.join();

            logger.debug("\n---- Section {}", ++section);
            persister.persist(result);
        }
    }
}