/*
* This file is part of Flicklib.
*
* Copyright (C) Francis De Brabandere
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.flicklib.service.movie.imdb;
import com.flicklib.domain.MovieType;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Copy of jmoviedb functionality!
* @author francisdb
*/
class ImdbParserRegex {
private String html;
/**
* The default constructor
* @param html - a HTML document
*/
ImdbParserRegex(String html) {
this.html = html;
}
/**
* Returns an array of the movie's genres, if the open document is a movie page.
* @return an array of genres, or an empty array if none were found.
*/
List<String> getGenres() {
/*
* Examples:
* <a href="/Sections/Genres/Crime/">Crime</a>
* <a href="/Sections/Genres/Film-Noir/">Film-Noir</a>
* <a href="/Sections/Genres/Thriller/">Thriller</a>
*/
Pattern patternGenre = Pattern.compile("<a href=\"/Sections/Genres/[^/]+/\">([^<]+)</a>");
Matcher matcherGenre = patternGenre.matcher(html);
List<String> temp = new ArrayList<String>();
while (matcherGenre.find()) {
temp.add(matcherGenre.group(1));
}
return temp;
}
MovieType getType() {
MovieType type = null;
Pattern patternType = Pattern.compile("<h1>(.+)</h1>");
Matcher matcherType = patternType.matcher(html);
if (matcherType.find()) {
String match = matcherType.group(1);
type = getType(match, true);
}
return type;
}
/**
* TODO cleanup put in other class?
* @param header
* @return
*/
static MovieType getType(final String header, final boolean html) {
MovieType type = MovieType.MOVIE;
if (header.contains("(TV)")) {
type = MovieType.TV_MOVIE;
} else if (header.contains("(V)")) {
type = MovieType.VIDEO_MOVIE;
} else if (header.contains("TV mini-series")) {
type = MovieType.MINI_SERIES;
} else if (header.contains("TVseries")) {
type = MovieType.TV_SERIES;
} else if (html && header.startsWith(""")) {
type = MovieType.TV_SERIES;
} else if (!html && header.startsWith("\"")) {
type = MovieType.TV_SERIES;
}
return type;
}
/**
* Remove quote at beginning and end of title for TV-series
* @param title
* @return
*/
static String cleanTitle(String title){
if (title.startsWith("\"") && title.endsWith("\"")) {
title = title.substring(1, title.length() - 1);
}
return title;
}
/**
* Returns the movie's production year, if the open document is a movie page.
* @return a year
*/
int getYear() {
Pattern patternYear = Pattern.compile("<a href=\"/Sections/Years/\\d{4}\">(\\d{4})</a>");
Matcher matcherYear = patternYear.matcher(html);
if (matcherYear.find()) {
return Integer.parseInt(matcherYear.group(1));
}
return 0;
}
}