/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.source.ambient;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.*;
import org.carrot2.util.attribute.*;
import org.carrot2.util.attribute.constraint.IntRange;
/**
* Serves documents from the ODP239 test set. For more details, please see:
* http://credo.fub.it/odp239/.
*/
@Bindable(prefix = "Odp239DocumentSource", inherit = CommonAttributes.class)
public class Odp239DocumentSource extends FubDocumentSource
{
static final FubTestCollection DATA = new FubTestCollection("/odp239");
static final int TOPIC_COUNT = 239;
static final int MAX_RESULTS_PER_TOPIC = 1000;
/**
* ODP239 Topic. The ODP239 Topic to load documents from.
*/
@Input
@Processing
@Attribute
@Required
@Group(TOPIC_ID)
@Level(AttributeLevel.BASIC)
public Odp239Topic topic = Odp239Topic.ARTS_ANIMATION;
@Processing
@Input
@Attribute(key = AttributeNames.RESULTS, inherit = true)
@IntRange(min = 1, max = MAX_RESULTS_PER_TOPIC)
public int results = MAX_RESULTS_PER_TOPIC;
@Processing
@Output
@Attribute(key = AttributeNames.RESULTS_TOTAL, inherit = true)
public long resultsTotal = MAX_RESULTS_PER_TOPIC;
/**
* All available ODP239 topics.
*/
public static enum Odp239Topic
{
ARTS_ANIMATION(1, "Arts > Animation"),
ARTS_ARCHITECTURE(2, "Arts > Architecture"),
ARTS_BODYART(3, "Arts > Bodyart"),
ARTS_COMICS(4, "Arts > Comics"),
ARTS_CRAFTS(5, "Arts > Crafts"),
ARTS_EDUCATION(6, "Arts > Education"),
ARTS_ILLUSTRATION(7, "Arts > Illustration"),
ARTS_LITERATURE(8, "Arts > Literature"),
ARTS_MOVIES(9, "Arts > Movies"),
ARTS_MUSIC(10, "Arts > Music"),
ARTS_ONLINE_WRITING(11, "Arts > Online Writing"),
ARTS_PEOPLE(12, "Arts > People"),
ARTS_PERFORMING_ARTS(13, "Arts > Performing Arts"),
ARTS_PHOTOGRAPHY(14, "Arts > Photography"),
ARTS_RADIO(15, "Arts > Radio"),
ARTS_TELEVISION(16, "Arts > Television"),
ARTS_VIDEO(17, "Arts > Video"),
ARTS_VISUAL_ARTS(18, "Arts > Visual Arts"),
ARTS_WRITERS_RESOURCES(19, "Arts > Writers Resources"),
BUSINESS_AGRICULTURE_AND_FORESTRY(20, "Business > Agriculture and Forestry"),
BUSINESS_ARTS_AND_ENTERTAINMENT(21, "Business > Arts and Entertainment"),
BUSINESS_AUTOMOTIVE(22, "Business > Automotive"),
BUSINESS_BUSINESS_SERVICES(23, "Business > Business Services"),
BUSINESS_CHEMICALS(24, "Business > Chemicals"),
BUSINESS_CONSTRUCTION_AND_MAINTENANCE(25, "Business > Construction and Maintenance"),
BUSINESS_CONSUMER_GOODS_AND_SERVICES(26, "Business > Consumer Goods and Services"),
BUSINESS_ECOMMERCE(27, "Business > E-Commerce"),
BUSINESS_EDUCATION_AND_TRAINING(28, "Business > Education and Training"),
BUSINESS_ELECTRONICS_AND_ELECTRICAL(29, "Business > Electronics and Electrical"),
BUSINESS_ENERGY(30, "Business > Energy"),
BUSINESS_FINANCIAL_SERVICES(31, "Business > Financial Services"),
BUSINESS_FOOD_AND_RELATED_PRODUCTS(32, "Business > Food and Related Products"),
BUSINESS_HEALTHCARE(33, "Business > Healthcare"),
BUSINESS_HOSPITALITY(34, "Business > Hospitality"),
BUSINESS_HUMAN_RESOURCES(35, "Business > Human Resources"),
BUSINESS_INDUSTRIAL_GOODS_AND_SERVICES(36, "Business > Industrial Goods and Services"),
BUSINESS_INFORMATION_TECHNOLOGY(37, "Business > Information Technology"),
BUSINESS_INVESTING(38, "Business > Investing"),
BUSINESS_MANAGEMENT(39, "Business > Management"),
BUSINESS_MARKETING_AND_ADVERTISING(40, "Business > Marketing and Advertising"),
BUSINESS_MATERIALS(41, "Business > Materials"),
BUSINESS_OPPORTUNITIES(42, "Business > Opportunities"),
BUSINESS_REAL_ESTATE(43, "Business > Real Estate"),
BUSINESS_RETAIL_TRADE(44, "Business > Retail Trade"),
BUSINESS_SMALL_BUSINESS(45, "Business > Small Business"),
BUSINESS_TELECOMMUNICATIONS(46, "Business > Telecommunications"),
BUSINESS_TEXTILES_AND_NONWOVENS(47, "Business > Textiles and Nonwovens"),
BUSINESS_TRANSPORTATION_AND_LOGISTICS(48, "Business > Transportation and Logistics"),
COMPUTERS_ALGORITHMS(49, "Computers > Algorithms"),
COMPUTERS_ARTIFICIAL_INTELLIGENCE(50, "Computers > Artificial Intelligence"),
COMPUTERS_ARTIFICIAL_LIFE(51, "Computers > Artificial Life"),
COMPUTERS_CAD_AND_CAM(52, "Computers > CAD and CAM"),
COMPUTERS_COMPANIES(53, "Computers > Companies"),
COMPUTERS_COMPUTER_SCIENCE(54, "Computers > Computer Science"),
COMPUTERS_CONSULTANTS(55, "Computers > Consultants"),
COMPUTERS_DATA_COMMUNICATIONS(56, "Computers > Data Communications"),
COMPUTERS_DATA_FORMATS(57, "Computers > Data Formats"),
COMPUTERS_EMULATORS(58, "Computers > Emulators"),
COMPUTERS_GRAPHICS(59, "Computers > Graphics"),
COMPUTERS_HACKING(60, "Computers > Hacking"),
COMPUTERS_HARDWARE(61, "Computers > Hardware"),
COMPUTERS_INTERNET(62, "Computers > Internet"),
COMPUTERS_MOBILE_COMPUTING(63, "Computers > Mobile Computing"),
COMPUTERS_MULTIMEDIA(64, "Computers > Multimedia"),
COMPUTERS_OPEN_SOURCE(65, "Computers > Open Source"),
COMPUTERS_PARALLEL_COMPUTING(66, "Computers > Parallel Computing"),
COMPUTERS_PROGRAMMING(67, "Computers > Programming"),
COMPUTERS_ROBOTICS(68, "Computers > Robotics"),
COMPUTERS_SECURITY(69, "Computers > Security"),
COMPUTERS_SOFTWARE(70, "Computers > Software"),
COMPUTERS_SPEECH_TECHNOLOGY(71, "Computers > Speech Technology"),
COMPUTERS_SYSTEMS(72, "Computers > Systems"),
COMPUTERS_USENET(73, "Computers > Usenet"),
COMPUTERS_VIRTUAL_REALITY(74, "Computers > Virtual Reality"),
GAMES_BOARD_GAMES(75, "Games > Board Games"),
GAMES_GAMBLING(76, "Games > Gambling"),
GAMES_MINIATURES(77, "Games > Miniatures"),
GAMES_ROLEPLAYING(78, "Games > Roleplaying"),
GAMES_TRADING_CARD_GAMES(79, "Games > Trading Card Games"),
GAMES_VIDEO_GAMES(80, "Games > Video Games"),
HEALTH_ALTERNATIVE(81, "Health > Alternative"),
HEALTH_ANIMAL(82, "Health > Animal"),
HEALTH_BEAUTY(83, "Health > Beauty"),
HEALTH_CHILD_HEALTH(84, "Health > Child Health"),
HEALTH_CONDITIONS_AND_DISEASES(85, "Health > Conditions and Diseases"),
HEALTH_DENTISTRY(86, "Health > Dentistry"),
HEALTH_FITNESS(87, "Health > Fitness"),
HEALTH_MEDICINE(88, "Health > Medicine"),
HEALTH_MENTAL_HEALTH(89, "Health > Mental Health"),
HEALTH_NURSING(90, "Health > Nursing"),
HEALTH_NUTRITION(91, "Health > Nutrition"),
HEALTH_OCCUPATIONAL_HEALTH_AND_SAFETY(92, "Health > Occupational Health and Safety"),
HEALTH_PROFESSIONS(93, "Health > Professions"),
HEALTH_PUBLIC_HEALTH_AND_SAFETY(94, "Health > Public Health and Safety"),
HEALTH_REPRODUCTIVE_HEALTH(95, "Health > Reproductive Health"),
HEALTH_SENIOR_HEALTH(96, "Health > Senior Health"),
HEALTH_WOMENS_HEALTH(97, "Health > Women's Health"),
HOME_CONSUMER_INFORMATION(98, "Home > Consumer Information"),
HOME_COOKING(99, "Home > Cooking"),
HOME_FAMILY(100, "Home > Family"),
HOME_GARDENING(101, "Home > Gardening"),
HOME_HOME_IMPROVEMENT(102, "Home > Home Improvement"),
HOME_PERSONAL_FINANCE(103, "Home > Personal Finance"),
KIDS_AND_TEENS_ARTS(104, "Kids and Teens > Arts"),
KIDS_AND_TEENS_ENTERTAINMENT(105, "Kids and Teens > Entertainment"),
KIDS_AND_TEENS_GAMES(106, "Kids and Teens > Games"),
KIDS_AND_TEENS_HEALTH(107, "Kids and Teens > Health"),
KIDS_AND_TEENS_INTERNATIONAL(108, "Kids and Teens > International"),
KIDS_AND_TEENS_PEOPLE_AND_SOCIETY(109, "Kids and Teens > People and Society"),
KIDS_AND_TEENS_PRESCHOOL(110, "Kids and Teens > Pre-School"),
KIDS_AND_TEENS_SCHOOL_TIME(111, "Kids and Teens > School Time"),
KIDS_AND_TEENS_SPORTS_AND_HOBBIES(112, "Kids and Teens > Sports and Hobbies"),
KIDS_AND_TEENS_TEEN_LIFE(113, "Kids and Teens > Teen Life"),
NEWS_MEDIA(114, "News > Media"),
NEWS_NEWSPAPERS(115, "News > Newspapers"),
NEWS_WEATHER(116, "News > Weather"),
RECREATION_ANTIQUES(117, "Recreation > Antiques"),
RECREATION_AUDIO(118, "Recreation > Audio"),
RECREATION_AUTOS(119, "Recreation > Autos"),
RECREATION_AVIATION(120, "Recreation > Aviation"),
RECREATION_BIRDING(121, "Recreation > Birding"),
RECREATION_BOATING(122, "Recreation > Boating"),
RECREATION_CAMPS(123, "Recreation > Camps"),
RECREATION_CLIMBING(124, "Recreation > Climbing"),
RECREATION_COLLECTING(125, "Recreation > Collecting"),
RECREATION_FOOD(126, "Recreation > Food"),
RECREATION_GUNS(127, "Recreation > Guns"),
RECREATION_HUMOR(128, "Recreation > Humor"),
RECREATION_KITES(129, "Recreation > Kites"),
RECREATION_LIVING_HISTORY(130, "Recreation > Living History"),
RECREATION_MODELS(131, "Recreation > Models"),
RECREATION_MOTORCYCLES(132, "Recreation > Motorcycles"),
RECREATION_OUTDOORS(133, "Recreation > Outdoors"),
RECREATION_PETS(134, "Recreation > Pets"),
RECREATION_ROADS_AND_HIGHWAYS(135, "Recreation > Roads and Highways"),
RECREATION_SCOUTING(136, "Recreation > Scouting"),
RECREATION_THEME_PARKS(137, "Recreation > Theme Parks"),
RECREATION_TOBACCO(138, "Recreation > Tobacco"),
RECREATION_TRAINS_AND_RAILROADS(139, "Recreation > Trains and Railroads"),
REFERENCE_ARCHIVES(140, "Reference > Archives"),
REFERENCE_DICTIONARIES(141, "Reference > Dictionaries"),
REFERENCE_EDUCATION(142, "Reference > Education"),
REFERENCE_KNOWLEDGE_MANAGEMENT(143, "Reference > Knowledge Management"),
REFERENCE_LIBRARIES(144, "Reference > Libraries"),
REFERENCE_MAPS(145, "Reference > Maps"),
REFERENCE_MUSEUMS(146, "Reference > Museums"),
REFERENCE_QUOTATIONS(147, "Reference > Quotations"),
SCIENCE_AGRICULTURE(148, "Science > Agriculture"),
SCIENCE_ANOMALIES_AND_ALTERNATIVE_SCIENCE(149, "Science > Anomalies and Alternative Science"),
SCIENCE_ASTRONOMY(150, "Science > Astronomy"),
SCIENCE_BIOLOGY(151, "Science > Biology"),
SCIENCE_CHEMISTRY(152, "Science > Chemistry"),
SCIENCE_EARTH_SCIENCES(153, "Science > Earth Sciences"),
SCIENCE_EDUCATIONAL_RESOURCES(154, "Science > Educational Resources"),
SCIENCE_ENVIRONMENT(155, "Science > Environment"),
SCIENCE_INSTRUMENTS_AND_SUPPLIES(156, "Science > Instruments and Supplies"),
SCIENCE_MATH(157, "Science > Math"),
SCIENCE_PHYSICS(158, "Science > Physics"),
SCIENCE_SCIENCE_IN_SOCIETY(159, "Science > Science in Society"),
SCIENCE_SOCIAL_SCIENCES(160, "Science > Social Sciences"),
SCIENCE_TECHNOLOGY(161, "Science > Technology"),
SHOPPING_ANTIQUES_AND_COLLECTIBLES(162, "Shopping > Antiques and Collectibles"),
SHOPPING_AUCTIONS(163, "Shopping > Auctions"),
SHOPPING_CHILDREN(164, "Shopping > Children"),
SHOPPING_CLASSIFIEDS(165, "Shopping > Classifieds"),
SHOPPING_CLOTHING(166, "Shopping > Clothing"),
SHOPPING_CONSUMER_ELECTRONICS(167, "Shopping > Consumer Electronics"),
SHOPPING_CRAFTS(168, "Shopping > Crafts"),
SHOPPING_ENTERTAINMENT(169, "Shopping > Entertainment"),
SHOPPING_ETHNIC_AND_REGIONAL(170, "Shopping > Ethnic and Regional"),
SHOPPING_FOOD(171, "Shopping > Food"),
SHOPPING_GENERAL_MERCHANDISE(172, "Shopping > General Merchandise"),
SHOPPING_GIFTS(173, "Shopping > Gifts"),
SHOPPING_HEALTH(174, "Shopping > Health"),
SHOPPING_HOME_AND_GARDEN(175, "Shopping > Home and Garden"),
SHOPPING_JEWELRY(176, "Shopping > Jewelry"),
SHOPPING_NICHE(177, "Shopping > Niche"),
SHOPPING_PETS(178, "Shopping > Pets"),
SHOPPING_PHOTOGRAPHY(179, "Shopping > Photography"),
SHOPPING_PUBLICATIONS(180, "Shopping > Publications"),
SHOPPING_RECREATION(181, "Shopping > Recreation"),
SHOPPING_SPORTS(182, "Shopping > Sports"),
SHOPPING_TOOLS(183, "Shopping > Tools"),
SHOPPING_TOYS_AND_GAMES(184, "Shopping > Toys and Games"),
SHOPPING_VEHICLES(185, "Shopping > Vehicles"),
SHOPPING_VISUAL_ARTS(186, "Shopping > Visual Arts"),
SOCIETY_ACTIVISM(187, "Society > Activism"),
SOCIETY_CRIME(188, "Society > Crime"),
SOCIETY_DISABLED(189, "Society > Disabled"),
SOCIETY_ETHNICITY(190, "Society > Ethnicity"),
SOCIETY_FUTURE(191, "Society > Future"),
SOCIETY_GAY_LESBIAN_AND_BISEXUAL(192, "Society > Gay, Lesbian, and Bisexual"),
SOCIETY_GENEALOGY(193, "Society > Genealogy"),
SOCIETY_GOVERNMENT(194, "Society > Government"),
SOCIETY_HISTORY(195, "Society > History"),
SOCIETY_HOLIDAYS(196, "Society > Holidays"),
SOCIETY_ISSUES(197, "Society > Issues"),
SOCIETY_LAW(198, "Society > Law"),
SOCIETY_LIFESTYLE_CHOICES(199, "Society > Lifestyle Choices"),
SOCIETY_MILITARY(200, "Society > Military"),
SOCIETY_ORGANIZATIONS(201, "Society > Organizations"),
SOCIETY_PARANORMAL(202, "Society > Paranormal"),
SOCIETY_PEOPLE(203, "Society > People"),
SOCIETY_PHILANTHROPY(204, "Society > Philanthropy"),
SOCIETY_PHILOSOPHY(205, "Society > Philosophy"),
SOCIETY_POLITICS(206, "Society > Politics"),
SOCIETY_RELATIONSHIPS(207, "Society > Relationships"),
SOCIETY_RELIGION_AND_SPIRITUALITY(208, "Society > Religion and Spirituality"),
SOCIETY_SEXUALITY(209, "Society > Sexuality"),
SOCIETY_SUBCULTURES(210, "Society > Subcultures"),
SOCIETY_SUPPORT_GROUPS(211, "Society > Support Groups"),
SOCIETY_TRANSGENDERED(212, "Society > Transgendered"),
SOCIETY_WORK(213, "Society > Work"),
SPORTS_ADVENTURE_RACING(214, "Sports > Adventure Racing"),
SPORTS_BASEBALL(215, "Sports > Baseball"),
SPORTS_BASKETBALL(216, "Sports > Basketball"),
SPORTS_BOWLING(217, "Sports > Bowling"),
SPORTS_BOXING(218, "Sports > Boxing"),
SPORTS_CHEERLEADING(219, "Sports > Cheerleading"),
SPORTS_CRICKET(220, "Sports > Cricket"),
SPORTS_CYCLING(221, "Sports > Cycling"),
SPORTS_DISABLED(222, "Sports > Disabled"),
SPORTS_EQUESTRIAN(223, "Sports > Equestrian"),
SPORTS_FANTASY(224, "Sports > Fantasy"),
SPORTS_GOLF(225, "Sports > Golf"),
SPORTS_HOCKEY(226, "Sports > Hockey"),
SPORTS_LACROSSE(227, "Sports > Lacrosse"),
SPORTS_MARTIAL_ARTS(228, "Sports > Martial Arts"),
SPORTS_MOTORSPORTS(229, "Sports > Motorsports"),
SPORTS_PAINTBALL(230, "Sports > Paintball"),
SPORTS_RESOURCES(231, "Sports > Resources"),
SPORTS_RODEO(232, "Sports > Rodeo"),
SPORTS_RUNNING(233, "Sports > Running"),
SPORTS_SKATEBOARDING(234, "Sports > Skateboarding"),
SPORTS_SOCCER(235, "Sports > Soccer"),
SPORTS_TENNIS(236, "Sports > Tennis"),
SPORTS_TRACK_AND_FIELD(237, "Sports > Track and Field"),
SPORTS_VOLLEYBALL(238, "Sports > Volleyball"),
SPORTS_WATER_SPORTS(239, "Sports > Water Sports");
private int topicId;
private String query;
private Odp239Topic(int topicId, String query)
{
this.topicId = topicId;
this.query = query;
}
public int getTopicId()
{
return topicId;
}
@Override
public String toString()
{
return query;
}
}
@Override
public void process() throws ProcessingException
{
query = topic.query;
processInternal(DATA, topic.getTopicId(), results);
}
public static String getTopicLabel(String topicId)
{
return DATA.getTopicLabel(topicId);
}
}