package edu.brown.benchmark.wikipedia.data;
import edu.brown.statistics.ObjectHistogram;
public abstract class PageHistograms {
/**
* The length of the PAGE_TITLE column
*/
public static final ObjectHistogram<Integer> TITLE_LENGTH = new ObjectHistogram<Integer>() {
{
this.put(1, 5);
this.put(2, 44);
this.put(3, 364);
this.put(4, 976);
this.put(5, 1352);
this.put(6, 2267);
this.put(7, 2868);
this.put(8, 3444);
this.put(9, 3799);
this.put(10, 4388);
this.put(11, 5637);
this.put(12, 7784);
this.put(13, 9413);
this.put(14, 7919);
this.put(15, 5127);
this.put(16, 3810);
this.put(17, 3540);
this.put(18, 3323);
this.put(19, 2912);
this.put(20, 2652);
this.put(21, 2490);
this.put(22, 2320);
this.put(23, 2158);
this.put(24, 1957);
this.put(25, 1701);
this.put(26, 1602);
this.put(27, 1419);
this.put(28, 1385);
this.put(29, 1168);
this.put(30, 1102);
this.put(31, 1030);
this.put(32, 984);
this.put(33, 852);
this.put(34, 801);
this.put(35, 762);
this.put(36, 639);
this.put(37, 593);
this.put(38, 531);
this.put(39, 524);
this.put(40, 472);
this.put(41, 404);
this.put(42, 353);
this.put(43, 344);
this.put(44, 307);
this.put(45, 240);
this.put(46, 250);
this.put(47, 169);
this.put(48, 195);
this.put(49, 159);
this.put(50, 130);
this.put(51, 115);
this.put(52, 124);
this.put(53, 104);
this.put(54, 78);
this.put(55, 95);
this.put(56, 77);
this.put(57, 64);
this.put(58, 66);
this.put(59, 47);
this.put(60, 75);
this.put(61, 46);
this.put(62, 45);
this.put(63, 33);
this.put(64, 39);
this.put(65, 36);
this.put(66, 30);
this.put(67, 24);
this.put(68, 28);
this.put(69, 22);
this.put(70, 13);
this.put(71, 23);
this.put(72, 15);
this.put(73, 12);
this.put(74, 11);
this.put(75, 6);
this.put(76, 12);
this.put(77, 10);
this.put(78, 7);
this.put(79, 6);
this.put(80, 7);
this.put(81, 3);
this.put(83, 4);
this.put(84, 4);
this.put(85, 2);
this.put(86, 2);
this.put(87, 4);
this.put(88, 4);
this.put(89, 1);
this.put(90, 1);
this.put(91, 5);
this.put(92, 3);
this.put(93, 6);
this.put(94, 1);
this.put(95, 3);
this.put(96, 5);
this.put(97, 1);
this.put(99, 1);
this.put(100, 1);
this.put(103, 2);
this.put(104, 1);
this.put(105, 1);
this.put(106, 2);
this.put(109, 2);
this.put(111, 1);
this.put(115, 1);
this.put(117, 1);
this.put(118, 1);
this.put(134, 1);
this.put(141, 1);
}
};
/**
* Revisions per page
* This seems way off because I think our sample data set is incomplete
*/
public static final ObjectHistogram<Integer> REVISIONS_PER_PAGE = new ObjectHistogram<Integer>() {
{
this.put(1, 39401); // XXX 39401
this.put(2, 16869); // XXX 16869
this.put(3, 8127);
this.put(4, 5229);
this.put(5, 3621);
this.put(6, 2538);
this.put(7, 2001);
this.put(8, 1668);
this.put(9, 1419);
this.put(10, 1183);
this.put(11, 1088);
this.put(12, 981);
this.put(13, 857);
this.put(14, 771);
this.put(15, 651);
this.put(16, 637);
this.put(17, 633);
this.put(18, 537);
this.put(19, 488);
this.put(20, 500);
this.put(21, 467);
this.put(22, 394);
this.put(23, 378);
this.put(24, 354);
this.put(25, 303);
this.put(26, 304);
this.put(27, 285);
this.put(28, 249);
this.put(29, 232);
this.put(30, 258);
this.put(31, 234);
this.put(32, 205);
this.put(33, 223);
this.put(34, 171);
this.put(35, 177);
this.put(36, 170);
this.put(37, 171);
this.put(38, 155);
this.put(39, 150);
this.put(40, 129);
this.put(41, 142);
this.put(42, 120);
this.put(43, 118);
this.put(44, 129);
this.put(45, 113);
this.put(46, 92);
this.put(47, 113);
this.put(48, 98);
this.put(49, 100);
this.put(50, 76);
this.put(51, 122);
this.put(52, 89);
this.put(53, 102);
this.put(54, 84);
this.put(55, 84);
this.put(56, 78);
this.put(57, 76);
this.put(58, 71);
this.put(59, 63);
this.put(60, 69);
this.put(61, 76);
this.put(62, 60);
this.put(63, 53);
this.put(64, 56);
this.put(65, 52);
this.put(66, 47);
this.put(67, 46);
this.put(68, 46);
this.put(69, 55);
this.put(70, 46);
this.put(71, 37);
this.put(72, 50);
this.put(73, 43);
this.put(74, 43);
this.put(75, 34);
this.put(76, 46);
this.put(77, 38);
this.put(78, 37);
this.put(79, 34);
this.put(80, 49);
this.put(81, 34);
this.put(82, 33);
this.put(83, 33);
this.put(84, 40);
this.put(85, 33);
this.put(86, 28);
this.put(87, 35);
this.put(88, 29);
this.put(89, 35);
this.put(90, 20);
this.put(91, 20);
this.put(92, 35);
this.put(93, 32);
this.put(94, 27);
this.put(95, 25);
this.put(96, 25);
this.put(97, 25);
this.put(98, 28);
this.put(99, 21);
this.put(100, 244);
this.put(110, 179);
this.put(120, 167);
this.put(130, 137);
this.put(140, 98);
this.put(150, 105);
this.put(160, 88);
this.put(170, 81);
this.put(180, 72);
this.put(190, 69);
this.put(200, 62);
this.put(210, 60);
this.put(220, 38);
this.put(230, 45);
this.put(240, 43);
this.put(250, 36);
this.put(260, 38);
this.put(270, 43);
this.put(280, 36);
this.put(290, 18);
this.put(300, 33);
this.put(310, 20);
this.put(320, 18);
this.put(330, 32);
this.put(340, 19);
this.put(350, 23);
this.put(360, 27);
this.put(370, 22);
this.put(380, 17);
this.put(390, 19);
this.put(400, 9);
this.put(410, 13);
this.put(420, 12);
this.put(430, 19);
this.put(440, 16);
this.put(450, 12);
this.put(460, 10);
this.put(470, 8);
this.put(480, 5);
this.put(490, 6);
this.put(500, 7);
this.put(510, 9);
this.put(520, 9);
this.put(530, 7);
this.put(540, 12);
this.put(550, 9);
this.put(560, 8);
this.put(570, 11);
this.put(580, 4);
this.put(590, 3);
this.put(600, 12);
this.put(610, 9);
this.put(620, 5);
this.put(630, 7);
this.put(640, 5);
this.put(650, 5);
this.put(660, 4);
this.put(670, 5);
this.put(680, 2);
this.put(690, 2);
this.put(700, 4);
this.put(710, 5);
this.put(720, 4);
this.put(730, 6);
this.put(740, 7);
this.put(750, 5);
this.put(760, 2);
this.put(770, 1);
this.put(780, 2);
this.put(800, 4);
this.put(810, 1);
this.put(820, 1);
this.put(830, 1);
this.put(840, 6);
this.put(850, 3);
this.put(860, 4);
this.put(870, 1);
this.put(880, 2);
this.put(890, 4);
this.put(900, 2);
this.put(910, 2);
this.put(920, 2);
this.put(930, 1);
this.put(940, 3);
this.put(950, 6);
this.put(960, 4);
this.put(970, 1);
this.put(980, 3);
this.put(990, 2);
this.put(1000, 1);
}
};
/**
* The histogram of the PAGE_NAMESPACE column
*/
public static final ObjectHistogram<Integer> NAMESPACE = new ObjectHistogram<Integer>() {
{
this.put(0, 40847);
this.put(1, 15304);
this.put(2, 4718);
this.put(3, 23563);
this.put(4, 2562);
this.put(5, 268);
this.put(6, 6991);
this.put(7, 330);
this.put(8, 9);
this.put(9, 6);
this.put(10, 1187);
this.put(11, 263);
this.put(12, 3);
this.put(13, 2);
this.put(14, 2831);
this.put(15, 694);
this.put(100, 393);
this.put(101, 29);
}
};
/**
* The histogram of the PAGE_RESTRICTIONS column
*/
public static final ObjectHistogram<String> RESTRICTIONS = new ObjectHistogram<String>() {
{
this.put("", 99917);
this.put("edit=autoconfirmed:move=autoconfirmed", 20);
this.put("edit=autoconfirmed:move=sysop", 8);
this.put("edit=sysop:move=sysop", 23);
this.put("move=:edit=", 24);
this.put("move=sysop", 1);
this.put("move=sysop:edit=sysop", 5);
this.put("sysop", 2);
}
};
}