/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.knittingboar.records; import java.io.IOException; import org.apache.mahout.math.RandomAccessSparseVector; import org.apache.mahout.math.Vector; import junit.framework.TestCase; public class TestRCV1RecordFactory extends TestCase { String training_rec_0 = "0 |f 7:4.3696374e-02 8:1.0872085e-01 19:2.2659289e-02 20:1.6952585e-02 50:3.3265986e-02 52:2.8914521e-02 99:6.2935837e-02 111:3.6749814e-02 124:4.5141779e-02 147:2.7024418e-02 153:5.3756956e-02 169:2.8062440e-02 182:4.7379807e-02 183:2.7668567e-02 188:1.4508039e-02 269:2.3687121e-02 271:2.0829555e-02 297:2.8352227e-02 311:3.3546336e-02 319:2.8875276e-02 332:4.7258154e-02 337:3.1720489e-02 360:6.8111412e-02 368:4.4445150e-02 411:4.4164777e-02 488:8.4059432e-02 586:2.9122708e-02 591:9.5403686e-02 664:3.6937956e-02 702:2.8176809e-02 737:1.6336726e-01 739:5.1228814e-02 757:3.4760747e-02 764:3.6367100e-02 768:6.1244022e-02 791:2.1772176e-01 817:7.4271448e-02 848:4.0480603e-02 895:5.1346138e-02 933:4.1986264e-02 979:1.1311502e-01 1003:4.5158323e-02 1005:4.0224314e-02 1021:4.6525169e-02 1071:2.9869374e-02 1127:2.1704819e-02 1133:4.5880664e-02 1162:3.8132094e-02 1178:5.2212182e-02 1180:1.0740499e-01 1338:4.9277205e-02 1360:4.6650354e-02 1498:5.9916675e-02 1511:7.6297082e-02 1577:5.0769087e-02 1659:5.0992116e-02 1666:2.4987224e-02 1674:2.9845037e-02 1810:4.6527624e-02 1966:4.3204561e-02 2018:4.3157250e-02 2066:1.3678090e-01 2074:1.0599699e-01 2117:9.8577492e-02 2183:1.4329165e-01 2248:1.2792459e-01 2276:7.9498030e-02 2316:4.9681831e-02 2340:5.8379412e-02 2762:5.1772792e-02 2771:4.9624689e-02 3077:2.1542890e-01 3227:8.3143584e-02 3246:5.2039523e-02 3282:5.2630566e-02 3369:7.0463479e-02 3615:5.6905646e-02 3620:6.6913836e-02 3962:6.1502680e-02 4132:2.1751978e-01 4143:2.6172629e-01 4144:9.1886766e-02 4499:1.1314832e-01 5031:7.9870239e-02 5055:8.6920090e-02 5401:5.4840717e-02 5423:9.5343769e-02 5860:8.9788958e-02 6065:8.6977042e-02 6668:7.6055169e-02 6697:6.8251781e-02 7139:6.4996362e-02 7426:1.2097790e-01 7606:1.9588335e-01 8870:1.4963643e-01 9804:9.4143294e-02 12121:7.4564628e-02 13942:1.6451047e-01 14595:1.0607405e-01 15422:8.9860193e-02 15652:1.0834268e-01 16223:9.6487328e-02 16859:1.0539885e-01 17424:8.1960648e-02 19529:9.3970060e-02 23299:1.8965191e-01 24377:1.0888006e-01 24448:9.8843329e-02 24454:2.8149781e-01 24455:2.1925208e-01 26622:1.0557952e-01 31771:1.3358803e-01 41700:1.2038895e-0"; String training_rec_1 = "1 |f 9:3.3307336e-02 13:2.6428020e-02 55:4.5726493e-02 69:3.0852484e-02 93:7.4375033e-02 111:4.7884714e-02 140:5.6255672e-02 151:6.5337561e-02 153:7.0044883e-02 161:5.7628881e-02 175:4.5645405e-02 180:4.6431489e-02 187:5.3116236e-02 193:3.8840283e-02 209:6.7031987e-02 217:3.3030130e-02 229:4.9895555e-02 233:2.7318209e-02 236:2.9892704e-02 252:5.6756295e-02 258:4.4865504e-02 260:6.3265145e-02 263:5.3964965e-02 269:5.2257512e-02 271:4.5953277e-02 276:3.2793090e-02 286:3.6571421e-02 288:2.9139040e-02 319:3.7624255e-02 334:7.6396912e-02 338:3.6081653e-02 362:7.6015718e-02 389:5.4903280e-02 417:2.6063753e-02 426:5.4687556e-02 438:7.3853023e-02 453:3.9429404e-02 477:4.9223945e-02 480:5.3083062e-02 488:5.2191041e-02 506:3.5930801e-02 558:8.1321917e-02 561:5.6125600e-02 594:5.5980112e-02 617:8.4778033e-02 623:4.6125464e-02 642:4.1558836e-02 644:1.0274204e-01 755:6.0711723e-02 803:5.0099224e-02 836:5.0887167e-02 837:5.0023027e-02 951:6.7326568e-02 1012:1.0415152e-01 1037:7.9722285e-02 1059:2.8764643e-02 1061:5.8410108e-02 1077:6.1814863e-02 1089:7.1079604e-02 1129:7.9089224e-02 1133:5.9782140e-02 1196:1.0324168e-01 1212:9.1613702e-02 1218:3.9104007e-02 1221:6.0955465e-02 1237:5.1370349e-02 1240:7.8351930e-02 1241:8.0820285e-02 1287:7.0892565e-02 1342:4.7291242e-02 1356:6.4580373e-02 1486:5.8335997e-02 1492:5.5702407e-02 1499:8.0403641e-02 1546:7.6899387e-02 1575:5.2044731e-02 1626:7.6970406e-02 1659:1.1249663e-01 1823:1.1926711e-01 1839:8.0284260e-02 1976:9.9477187e-02 1985:7.4262738e-02 2008:7.3236965e-02 2061:1.1390504e-01 2153:4.4327311e-02 2190:9.7444594e-02 2212:5.6166001e-02 2234:4.9261238e-02 2446:6.5645538e-02 2455:7.3392190e-02 3088:9.1792777e-02 3230:5.9966434e-02 3247:1.3415127e-01 3261:8.2616769e-02 3306:2.2782215e-01 3394:6.7915484e-02 3443:8.2661413e-02 3669:2.0983912e-01 3690:1.0559268e-01 3899:7.4440584e-02 4367:2.0285535e-01 4369:9.3478583e-02 4455:2.1870497e-01 4500:8.8423654e-02 4965:7.5678401e-02 5223:7.7687882e-02 5539:1.2643857e-01 5874:7.3007621e-02 5945:8.4572427e-02 5995:8.5160516e-02 6025:9.9534206e-02 6051:1.3297138e-01 6510:1.3806941e-01 7456:1.9141673e-01 9117:9.7984366e-02 9257:1.1816826e-01 9461:1.6047940e-01 10985:1.1505207e-01 13963:2.0298573e-01 14669:1.3871375e-01 14770:1.0384714e-01 16122:1.2237830e-01 23461:1.8466952e-01 23701:1.1287191e-01 25082:1.4810963e-01 40790:1.3507748e-01"; String training_rec_2 = "1 |f 5:5.2558247e-02 63:1.2409918e-01 70:6.0630817e-02 188:4.9340606e-02 193:1.0137629e-01 286:9.5454372e-02 571:3.5901710e-01 811:8.9977279e-02 942:1.6419628e-01 1193:9.6185014e-02 1380:1.2333614e-01 1702:3.7394261e-01 1766:4.7051618e-01 2244:3.2123861e-01 2298:4.9728591e-02 2554:3.1551713e-01 2565:3.2508451e-01 3400:1.7101182e-01 13750:2.5799808e-01"; public void testParse() throws Exception { RCV1RecordFactory factory = new RCV1RecordFactory(); Vector v = new RandomAccessSparseVector(RCV1RecordFactory.FEATURES); int actual = factory.processLine(training_rec_0, v); assertEquals( 0, actual); assertEquals( .043696374, v.get(7)); Vector v2 = new RandomAccessSparseVector(RCV1RecordFactory.FEATURES); int actual2 = factory.processLine(training_rec_1, v2); assertEquals( 1, actual2); assertEquals( .030852484, v2.get(69)); } }