/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.cf.taste.example.jester;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.regex.Pattern;

import com.google.common.collect.Lists;
import org.apache.mahout.cf.taste.example.grouplens.GroupLensDataModel;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
import org.apache.mahout.cf.taste.impl.model.GenericPreference;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.common.iterator.FileLineIterator;

/**
 * A {@link FileDataModel} that reads a Jester-style ratings CSV file.
 *
 * <p>Each non-empty line is treated as one user. The first comma-separated field is a count
 * (used only to size the preference list); every following field is a rating whose 1-based
 * column position is used as the item ID. A field equal to the literal {@code 99} is skipped
 * and produces no preference. User IDs are not read from the file: they are assigned
 * sequentially, starting at 0, in the order lines are processed.</p>
 */
public final class JesterDataModel extends FileDataModel {

  private static final Pattern COMMA_PATTERN = Pattern.compile(",");

  /**
   * ID that will be assigned to the next line (user) handed to {@link #processLine}.
   * Reset to 0 by {@link #reload()}.
   *
   * <p>NOTE(review): this field deliberately has no explicit initializer. Presumably the
   * superclass constructor triggers the first load; if so, an initializer here would run
   * after that load and overwrite the counter -- confirm before adding one.</p>
   */
  private long userBeingRead;

  /**
   * Creates a model from the resource
   * {@code /org/apache/mahout/cf/taste/example/jester/jester-data-1.csv}, which is first
   * copied to a temporary file via {@link GroupLensDataModel#readResourceToTempFile(String)}.
   *
   * @throws IOException if an error occurs while reading or writing files
   */
  public JesterDataModel() throws IOException {
    this(GroupLensDataModel.readResourceToTempFile("/org/apache/mahout/cf/taste/example/jester/jester-data-1.csv"));
  }

  /**
   * @param ratingsFile Jester ratings file in CSV format
   * @throws IOException if an error occurs while reading or writing files
   */
  public JesterDataModel(File ratingsFile) throws IOException {
    super(ratingsFile);
  }

  /**
   * Resets the sequential user ID counter and then delegates to the superclass reload,
   * so that a reloaded file is numbered from user 0 again.
   */
  @Override
  public void reload() {
    userBeingRead = 0;
    super.reload();
  }

  /**
   * Reads the whole data file and builds an in-memory {@link GenericDataModel} from it.
   *
   * @return a model containing one preference list per line of the data file
   * @throws IOException if the data file cannot be opened or read
   */
  @Override
  protected DataModel buildModel() throws IOException {
    FastByIDMap<Collection<Preference>> data = new FastByIDMap<Collection<Preference>>();
    // Required by the processFile signature; this model never stores anything in it.
    FastByIDMap<FastByIDMap<Long>> timestamps = new FastByIDMap<FastByIDMap<Long>>();
    FileLineIterator iterator = new FileLineIterator(getDataFile(), false);
    try {
      processFile(iterator, data, timestamps, false);
    } finally {
      // Always release the underlying reader, including when a malformed line makes
      // processLine throw part-way through the file.
      iterator.close();
    }
    return new GenericDataModel(GenericDataModel.toDataMap(data, true));
  }

  /**
   * Parses one line of the ratings file into the preferences of a single user and stores
   * them in {@code rawData} under the next sequential user ID.
   *
   * @param line one comma-separated line: a leading count followed by one rating per item
   * @param rawData map from user ID to that user's preferences; must actually be a
   *        {@code FastByIDMap<Collection<Preference>>}, as created by {@link #buildModel()}
   * @param timestamps unused by this implementation
   * @param fromPriorData unused by this implementation
   * @throws NumberFormatException if the count is not an integer or a rating is not a float
   */
  @Override
  protected void processLine(String line,
                             FastByIDMap<?> rawData,
                             FastByIDMap<FastByIDMap<Long>> timestamps,
                             boolean fromPriorData) {
    // Safe as long as rawData is the map created in buildModel(), which has exactly this type.
    @SuppressWarnings("unchecked")
    FastByIDMap<Collection<Preference>> data = (FastByIDMap<Collection<Preference>>) rawData;

    String[] jokePrefs = COMMA_PATTERN.split(line);
    // First field is only a count; use it as a capacity hint for the list.
    int count = Integer.parseInt(jokePrefs[0]);
    Collection<Preference> prefs = Lists.newArrayListWithCapacity(count);

    // Start at 1: index 0 is the count, and the column index doubles as the item ID.
    for (int itemID = 1; itemID < jokePrefs.length; itemID++) {
      String jokePref = jokePrefs[itemID];
      // "99" yields no preference -- presumably the data set's marker for an unrated joke.
      if (!"99".equals(jokePref)) {
        float jokePrefValue = Float.parseFloat(jokePref);
        prefs.add(new GenericPreference(userBeingRead, itemID, jokePrefValue));
      }
    }

    data.put(userBeingRead, prefs);
    userBeingRead++;
  }

}