/** * Copyright 2012 Anjuke Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.anjuke.romar.mahout.persistence; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map.Entry; import java.util.NoSuchElementException; import java.util.zip.GZIPInputStream; import org.apache.mahout.cf.taste.impl.common.FastByIDMap; import org.apache.mahout.cf.taste.impl.model.GenericPreference; import org.apache.mahout.cf.taste.model.Preference; import org.apache.mahout.cf.taste.model.PreferenceArray; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.anjuke.romar.mahout.util.Util; public class FilePreferenceSource extends AbstractFilePreferenceSource implements PreferenceSource { private static final Logger LOG = LoggerFactory.getLogger(FilePreferenceSource.class); private PrintWriter _writer; private long _logCount = 0; private final Object _snapshotWriterLock = new Object(); public FilePreferenceSource(File path) { super(path); _writer = createWriter(); } @Override public void setPreference(long userID, long itemID, float value) { synchronized (this) { _logCount++; _writer.print(userID); _writer.print(','); _writer.print(itemID); _writer.print(','); _writer.println(value); _writer.flush(); } } @Override public void removePreference(long userID, long itemID) { synchronized (this) { _logCount++; _writer.print(userID); _writer.print(','); _writer.print(itemID); _writer.println(','); _writer.flush(); } } @Override public void removePreferenceByUserId(long userID) { // FIXME throw new UnsupportedOperationException(); } @Override public void removePreferenceByItemId(long itemID) { // FIXME throw new UnsupportedOperationException(); } @Override public void commit() { synchronized (this) { if (_logCount > 0) { _writer.flush(); _writer.close(); _writer = createWriter(); _logCount = 0; } } } private PrintWriter createWriter() { File file = createNewLogFile(); return createWriter(file); } private static PrintWriter createWriter(File file) { try { OutputStream os = new FileOutputStream(file); // String name = file.getName(); // if (name.endsWith(".gz")) { // try { // os=new GZIPOutputStream(os); // } catch (IOException e) { // throw new RuntimeException(e); // } // } return new PrintWriter(os); } catch (FileNotFoundException e) { throw new AssertionError(e); } } private static BufferedReader createReader(File file) { try { InputStream is = new FileInputStream(file); String name = file.getName(); if (name.endsWith(".gz")) { is = new GZIPInputStream(is); } return new BufferedReader(new InputStreamReader(is)); } catch (IOException e) { throw new RuntimeException(e); } } @Override public void compact() { // 避免过长的时间持有对象锁 final long version = getCurrentVersion() - 1; if (version < 0) { return; } File latestSnapshotFile = getLatestSnapshotFile(); if (latestSnapshotFile != null && getSnapshotFileVersion(latestSnapshotFile) == version) { return; } synchronized (_snapshotWriterLock) { PrintWriter snapshotWriter = createWriter(getSnapshotFile(version)); try { final List<File> logFileList = getLogFileListUntilVersion(version); List<File> fileToIt = new ArrayList<File>(logFileList); if (latestSnapshotFile != null) { fileToIt.add(0, latestSnapshotFile); } PreferenceIterator it = new LogFileIterator(fileToIt); FastByIDMap<PreferenceArray> data = new FastByIDMap<PreferenceArray>(); while (it.hasNext()) { Preference pref = it.next(); if (it.getType() == PreferenceType.ADD) { Util.applyAdd(data, pref); } else if (it.getType() == PreferenceType.DELETE) { Util.applyRemove(data, pref); } } for (Entry<Long, PreferenceArray> entry : data.entrySet()) { PreferenceArray array = entry.getValue(); for (int i = 0, length = array.length(); i < length; i++) { long userID = array.getUserID(i); long itemID = array.getItemID(i); float value = array.getValue(i); snapshotWriter.print(userID); snapshotWriter.print(','); snapshotWriter.print(itemID); snapshotWriter.print(','); snapshotWriter.println(value); } } snapshotWriter.flush(); snapshotWriter.close(); removeFile(); } finally { snapshotWriter.close(); } } } private void removeFile() { List<File> snapshotFiles = listSnapshotFileNamesAndSorted(); if (snapshotFiles.size() < 2) { return; } long version = -1; for (int i = 0, length = snapshotFiles.size(); i < length - 2; i++) { File file = snapshotFiles.get(i); version = getSnapshotFileVersion(file); file.delete(); } if (version > 0) { List<File> logs = getLogFileListUntilVersion(version); for (File file : logs) { file.delete(); } } } private static class LogFileIterator implements PreferenceIterator { private Iterator<File> _fileIt; private BufferedReader _currentReader = null; private Preference _preference; private PreferenceType _type; public LogFileIterator(List<File> list) { super(); _fileIt = list.iterator(); } BufferedReader createReader() { if (!_fileIt.hasNext()) { return null; } else { // create reader File file = _fileIt.next(); LOG.info("read file " + file.getAbsolutePath()); return FilePreferenceSource.createReader(file); } } @Override public boolean hasNext() { if (_currentReader == null) { _currentReader = createReader(); if (_currentReader == null) { return false; } } String line; try { while ((line = _currentReader.readLine()) == null) { close(); _currentReader = createReader(); if (_currentReader == null) { return false; } } } catch (IOException e) { close(); throw new RuntimeException(e); } String[] tmp = line.split(","); long userID = Long.parseLong(tmp[0]); long itemID = Long.parseLong(tmp[1]); float value; if (tmp.length == 2 || "".equals(tmp[2])) { value = 0; _type = PreferenceType.DELETE; } else { value = Float.parseFloat(tmp[2]); _type = PreferenceType.ADD; } _preference = new GenericPreference(userID, itemID, value); return true; } @Override public Preference next() { if (_preference == null) { throw new NoSuchElementException(); } return _preference; } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public PreferenceType getType() { return _type; } void close() { if (_currentReader != null) { try { _currentReader.close(); } catch (IOException e) { LOG.info(e.getMessage(), e); } } } } @Override public void close() { _writer.close(); } @Override public FastByIDMap<PreferenceArray> getPreferenceUserData() { File snapshotFile = getLatestSnapshotFile(); long version; if (snapshotFile == null) { version = -1; } else { version = getSnapshotFileVersion(snapshotFile); } final List<File> list = new ArrayList<File>(getLogFileListFromVersion(version)); if (snapshotFile != null) { list.add(0, snapshotFile); } PreferenceIterator it = new LogFileIterator(list); FastByIDMap<PreferenceArray> data = new FastByIDMap<PreferenceArray>(); while (it.hasNext()) { Preference pref = it.next(); if (it.getType() == PreferenceType.ADD) { Util.applyAdd(data, pref); } else if (it.getType() == PreferenceType.DELETE) { Util.applyRemove(data, pref); } } return data; } }