package net.andreinc.mockneat.unit.text.markov;
/**
* Copyright 2017, Andrei N. Ciobanu
Permission is hereby granted, free of charge, to any user obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
import net.andreinc.mockneat.MockNeat;
import net.andreinc.mockneat.abstraction.MockUnit;
import net.andreinc.mockneat.utils.file.FileManager;
import net.andreinc.mockneat.types.enums.MarkovChainType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import static java.util.Arrays.stream;
import static org.apache.commons.lang3.StringUtils.lowerCase;
import static org.apache.commons.lang3.text.WordUtils.capitalize;
public class MarkovUnit {
private static final FileManager fm = FileManager.getInstance();
private static final Logger logger = LoggerFactory.getLogger(MarkovUnit.class);
private final String path;
private final Map<WordState, WordStatistic> chain;
private final MockUnit<WordState> randState;
private Integer stateSize = 2;
private MockNeat mock = MockNeat.threadLocal();
private MarkovUnit(MockNeat mock, List<String> lines, String path, Integer stateSize) {
this.path = path;
this.stateSize = stateSize;
this.chain = getChain(getRawChain(getWords(lines)));
this.mock = mock;
this.randState = this.mock.fromKeys(chain);
}
public static MarkovUnit internal(MockNeat mock, MarkovChainType chainType, int stateSize) throws IOException {
return new MarkovUnit(mock, fm.read(chainType), chainType.getFile(), stateSize);
}
public static MarkovUnit external(MockNeat mock, String path, int stateSize) throws IOException {
return new MarkovUnit(mock, fm.read(path), path, stateSize);
}
private List<String> getWords(List<String> lines) {
logger.info("Obtaining the list of words from: '{}'.", this.path);
List<String> words = new ArrayList<>();
lines.forEach(line -> {
line = line.replaceAll("\""," ");
stream(line.split(" ")).forEach(word -> {
String trimmed = lowerCase(word.trim());
if (!"".equals(word))
words.add(trimmed);
});
});
logger.info("{} words detected in '{}'.", words.size(), path);
return words;
}
private Map<WordState, Map<String, Integer>> getRawChain(List<String> words) {
logger.info("Building WordState(s) from the words found in '{}'.", path);
Map<WordState, Map<String, Integer>> result = new HashMap<>();
WordState currentState;
Map<String, Integer> currentRawValue;
Integer currentCount;
int stop = words.size() - stateSize;
String nextWord;
for(int i = 0; i < stop; i++) {
nextWord = words.get(i+stateSize);
currentState = WordState.fromWords(words, stateSize, i);
currentRawValue = result.get(currentState);
if (null==currentRawValue) {
currentRawValue = new HashMap<>();
result.put(currentState, currentRawValue);
}
currentCount = currentRawValue.get(nextWord);
if (null==currentCount) {
currentCount = 0;
}
currentRawValue.put(nextWord, ++currentCount);
}
logger.info("{} WordState(s) detected in '{}'.", result.keySet().size(), path);
return result;
}
private Map<WordState, WordStatistic> getChain(Map<WordState, Map<String, Integer>> rawChain) {
return rawChain
.entrySet()
.stream()
.collect(Collectors.toMap(Map.Entry::getKey,
e -> new WordStatistic(e.getValue())));
}
public String generateText(Integer maxLength) {
// Obtain a objs state from the existing states
StringBuilder buff = new StringBuilder();
WordState state = randState.val();
String prev = ".";
WordStatistic statistic;
String next;
while (buff.length() < maxLength) {
statistic = chain.get(state);
while(null==statistic) {
statistic = chain.get(randState.val());
}
next = statistic.nextWord();
state = state.nextState(next);
if (prev.endsWith(".")) {
next = capitalize(next);
}
prev = next;
buff.append(next).append(" ");
}
return buff.subSequence(0, maxLength).toString();
}
}