chainy/node_modules/markov/index.js

200 lines
5.6 KiB
JavaScript
Raw Permalink Normal View History

2020-01-26 19:03:32 +00:00
var EventEmitter = require('events').EventEmitter;
var deck = require('deck');
var Lazy = require('lazy');
var Hash = require('hashish');
module.exports = function (order) {
if (!order) order = 2;
var db = {};
var self = {};
self.seed = function (seed, cb) {
if (seed instanceof EventEmitter) {
Lazy(seed).lines.forEach(self.seed);
if (cb) {
seed.on('error', cb);
seed.on('end', cb);
}
}
else {
var text = (Buffer.isBuffer(seed) ? seed.toString() : seed)
var words = text.split(/\s+/);
var links = [];
for (var i = 0; i < words.length; i += order) {
var link = words.slice(i, i + order).join(' ');
links.push(link);
}
if (links.length <= 1) {
if (cb) cb(null);
return;
}
for (var i = 1; i < links.length; i++) {
var word = links[i-1];
var cword = clean(word);
var next = links[i];
var cnext = clean(next);
var node = Hash.has(db, cword)
? db[cword]
: {
count : 0,
words : {},
next : {},
prev : {},
}
;
db[cword] = node;
node.count ++;
node.words[word] = (
Hash.has(node.words, word) ? node.words[word] : 0
) + 1;
node.next[cnext] = (
Hash.has(node.next, cnext) ? node.next[cnext] : 0
) + 1
if (i > 1) {
var prev = clean(links[i-2]);
node.prev[prev] = (
Hash.has(node.prev, prev) ? node.prev[prev] : 0
) + 1;
}
else {
node.prev[''] = (node.prev[''] || 0) + 1;
}
}
if (!Hash.has(db, cnext)) db[cnext] = {
count : 1,
words : {},
next : { '' : 0 },
prev : {},
};
var n = db[cnext];
n.words[next] = (Hash.has(n.words, next) ? n.words[next] : 0) + 1;
n.prev[cword] = (Hash.has(n.prev, cword) ? n.prev[cword] : 0) + 1;
n.next[''] = (n.next[''] || 0) + 1;
if (cb) cb(null);
}
};
self.search = function (text) {
var words = text.split(/\s+/);
// find a starting point...
var start = null;
var groups = {};
for (var i = 0; i < words.length; i += order) {
var word = clean(words.slice(i, i + order).join(' '));
if (Hash.has(db, word)) groups[word] = db[word].count;
}
return deck.pick(groups);
};
self.pick = function () {
return deck.pick(Object.keys(db))
};
self.next = function (cur) {
if (!cur || !db[cur]) return undefined;
var next = deck.pick(db[cur].next);
return next && {
key : next,
word : deck.pick(db[next].words),
} || undefined;
};
self.prev = function (cur) {
if (!cur || !db[cur]) return undefined;
var prev = deck.pick(db[cur].prev);
return prev && {
key : prev,
word : deck.pick(db[prev].words),
} || undefined;
};
self.forward = function (cur, limit) {
var res = [];
while (cur && !limit || res.length < limit) {
var next = self.next(cur);
if (!next) break;
cur = next.key;
res.push(next.word);
}
return res;
};
self.backward = function (cur, limit) {
var res = [];
while (cur && !limit || res.length < limit) {
var prev = self.prev(cur);
if (!prev) break;
cur = prev.key;
res.unshift(prev.word);
}
return res;
};
self.fill = function (cur, limit) {
var res = [ deck.pick(db[cur].words) ];
if (!res[0]) return [];
if (limit && res.length >= limit) return res;;
var pcur = cur;
var ncur = cur;
while (pcur || ncur) {
if (pcur) {
var prev = self.prev(pcur);
pcur = null;
if (prev) {
pcur = prev.key;
res.unshift(prev.word);
if (limit && res.length >= limit) break;
}
}
if (ncur) {
var next = self.next(ncur);
ncur = null;
if (next) {
ncur = next.key;
res.unshift(next.word);
if (limit && res.length >= limit) break;
}
}
}
return res;
};
self.respond = function (text, limit) {
var cur = self.search(text) || self.pick();
return self.fill(cur, limit);
};
self.word = function (cur) {
return db[cur] && deck.pick(db[cur].words);
};
return self;
};
/**
 * Normalize a string into a lookup key: lower-case it, collapse every run of
 * characters other than a-z and digits into a single underscore, then drop
 * one leading and one trailing underscore if present.
 *
 * @param {string} s Raw text.
 * @returns {string} The normalized key.
 */
function clean (s) {
    var lowered = s.toLowerCase();
    var underscored = lowered.replace(/[^a-z\d]+/g, '_');
    var withoutLead = underscored.replace(/^_/, '');
    var key = withoutLead.replace(/_$/, '');
    return key;
}