200 lines
5.6 KiB
JavaScript
200 lines
5.6 KiB
JavaScript
|
var EventEmitter = require('events').EventEmitter;
|
||
|
var deck = require('deck');
|
||
|
var Lazy = require('lazy');
|
||
|
var Hash = require('hashish');
|
||
|
|
||
|
module.exports = function (order) {
|
||
|
if (!order) order = 2;
|
||
|
var db = {};
|
||
|
var self = {};
|
||
|
|
||
|
self.seed = function (seed, cb) {
|
||
|
if (seed instanceof EventEmitter) {
|
||
|
Lazy(seed).lines.forEach(self.seed);
|
||
|
|
||
|
if (cb) {
|
||
|
seed.on('error', cb);
|
||
|
seed.on('end', cb);
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
var text = (Buffer.isBuffer(seed) ? seed.toString() : seed)
|
||
|
var words = text.split(/\s+/);
|
||
|
var links = [];
|
||
|
|
||
|
for (var i = 0; i < words.length; i += order) {
|
||
|
var link = words.slice(i, i + order).join(' ');
|
||
|
links.push(link);
|
||
|
}
|
||
|
|
||
|
if (links.length <= 1) {
|
||
|
if (cb) cb(null);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
for (var i = 1; i < links.length; i++) {
|
||
|
var word = links[i-1];
|
||
|
var cword = clean(word);
|
||
|
var next = links[i];
|
||
|
var cnext = clean(next);
|
||
|
|
||
|
var node = Hash.has(db, cword)
|
||
|
? db[cword]
|
||
|
: {
|
||
|
count : 0,
|
||
|
words : {},
|
||
|
next : {},
|
||
|
prev : {},
|
||
|
}
|
||
|
;
|
||
|
db[cword] = node;
|
||
|
|
||
|
node.count ++;
|
||
|
node.words[word] = (
|
||
|
Hash.has(node.words, word) ? node.words[word] : 0
|
||
|
) + 1;
|
||
|
node.next[cnext] = (
|
||
|
Hash.has(node.next, cnext) ? node.next[cnext] : 0
|
||
|
) + 1
|
||
|
if (i > 1) {
|
||
|
var prev = clean(links[i-2]);
|
||
|
node.prev[prev] = (
|
||
|
Hash.has(node.prev, prev) ? node.prev[prev] : 0
|
||
|
) + 1;
|
||
|
}
|
||
|
else {
|
||
|
node.prev[''] = (node.prev[''] || 0) + 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (!Hash.has(db, cnext)) db[cnext] = {
|
||
|
count : 1,
|
||
|
words : {},
|
||
|
next : { '' : 0 },
|
||
|
prev : {},
|
||
|
};
|
||
|
var n = db[cnext];
|
||
|
n.words[next] = (Hash.has(n.words, next) ? n.words[next] : 0) + 1;
|
||
|
n.prev[cword] = (Hash.has(n.prev, cword) ? n.prev[cword] : 0) + 1;
|
||
|
n.next[''] = (n.next[''] || 0) + 1;
|
||
|
|
||
|
if (cb) cb(null);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
self.search = function (text) {
|
||
|
var words = text.split(/\s+/);
|
||
|
|
||
|
// find a starting point...
|
||
|
var start = null;
|
||
|
var groups = {};
|
||
|
for (var i = 0; i < words.length; i += order) {
|
||
|
var word = clean(words.slice(i, i + order).join(' '));
|
||
|
if (Hash.has(db, word)) groups[word] = db[word].count;
|
||
|
}
|
||
|
|
||
|
return deck.pick(groups);
|
||
|
};
|
||
|
|
||
|
self.pick = function () {
|
||
|
return deck.pick(Object.keys(db))
|
||
|
};
|
||
|
|
||
|
self.next = function (cur) {
|
||
|
if (!cur || !db[cur]) return undefined;
|
||
|
|
||
|
var next = deck.pick(db[cur].next);
|
||
|
return next && {
|
||
|
key : next,
|
||
|
word : deck.pick(db[next].words),
|
||
|
} || undefined;
|
||
|
};
|
||
|
|
||
|
self.prev = function (cur) {
|
||
|
if (!cur || !db[cur]) return undefined;
|
||
|
|
||
|
var prev = deck.pick(db[cur].prev);
|
||
|
return prev && {
|
||
|
key : prev,
|
||
|
word : deck.pick(db[prev].words),
|
||
|
} || undefined;
|
||
|
};
|
||
|
|
||
|
self.forward = function (cur, limit) {
|
||
|
var res = [];
|
||
|
while (cur && !limit || res.length < limit) {
|
||
|
var next = self.next(cur);
|
||
|
if (!next) break;
|
||
|
cur = next.key;
|
||
|
res.push(next.word);
|
||
|
}
|
||
|
|
||
|
return res;
|
||
|
};
|
||
|
|
||
|
self.backward = function (cur, limit) {
|
||
|
var res = [];
|
||
|
while (cur && !limit || res.length < limit) {
|
||
|
var prev = self.prev(cur);
|
||
|
if (!prev) break;
|
||
|
cur = prev.key;
|
||
|
res.unshift(prev.word);
|
||
|
}
|
||
|
|
||
|
return res;
|
||
|
};
|
||
|
|
||
|
self.fill = function (cur, limit) {
|
||
|
var res = [ deck.pick(db[cur].words) ];
|
||
|
if (!res[0]) return [];
|
||
|
if (limit && res.length >= limit) return res;;
|
||
|
|
||
|
var pcur = cur;
|
||
|
var ncur = cur;
|
||
|
|
||
|
while (pcur || ncur) {
|
||
|
if (pcur) {
|
||
|
var prev = self.prev(pcur);
|
||
|
pcur = null;
|
||
|
if (prev) {
|
||
|
pcur = prev.key;
|
||
|
res.unshift(prev.word);
|
||
|
if (limit && res.length >= limit) break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (ncur) {
|
||
|
var next = self.next(ncur);
|
||
|
ncur = null;
|
||
|
if (next) {
|
||
|
ncur = next.key;
|
||
|
res.unshift(next.word);
|
||
|
if (limit && res.length >= limit) break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return res;
|
||
|
};
|
||
|
|
||
|
self.respond = function (text, limit) {
|
||
|
var cur = self.search(text) || self.pick();
|
||
|
return self.fill(cur, limit);
|
||
|
};
|
||
|
|
||
|
self.word = function (cur) {
|
||
|
return db[cur] && deck.pick(db[cur].words);
|
||
|
};
|
||
|
|
||
|
return self;
|
||
|
};
|
||
|
|
||
|
/**
 * Normalise a link into its lookup key: lower-case it, turn every run of
 * characters other than a-z and digits into a single underscore, then strip
 * one underscore from the start and one from the end.
 *
 * @param {String} s - raw link text
 * @returns {String} normalised key
 */
function clean (s) {
    var lowered = s.toLowerCase();
    var underscored = lowered.replace(/[^a-z\d]+/g, '_');
    var withoutLead = underscored.replace(/^_/, '');
    return withoutLead.replace(/_$/, '');
}
|