nasg/elasticlunr.js

2507 lines
67 KiB
JavaScript

/**
* elasticlunr - http://weixsong.github.io
* Lightweight full-text search engine in Javascript for browser search and offline search. - 0.9.5
*
* Copyright (C) 2017 Oliver Nightingale
* Copyright (C) 2017 Wei Song
* MIT Licensed
* @license
*/
(function(){
/*!
* elasticlunr.js
* Copyright (C) 2017 Oliver Nightingale
* Copyright (C) 2017 Wei Song
*/
/**
* Convenience function for instantiating a new elasticlunr index and configuring it
* with the default pipeline functions and the passed config function.
*
* When using this convenience function a new index will be created with the
* following functions already in the pipeline:
*
* 1. elasticlunr.trimmer - trim non-word character
* 2. elasticlunr.StopWordFilter - filters out any stop words before they enter the
* index
* 3. elasticlunr.stemmer - stems the tokens before entering the index.
*
*
* Example:
*
* var idx = elasticlunr(function () {
* this.addField('id');
* this.addField('title');
* this.addField('body');
*
* //this.setRef('id'); // default ref is 'id'
*
* this.pipeline.add(function () {
* // some custom pipeline function
* });
* });
*
* idx.addDoc({
* id: 1,
* title: 'Oracle released database 12g',
* body: 'Yesterday, Oracle has released their latest database, named 12g, more robust. this product will increase Oracle profit.'
* });
*
* idx.addDoc({
* id: 2,
* title: 'Oracle released annual profit report',
* body: 'Yesterday, Oracle has released their annual profit report of 2015, total profit is 12.5 Billion.'
* });
*
* # simple search
* idx.search('oracle database');
*
* # search with query-time boosting
* idx.search('oracle database', {fields: {title: {boost: 2}, body: {boost: 1}}});
*
* @param {Function} config A function that will be called with the new instance
* of the elasticlunr.Index as both its context and first parameter. It can be used to
* customize the instance of new elasticlunr.Index.
* @namespace
* @module
* @return {elasticlunr.Index}
*
*/
var elasticlunr = function (config) {
  var index = new elasticlunr.Index();

  // Default pipeline, in processing order: trim, drop stop words, stem.
  var defaultStages = [
    elasticlunr.trimmer,
    elasticlunr.stopWordFilter,
    elasticlunr.stemmer
  ];
  index.pipeline.add.apply(index.pipeline, defaultStages);

  // The optional config callback receives the new index both as `this`
  // and as its first argument.
  if (config) {
    config.call(index, index);
  }

  return index;
};
elasticlunr.version = "0.9.5";
// Expose the library under the name `lunr` as well; this alias exists only to
// make elasticlunr.js compatible with lunr-languages.
// NOTE(review): `lunr` is assigned without any declaration, so this statement
// creates an implicit global in sloppy mode and would throw a ReferenceError if
// this file were ever evaluated as strict-mode code. Confirm before adding
// 'use strict' or loading the file as an ES module.
lunr = elasticlunr;
/*!
* elasticlunr.utils
* Copyright (C) 2017 Oliver Nightingale
* Copyright (C) 2017 Wei Song
*/
/**
* Namespace object holding small helper functions shared by the rest of the
* elasticlunr library (see `elasticlunr.utils.warn` and `elasticlunr.utils.toString`).
*/
elasticlunr.utils = {};
/**
* Print a warning message to the console.
*
* @param {String} message The message to be printed.
* @memberOf Utils
*/
elasticlunr.utils.warn = function (message) {
  // Look the console up at call time through `typeof`, so the function neither
  // throws when no console exists nor depends on the value of `this` at load
  // time (which is undefined in strict-mode / module code).
  if (typeof console === 'undefined' || !console.warn) return;

  // Forward every argument, not just `message`: callers such as
  // elasticlunr.Pipeline.warnIfFunctionNotRegistered pass additional values
  // (the offending function) after the message text.
  Function.prototype.apply.call(console.warn, console, arguments);
};
/**
* Convert an object to string.
*
* In the case of `null` and `undefined` the function returns
* an empty string, in all other cases the result of calling
* `toString` on the passed object is returned.
*
* @param {object} obj The object to convert to a string.
* @return {String} string representation of the passed object.
* @memberOf Utils
*/
elasticlunr.utils.toString = function (obj) {
  // null and undefined have no toString method; map both to the empty string.
  var isMissing = (obj === null || obj === undefined);
  return isMissing ? "" : obj.toString();
};
/*!
* elasticlunr.EventEmitter
* Copyright (C) 2017 Oliver Nightingale
* Copyright (C) 2017 Wei Song
*/
/**
* elasticlunr.EventEmitter is an event emitter for elasticlunr.
* It manages adding and removing event handlers and triggering events and their handlers.
*
* Each event can have multiple handler functions;
* they are called in the order in which they were added to the event.
*
* @constructor
*/
elasticlunr.EventEmitter = function () {
// Maps an event name to the array of handler functions bound to it,
// kept in the order in which the handlers were added.
this.events = {};
};
/**
* Binds a handler function to a specific event(s).
*
* Can bind a single function to many different events in one call.
*
* @param {String} [eventName] The name(s) of events to bind this function to.
* @param {Function} fn The function to call when an event is fired.
* @memberOf EventEmitter
*/
elasticlunr.EventEmitter.prototype.addListener = function () {
  // The handler is always the last argument; every argument before it is
  // the name of an event to bind the handler to.
  var handlerPos = arguments.length - 1,
      handler = arguments[handlerPos];

  if (typeof handler !== "function") throw new TypeError ("last argument must be a function");

  for (var i = 0; i < handlerPos; i++) {
    var eventName = arguments[i];
    // Lazily create the handler list the first time an event name is used.
    if (!this.hasHandler(eventName)) this.events[eventName] = [];
    this.events[eventName].push(handler);
  }
};
/**
* Removes a handler function from a specific event.
*
* @param {String} eventName The name of the event to remove this function from.
* @param {Function} fn The function to remove from an event.
* @memberOf EventEmitter
*/
elasticlunr.EventEmitter.prototype.removeListener = function (name, fn) {
  if (!this.hasHandler(name)) return;

  var handlers = this.events[name],
      position = handlers.indexOf(fn);
  if (position < 0) return;

  // Only the first occurrence of `fn` is removed.
  handlers.splice(position, 1);

  // Drop the event entry entirely once its last handler is gone, so that
  // hasHandler(name) reports false again.
  if (handlers.length === 0) delete this.events[name];
};
/**
* Calls all functions bound to the given event.
*
* Additional data can be passed to the event handler as arguments to `emit`
* after the event name.
*
* @param {String} eventName The name of the event to emit.
* @memberOf EventEmitter
*/
elasticlunr.EventEmitter.prototype.emit = function (name) {
  if (!this.hasHandler(name)) return;

  // Everything after the event name is passed on to each handler.
  var args = Array.prototype.slice.call(arguments, 1);

  // Iterate over a snapshot of the handler list: a handler may call
  // removeListener (for itself or another handler) while the event is being
  // emitted, and splicing the live array during iteration would make the
  // following handler be skipped.
  var handlers = this.events[name].slice();
  for (var i = 0; i < handlers.length; i++) {
    // Handlers are invoked without a `this` binding.
    handlers[i].apply(undefined, args);
  }
};
/**
* Checks whether a handler has ever been stored against an event.
*
* @param {String} eventName The name of the event to check.
* @private
* @memberOf EventEmitter
*/
elasticlunr.EventEmitter.prototype.hasHandler = function (name) {
  // Own-property check instead of `in`: `this.events` is a plain object, so
  // `in` would also report inherited Object.prototype members (for example
  // "toString" or "constructor") as events that have handlers, which makes
  // addListener/emit operate on a non-array value.
  return Object.prototype.hasOwnProperty.call(this.events, name);
};
/*!
* elasticlunr.tokenizer
* Copyright (C) 2017 Oliver Nightingale
* Copyright (C) 2017 Wei Song
*/
/**
* A function for splitting a string into tokens.
* Currently English is supported as default.
* Uses `elasticlunr.tokenizer.seperator` to split strings; you can change
* the value of this property to control how strings are split into tokens.
* IMPORTANT: change elasticlunr.tokenizer.seperator with care; if you are not familiar with
* text processing, it is better to leave it unchanged.
*
* @module
* @param {String} str The string that you want to tokenize.
* @see elasticlunr.tokenizer.seperator
* @return {Array}
*/
elasticlunr.tokenizer = function (str) {
  // Nothing to tokenize: called without an argument, or with null/undefined.
  if (arguments.length === 0 || str === null || str === undefined) return [];

  // Scalar input: stringify, trim, lower-case and split in one go.
  if (!Array.isArray(str)) {
    return str.toString().trim().toLowerCase().split(elasticlunr.tokenizer.seperator);
  }

  // Array input: drop null/undefined entries and lower-case the rest.
  // Note that array items are NOT trimmed, unlike scalar input.
  var lowered = [];
  for (var i = 0; i < str.length; i++) {
    var entry = str[i];
    if (entry === null || entry === undefined) continue;
    lowered.push(elasticlunr.utils.toString(entry).toLowerCase());
  }

  // Split every item and flatten the pieces into one token list.
  var out = [];
  for (var k = 0; k < lowered.length; k++) {
    var pieces = lowered[k].split(elasticlunr.tokenizer.seperator);
    for (var p = 0; p < pieces.length; p++) {
      out.push(pieces[p]);
    }
  }
  return out;
};
/**
* Default string separator: one or more whitespace characters and/or hyphens.
* (The spelling "seperator" is part of the public property name.)
*/
elasticlunr.tokenizer.defaultSeperator = /[\s\-]+/;
/**
* The separator used to split a string into tokens. Override this property to change the
* behaviour of `elasticlunr.tokenizer` when tokenizing strings. By default this splits on
* whitespace and hyphens.
*
* @static
* @see elasticlunr.tokenizer
*/
elasticlunr.tokenizer.seperator = elasticlunr.tokenizer.defaultSeperator;
/**
* Set up customized string seperator
*
* @param {Object} sep The customized seperator that you want to use to tokenize a string.
*/
elasticlunr.tokenizer.setSeperator = function(sep) {
  // Only non-null values whose typeof is 'object' (e.g. a RegExp) are accepted;
  // anything else, including a plain string, is silently ignored.
  // `typeof null` is also 'object', hence the explicit null check.
  var isUsable = typeof(sep) === 'object' && sep !== null;
  if (!isUsable) return;

  elasticlunr.tokenizer.seperator = sep;
};
/**
* Reset string seperator
*
*/
elasticlunr.tokenizer.resetSeperator = function() {
// Restore the separator to elasticlunr.tokenizer.defaultSeperator,
// discarding any value installed via setSeperator or direct assignment.
elasticlunr.tokenizer.seperator = elasticlunr.tokenizer.defaultSeperator;
}
/**
* Get string seperator
*
*/
elasticlunr.tokenizer.getSeperator = function() {
// Returns the separator currently in use (the same object, not a copy).
return elasticlunr.tokenizer.seperator;
}
/*!
* elasticlunr.Pipeline
* Copyright (C) 2017 Oliver Nightingale
* Copyright (C) 2017 Wei Song
*/
/**
* elasticlunr.Pipelines maintain an ordered list of functions to be applied to
* both documents tokens and query tokens.
*
* An instance of elasticlunr.Index will contain a pipeline
* with a trimmer, a stop word filter, an English stemmer. Extra
* functions can be added before or after either of these functions or these
* default functions can be removed.
*
* When run the pipeline, it will call each function in turn.
*
* The output of the functions in the pipeline will be passed to the next function
* in the pipeline. To exclude a token from entering the index the function
* should return undefined, the rest of the pipeline will not be called with
* this token.
*
* For serialisation of pipelines to work, all functions used in an instance of
* a pipeline should be registered with elasticlunr.Pipeline. Registered functions can
* then be loaded. If trying to load a serialised pipeline that uses functions
* that are not registered an error will be thrown.
*
* If not planning on serialising the pipeline then registering pipeline functions
* is not necessary.
*
* @constructor
*/
elasticlunr.Pipeline = function () {
// Ordered list of the functions that `run` applies to every token.
this._queue = [];
};
// Registry shared by all pipelines: maps a label to the function registered
// under it via elasticlunr.Pipeline.registerFunction.
elasticlunr.Pipeline.registeredFunctions = {};
/**
* Register a function in the pipeline.
*
* Functions that are used in the pipeline should be registered if the pipeline
* needs to be serialised, or a serialised pipeline needs to be loaded.
*
* Registering a function does not add it to a pipeline, functions must still be
* added to instances of the pipeline for them to be used when running a pipeline.
*
* @param {Function} fn The function to register.
* @param {String} label The label to register this function with
* @memberOf Pipeline
*/
elasticlunr.Pipeline.registerFunction = function (fn, label) {
  var registry = elasticlunr.Pipeline.registeredFunctions;

  // Re-registering a label is allowed, but worth a warning because the
  // previous function is replaced.
  if (label in registry) {
    elasticlunr.utils.warn('Overwriting existing registered function: ' + label);
  }

  // Tag the function with its label; Pipeline.prototype.toJSON serialises
  // the pipeline as a list of these labels.
  fn.label = label;
  registry[label] = fn;
};
/**
* Get a registered function in the pipeline.
*
* @param {String} label The label of registered function.
* @return {Function}
* @memberOf Pipeline
*/
elasticlunr.Pipeline.getRegisteredFunction = function (label) {
  var registry = elasticlunr.Pipeline.registeredFunctions;

  // Own-property check instead of `in`: the registry is a plain object, so
  // `in` would also match inherited Object.prototype members. A serialised
  // pipeline naming e.g. "constructor" or "toString" must be treated as
  // un-registered (null) rather than resolve to a built-in function.
  if (!Object.prototype.hasOwnProperty.call(registry, label)) {
    return null;
  }
  return registry[label];
};
/**
* Warns if the function is not registered as a Pipeline function.
*
* @param {Function} fn The function to check for.
* @private
* @memberOf Pipeline
*/
elasticlunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {
  // A function counts as registered when it carries a label and that label
  // is present in the registry. `this` is expected to be elasticlunr.Pipeline.
  if (fn.label && (fn.label in this.registeredFunctions)) return;

  elasticlunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn);
};
/**
* Loads a previously serialised pipeline.
*
* All functions to be loaded must already be registered with elasticlunr.Pipeline.
* If any function from the serialised data has not been registered then an
* error will be thrown.
*
* @param {Object} serialised The serialised pipeline to load.
* @return {elasticlunr.Pipeline}
* @memberOf Pipeline
*/
elasticlunr.Pipeline.load = function (serialised) {
  var restored = new elasticlunr.Pipeline();

  // `serialised` is a list of labels; each must resolve to a registered function.
  serialised.forEach(function (label) {
    var registered = elasticlunr.Pipeline.getRegisteredFunction(label);
    if (!registered) {
      throw new Error('Cannot load un-registered function: ' + label);
    }
    restored.add(registered);
  });

  return restored;
};
/**
* Adds new functions to the end of the pipeline.
*
* Logs a warning if the function has not been registered.
*
* @param {Function} functions Any number of functions to add to the pipeline.
* @memberOf Pipeline
*/
elasticlunr.Pipeline.prototype.add = function () {
  // Append each argument in the order given, warning about unregistered ones.
  for (var i = 0; i < arguments.length; i++) {
    var stage = arguments[i];
    elasticlunr.Pipeline.warnIfFunctionNotRegistered(stage);
    this._queue.push(stage);
  }
};
/**
* Adds a single function after a function that already exists in the
* pipeline.
*
* Logs a warning if the function has not been registered.
* If existingFn is not found, throw an Exception.
*
* @param {Function} existingFn A function that already exists in the pipeline.
* @param {Function} newFn The new function to add to the pipeline.
* @memberOf Pipeline
*/
elasticlunr.Pipeline.prototype.after = function (existingFn, newFn) {
  // The registration warning is issued before the anchor lookup, so it is
  // logged even when the call ends up throwing.
  elasticlunr.Pipeline.warnIfFunctionNotRegistered(newFn);

  var anchor = this._queue.indexOf(existingFn);
  if (anchor < 0) throw new Error('Cannot find existingFn');

  // Insert immediately behind the anchor without removing anything.
  this._queue.splice(anchor + 1, 0, newFn);
};
/**
* Adds a single function before a function that already exists in the
* pipeline.
*
* Logs a warning if the function has not been registered.
* If existingFn is not found, throw an Exception.
*
* @param {Function} existingFn A function that already exists in the pipeline.
* @param {Function} newFn The new function to add to the pipeline.
* @memberOf Pipeline
*/
elasticlunr.Pipeline.prototype.before = function (existingFn, newFn) {
  // The registration warning is issued before the anchor lookup, so it is
  // logged even when the call ends up throwing.
  elasticlunr.Pipeline.warnIfFunctionNotRegistered(newFn);

  var anchor = this._queue.indexOf(existingFn);
  if (anchor < 0) throw new Error('Cannot find existingFn');

  // Insert at the anchor's position, pushing the anchor one slot back.
  this._queue.splice(anchor, 0, newFn);
};
/**
* Removes a function from the pipeline.
*
* @param {Function} fn The function to remove from the pipeline.
* @memberOf Pipeline
*/
elasticlunr.Pipeline.prototype.remove = function (fn) {
  var position = this._queue.indexOf(fn);

  // Removing a function that is not in the pipeline is a silent no-op;
  // only the first occurrence is removed.
  if (position !== -1) {
    this._queue.splice(position, 1);
  }
};
/**
* Runs the functions currently in the pipeline against the
* input tokens.
*
* @param {Array} tokens The tokens to run through the pipeline.
* @return {Array}
* @memberOf Pipeline
*/
elasticlunr.Pipeline.prototype.run = function (tokens) {
  var survivors = [],
      tokenTotal = tokens.length,
      stageTotal = this._queue.length;

  for (var t = 0; t < tokenTotal; t++) {
    var current = tokens[t];
    var stage = 0;

    // Feed the token through each stage in order. A stage that returns
    // null or undefined drops the token and short-circuits the rest.
    while (stage < stageTotal) {
      // Called as a member of the queue array on purpose: this keeps the
      // callee's `this` binding the same as in the original call form.
      current = this._queue[stage](current, t, tokens);
      if (current === undefined || current === null) break;
      stage++;
    }

    if (current !== undefined && current !== null) survivors.push(current);
  }

  return survivors;
};
/**
* Resets the pipeline by removing any existing processors.
*
* @memberOf Pipeline
*/
elasticlunr.Pipeline.prototype.reset = function () {
// Installs a fresh empty array; an array previously obtained through get()
// is left untouched and is no longer used by this pipeline.
this._queue = [];
};
/**
* Get the pipeline if user want to check the pipeline.
*
* @memberOf Pipeline
*/
elasticlunr.Pipeline.prototype.get = function () {
// NOTE: returns the live internal array, not a copy; mutating the returned
// array changes the pipeline.
return this._queue;
};
/**
* Returns a representation of the pipeline ready for serialisation.
* Only serialize pipeline function's name. Not storing function, so when
* loading the archived JSON index file, corresponding pipeline function is
* added by registered function of elasticlunr.Pipeline.registeredFunctions
*
* Logs a warning if the function has not been registered.
*
* @return {Array}
* @memberOf Pipeline
*/
elasticlunr.Pipeline.prototype.toJSON = function () {
  // Serialise the pipeline as the list of its functions' labels, in order.
  // Unregistered functions trigger a warning and contribute whatever their
  // `label` property holds (undefined when they were never labelled).
  var labels = [];
  for (var i = 0; i < this._queue.length; i++) {
    var stage = this._queue[i];
    elasticlunr.Pipeline.warnIfFunctionNotRegistered(stage);
    labels.push(stage.label);
  }
  return labels;
};
/*!
* elasticlunr.Index
* Copyright (C) 2017 Oliver Nightingale
* Copyright (C) 2017 Wei Song
*/
/**
* elasticlunr.Index is object that manages a search index. It contains the indexes
* and stores all the tokens and document lookups. It also provides the main
* user facing API for the library.
*
* @constructor
*/
elasticlunr.Index = function () {
  var self = this;

  this._fields = [];                                   // names of the searchable fields
  this._ref = 'id';                                    // document property used as unique ref
  this.pipeline = new elasticlunr.Pipeline();
  this.documentStore = new elasticlunr.DocumentStore();
  this.index = {};                                     // field name -> inverted index
  this.eventEmitter = new elasticlunr.EventEmitter();
  this._idfCache = {};                                 // memoised idf values, see idf()

  // Any change to the indexed documents invalidates every cached idf value.
  this.on('add', 'remove', 'update', function () {
    self._idfCache = {};
  });
};
/**
* Bind a handler to events being emitted by the index.
*
* The handler can be bound to many events at the same time.
*
* @param {String} [eventName] The name(s) of events to bind the function to.
* @param {Function} fn The handler function to call when the event is emitted.
* @memberOf Index
*/
elasticlunr.Index.prototype.on = function () {
  // Pure delegation: all arguments (event names followed by the handler)
  // are handed to the index's EventEmitter unchanged.
  var emitter = this.eventEmitter;
  return emitter.addListener.apply(emitter, Array.prototype.slice.call(arguments));
};
/**
* Removes a handler from an event being emitted by the index.
*
* @param {String} eventName The name of events to remove the function from.
* @param {Function} fn The handler function to remove from the event.
* @memberOf Index
*/
elasticlunr.Index.prototype.off = function (name, fn) {
// Pure delegation to the index's EventEmitter; returns whatever
// removeListener returns.
return this.eventEmitter.removeListener(name, fn);
};
/**
* Loads a previously serialised index.
*
* Issues a warning if the index being imported was serialised
* by a different version of elasticlunr.
*
* @param {Object} serialisedData The serialised set to load.
* @return {elasticlunr.Index}
* @memberOf Index
*/
elasticlunr.Index.load = function (serialisedData) {
  var theirVersion = serialisedData.version,
      ourVersion = elasticlunr.version;

  // A version mismatch is only reported; loading continues regardless.
  if (theirVersion !== ourVersion) {
    elasticlunr.utils.warn('version mismatch: current '
      + ourVersion + ' importing ' + theirVersion);
  }

  // `this` is the constructor the method was called on.
  var restored = new this();

  restored._fields = serialisedData.fields;
  restored._ref = serialisedData.ref;
  restored.documentStore = elasticlunr.DocumentStore.load(serialisedData.documentStore);
  restored.pipeline = elasticlunr.Pipeline.load(serialisedData.pipeline);

  // Rebuild one inverted index per serialised field.
  restored.index = {};
  var serialisedIndexes = serialisedData.index;
  for (var fieldName in serialisedIndexes) {
    restored.index[fieldName] = elasticlunr.InvertedIndex.load(serialisedIndexes[fieldName]);
  }

  return restored;
};
/**
* Adds a field to the list of fields that will be searchable within documents in the index.
*
* Remember that inner index is build based on field, which means each field has one inverted index.
*
* Fields should be added before any documents are added to the index, fields
* that are added after documents are added to the index will only apply to new
* documents added to the index.
*
* @param {String} fieldName The name of the field within the document that should be indexed
* @return {elasticlunr.Index}
* @memberOf Index
*/
elasticlunr.Index.prototype.addField = function (fieldName) {
this._fields.push(fieldName);
// Each field gets its own, initially empty, inverted index.
// NOTE(review): calling addField twice with the same name lists the field
// twice in _fields and replaces its existing inverted index with a new one.
this.index[fieldName] = new elasticlunr.InvertedIndex;
// Returns the index itself so calls can be chained.
return this;
};
/**
* Sets the property used to uniquely identify documents added to the index,
* by default this property is 'id'.
*
* This should only be changed before adding documents to the index, changing
* the ref property without resetting the index can lead to unexpected results.
*
* @param {String} refName The property to use to uniquely identify the
* documents in the index.
* @param {Boolean} emitEvent Whether to emit add events, defaults to true
* @return {elasticlunr.Index}
* @memberOf Index
*/
elasticlunr.Index.prototype.setRef = function (refName) {
// Only `refName` is accepted: this method takes no emitEvent argument and
// emits no events. Already-indexed documents are not re-keyed.
this._ref = refName;
// Returns the index itself so calls can be chained.
return this;
};
/**
*
* Sets whether the original JSON documents are saved in elasticlunr.DocumentStore.
*
* By default all the original JSON documents are saved.
*
* @param {Boolean} save Whether to save the original JSON documents.
* @return {elasticlunr.Index}
* @memberOf Index
*/
elasticlunr.Index.prototype.saveDocument = function (save) {
// Replaces the current document store with a brand-new one constructed with
// the `save` flag; the previous store object is no longer referenced by the index.
this.documentStore = new elasticlunr.DocumentStore(save);
// Returns the index itself so calls can be chained.
return this;
};
/**
* Add a JSON format document to the index.
*
* This is the way new documents enter the index, this function will run the
* fields from the document through the index's pipeline and then add it to
* the index, it will then show up in search results.
*
* An 'add' event is emitted with the document that has been added and the index
* the document has been added to. This event can be silenced by passing false
* as the second argument to add.
*
* @param {Object} doc The JSON format document to add to the index.
* @param {Boolean} emitEvent Whether or not to emit events, default true.
* @memberOf Index
*/
elasticlunr.Index.prototype.addDoc = function (doc, emitEvent) {
  if (!doc) return;

  // Events are emitted unless the caller explicitly passes a falsy value.
  var shouldEmit = emitEvent === undefined ? true : emitEvent;
  var docRef = doc[this._ref];

  this.documentStore.addDoc(docRef, doc);

  this._fields.forEach(function (field) {
    var fieldTokens = this.pipeline.run(elasticlunr.tokenizer(doc[field]));
    this.documentStore.addFieldLength(docRef, field, fieldTokens.length);

    // Count occurrences per token. The map has no prototype so that tokens
    // which happen to be named like Object.prototype members ("constructor",
    // "__proto__", ...) are counted like any other token instead of starting
    // from an inherited, non-numeric value (which produced a NaN frequency).
    var tokenCount = Object.create(null);
    fieldTokens.forEach(function (token) {
      tokenCount[token] = (tokenCount[token] || 0) + 1;
    });

    for (var token in tokenCount) {
      // The stored term frequency is the square root of the raw count.
      var termFrequency = Math.sqrt(tokenCount[token]);
      this.index[field].addToken(token, { ref: docRef, tf: termFrequency });
    }
  }, this);

  if (shouldEmit) this.eventEmitter.emit('add', doc, this);
};
/**
* Removes a document from the index by doc ref.
*
* To make sure documents no longer show up in search results they can be
* removed from the index using this method.
*
* A 'remove' event is emitted with the document that has been removed and the index
* the document has been removed from. This event can be silenced by passing false
* as the second argument to remove.
*
* If the DocumentStore is configured not to store documents, removing a doc by docRef is not possible and this method does nothing.
*
* @param {String|Integer} docRef The document ref to remove from the index.
* @param {Boolean} emitEvent Whether to emit remove events, defaults to true
* @memberOf Index
*/
elasticlunr.Index.prototype.removeDocByRef = function (docRef, emitEvent) {
  // Reject only a missing ref. A plain falsiness test would also reject the
  // perfectly valid numeric ref 0.
  if (docRef === undefined || docRef === null) return;

  // Without stored documents there is nothing to look the ref up in.
  if (this.documentStore.isDocStored() === false) return;
  if (!this.documentStore.hasDoc(docRef)) return;

  var doc = this.documentStore.getDoc(docRef);

  // Forward the caller's choice instead of always silencing the event:
  // removeDoc treats an undefined emitEvent as true, so the 'remove' event
  // fires by default, as documented, and listeners (including the index's
  // own idf-cache invalidation) are notified.
  this.removeDoc(doc, emitEvent);
};
/**
* Removes a document from the index.
* This remove operation works even if the original doc is not stored in the DocumentStore.
*
* To make sure documents no longer show up in search results they can be
* removed from the index using this method.
*
* A 'remove' event is emitted with the document that has been removed and the index
* the document has been removed from. This event can be silenced by passing false
* as the second argument to remove.
*
*
* @param {Object} doc The document ref to remove from the index.
* @param {Boolean} emitEvent Whether to emit remove events, defaults to true
* @memberOf Index
*/
elasticlunr.Index.prototype.removeDoc = function (doc, emitEvent) {
  if (!doc) return;

  var docRef = doc[this._ref];
  if (!this.documentStore.hasDoc(docRef)) return;

  this.documentStore.removeDoc(docRef);

  // Re-tokenize the passed document to learn which tokens must be removed
  // from each field's inverted index.
  var fields = this._fields,
      fieldTotal = fields.length;
  for (var f = 0; f < fieldTotal; f++) {
    var field = fields[f];
    var fieldTokens = this.pipeline.run(elasticlunr.tokenizer(doc[field]));
    var tokenTotal = fieldTokens.length;
    for (var t = 0; t < tokenTotal; t++) {
      this.index[field].removeToken(fieldTokens[t], docRef);
    }
  }

  // Events are emitted unless the caller explicitly passes a falsy value.
  if (emitEvent === undefined || emitEvent) {
    this.eventEmitter.emit('remove', doc, this);
  }
};
/**
* Updates a document in the index.
*
* When a document contained within the index gets updated, fields changed,
* added or removed, to make sure it correctly matched against search queries,
* it should be updated in the index.
*
* This method is just a wrapper around `removeDocByRef` and `addDoc`.
*
* An 'update' event is emitted with the document that has been updated and the index.
* This event can be silenced by passing false as the second argument to update. Only
* an update event will be fired, the 'add' and 'remove' events of the underlying calls
* are silenced.
*
* @param {Object} doc The document to update in the index.
* @param {Boolean} emitEvent Whether to emit update events, defaults to true
* @see Index.prototype.removeDocByRef
* @see Index.prototype.addDoc
* @memberOf Index
*/
elasticlunr.Index.prototype.updateDoc = function (doc, emitEvent) {
  // Replace the document: drop the stored version, then index the new one.
  // Both underlying calls are silenced so listeners only see one 'update'.
  this.removeDocByRef(doc[this._ref], false);
  this.addDoc(doc, false);

  // Events are emitted unless the caller explicitly passes a falsy value.
  if (emitEvent === undefined || emitEvent) {
    this.eventEmitter.emit('update', doc, this);
  }
};
/**
* Calculates the inverse document frequency for a token within the index of a field.
*
* @param {String} term The token to calculate the idf of.
* @param {String} field The field to compute idf.
* @see Index.prototype.idf
* @private
* @memberOf Index
*/
elasticlunr.Index.prototype.idf = function (term, field) {
  var cache = this._idfCache,
      cacheKey = "@" + field + '/' + term;

  // Compute and memoise on first use; the cache is reset by the index's
  // own add/remove/update event handler.
  if (!Object.prototype.hasOwnProperty.call(cache, cacheKey)) {
    var docFreq = this.index[field].getDocFreq(term);
    // idf = 1 + ln(total documents / (documents containing the term + 1))
    cache[cacheKey] = 1 + Math.log(this.documentStore.length / (docFreq + 1));
  }

  return cache[cacheKey];
};
/**
* get fields of current index instance
*
* @return {Array}
*/
elasticlunr.Index.prototype.getFields = function () {
// slice() returns a shallow copy, so callers cannot modify the index's own
// field list through the returned array.
return this._fields.slice();
};
/**
* Searches the index using the passed query.
* Queries should be a string, multiple words are allowed.
*
* If config is null, all fields are searched by default, which results in an OR based query.
* If config is specified, the specified fields are searched with query-time boosting.
*
* All query tokens are passed through the same pipeline that document tokens
* are passed through, so any language processing involved will be run on every
* query term.
*
* Each query term is expanded, so that the term 'he' might be expanded to
* 'hello' and 'help' if those terms were already included in the index.
*
* Matching documents are returned as an array of objects, each object contains
* the matching document ref, as set for this index, and the similarity score
* for this document against the query.
*
* @param {String} query The query to search the index with.
* @param {JSON} userConfig The user query config, JSON format.
* @return {Object}
* @see Index.prototype.idf
* @see Index.prototype.documentVector
* @memberOf Index
*/
elasticlunr.Index.prototype.search = function (query, userConfig) {
  if (!query) return [];

  // Normalise the query into an object keyed by field name. A plain string
  // is stored under the pseudo-field `any`, which applies to every field
  // that has no tokens of its own. Object queries are deep-copied via JSON.
  query = (typeof query === 'string')
    ? {any: query}
    : JSON.parse(JSON.stringify(query));

  // The Configuration constructor takes the user config as a JSON string.
  var configStr = (userConfig != null) ? JSON.stringify(userConfig) : null;
  var config = new elasticlunr.Configuration(configStr, this.getFields()).get();

  // Run every query value through the tokenizer and the index pipeline.
  var queryTokens = {};
  Object.keys(query).forEach(function (key) {
    queryTokens[key] = this.pipeline.run(elasticlunr.tokenizer(query[key]));
  }, this);

  // Search field by field, accumulating boosted scores per document ref.
  var queryResults = {};
  for (var field in config) {
    var tokens = queryTokens[field] || queryTokens.any;
    if (!tokens) continue;

    var fieldSearchResults = this.fieldSearch(tokens, field, config);
    var fieldBoost = config[field].boost;

    for (var docRef in fieldSearchResults) {
      var boosted = fieldSearchResults[docRef] * fieldBoost;
      fieldSearchResults[docRef] = boosted;
      if (docRef in queryResults) {
        queryResults[docRef] += boosted;
      } else {
        queryResults[docRef] = boosted;
      }
    }
  }

  // Turn the score table into a result list, attaching the stored document
  // when the document store has it.
  var results = [];
  for (var ref in queryResults) {
    var hit = {ref: ref, score: queryResults[ref]};
    if (this.documentStore.hasDoc(ref)) {
      hit.doc = this.documentStore.getDoc(ref);
    }
    results.push(hit);
  }

  // Highest score first.
  results.sort(function (a, b) { return b.score - a.score; });
  return results;
};
/**
* search queryTokens in specified field.
*
* @param {Array} queryTokens The query tokens to query in this field.
* @param {String} field Field to query in.
* @param {elasticlunr.Configuration} config The user query config, JSON format.
* @return {Object}
*/
elasticlunr.Index.prototype.fieldSearch = function (queryTokens, fieldName, config) {
// Per-field query settings, read from config[fieldName].
var booleanType = config[fieldName].bool;
var expand = config[fieldName].expand;
var boost = config[fieldName].boost;
// Accumulated scores keyed by docRef; stays null until the first query
// token has been merged in by mergeScores.
var scores = null;
// docRef -> list of original (non-expanded) query tokens found in that
// document; consumed by coordNorm at the end.
var docTokens = {};
// Do nothing if the boost is 0.
// NOTE: this path returns undefined rather than an empty object.
if (boost === 0) {
return;
}
queryTokens.forEach(function (token) {
var tokens = [token];
if (expand == true) {
tokens = this.index[fieldName].expandToken(token);
}
// Consider every query token in turn. If expanded, each query token
// corresponds to a set of tokens, which is all tokens in the
// index matching the pattern queryToken* .
// For the set of tokens corresponding to a query token, find and score
// all matching documents. Store those scores in queryTokenScores,
// keyed by docRef.
// Then, depending on the value of booleanType, combine the scores
// for this query token with previous scores. If booleanType is OR,
// then merge the scores by summing into the accumulated total, adding
// new document scores as required (effectively a union operator).
// If booleanType is AND, accumulate scores only if the document
// has previously been scored by another query token (an intersection
// operation).
// Furthermore, since when booleanType is AND, additional
// query tokens can't add new documents to the result set, use the
// current document set to limit the processing of each new query
// token for efficiency (i.e., incremental intersection).
var queryTokenScores = {};
tokens.forEach(function (key) {
var docs = this.index[fieldName].getDocs(key);
var idf = this.idf(key, fieldName);
if (scores && booleanType == 'AND') {
// Special case: we can rule out documents that have
// already been filtered out because they weren't scored
// by previous query token passes.
var filteredDocs = {};
for (var docRef in scores) {
if (docRef in docs) {
filteredDocs[docRef] = docs[docRef];
}
}
docs = filteredDocs;
}
// Only record the appearance of the original token in the retrieved
// documents, not that of expanded tokens.
// coordNorm only cares how many query tokens appear in a retrieved
// document, so expanded tokens must not be added to docTokens;
// adding them would pollute the coordNorm.
if (key == token) {
this.fieldSearchStats(docTokens, key, docs);
}
for (var docRef in docs) {
var tf = this.index[fieldName].getTermFrequency(key, docRef);
var fieldLength = this.documentStore.getFieldLength(docRef, fieldName);
// Length normalisation: 1 / sqrt(fieldLength), or 1 when the length is 0.
var fieldLengthNorm = 1;
if (fieldLength != 0) {
fieldLengthNorm = 1 / Math.sqrt(fieldLength);
}
// Expanded tokens are penalised; the factor below simplifies to
// 0.15 * token.length / key.length.
var penality = 1;
if (key != token) {
// currently I'm not sure if this penalty is enough,
// need to do verification
penality = (1 - (key.length - token.length) / key.length) * 0.15;
}
var score = tf * idf * fieldLengthNorm * penality;
if (docRef in queryTokenScores) {
queryTokenScores[docRef] += score;
} else {
queryTokenScores[docRef] = score;
}
}
}, this);
// Fold this query token's scores into the running total (AND or OR).
scores = this.mergeScores(scores, queryTokenScores, booleanType);
}, this);
// Scale each document's score by the fraction of query tokens it matched.
scores = this.coordNorm(scores, docTokens, queryTokens.length);
return scores;
};
/**
* Merge the scores from one set of tokens into an accumulated score table.
* Exact operation depends on the op parameter. If op is 'AND', then only the
* intersection of the two score lists is retained. Otherwise, the union of
* the two score lists is returned. For internal use only.
*
* @param {Object} accumScores accumulated scores. Should be null on first call.
* @param {Object} scores new scores to merge into accumScores.
* @param {String} op merge operation (should be 'AND' or 'OR').
*
*/
elasticlunr.Index.prototype.mergeScores = function (accumScores, scores, op) {
  // First call: nothing accumulated yet, the new scores are the total.
  if (!accumScores) return scores;

  var docRef;

  if (op == 'AND') {
    // Intersection: keep only documents present in both tables, summing
    // their scores into a new object.
    var both = {};
    for (docRef in scores) {
      if (docRef in accumScores) {
        both[docRef] = accumScores[docRef] + scores[docRef];
      }
    }
    return both;
  }

  // Union (any other op): fold the new scores into the accumulated table
  // in place and return that same table.
  for (docRef in scores) {
    accumScores[docRef] = (docRef in accumScores)
      ? accumScores[docRef] + scores[docRef]
      : scores[docRef];
  }
  return accumScores;
};
/**
* Record the occurring query token for each retrieved doc.
* For internal use only.
*
* @param {Object} docTokens a data structure stores which token appears in the retrieved doc.
* @param {String} token query token
* @param {Object} docs the retrieved documents of the query token
*
*/
elasticlunr.Index.prototype.fieldSearchStats = function (docTokens, token, docs) {
  // For every retrieved document, append `token` to that document's list
  // of matched query tokens, creating the list on first sight.
  for (var docRef in docs) {
    if (!(docRef in docTokens)) {
      docTokens[docRef] = [];
    }
    docTokens[docRef].push(token);
  }
};
/**
* coord norm the score of a doc.
* if a doc contain more query tokens, then the score will larger than the doc
* contains less query tokens.
*
* only for inner use.
*
* @param {Object} scores scores keyed by doc ref; updated in place
* @param {Object} docTokens for each doc ref, the list of query tokens found in that doc
* @param {Integer} n query token number
* @return {Object}
*/
elasticlunr.Index.prototype.coordNorm = function (scores, docTokens, n) {
  // Scale each score by (matched query tokens / total query tokens).
  // Documents without an entry in docTokens keep their score unchanged.
  for (var docRef in scores) {
    if (docRef in docTokens) {
      var matched = docTokens[docRef].length;
      scores[docRef] = scores[docRef] * matched / n;
    }
  }
  return scores;
};
/**
 * Build a plain-object representation of the index that is ready for serialisation.
 *
 * The result carries the library version, the field list, the ref name, the
 * serialised document store, one serialised inverted index per field and the
 * serialised pipeline.
 *
 * @return {Object}
 * @memberOf Index
 */
elasticlunr.Index.prototype.toJSON = function () {
  var self = this;
  var perField = {};

  this._fields.forEach(function (field) {
    perField[field] = self.index[field].toJSON();
  });

  var out = {
    version: elasticlunr.version,
    fields: this._fields,
    ref: this._ref
  };
  out.documentStore = this.documentStore.toJSON();
  out.index = perField;
  out.pipeline = this.pipeline.toJSON();
  return out;
};
/**
 * Applies a plugin to the current index.
 *
 * A plugin is simply a function that customises or extends the index. It is
 * invoked with the index both as its `this` context and as its first argument;
 * any further arguments given to `use` are passed along after the index.
 *
 * Example:
 *
 *     var myPlugin = function (idx, arg1, arg2) {
 *       // `this` and `idx` are the index to be extended
 *     }
 *
 *     var idx = elasticlunr(function () {
 *       this.use(myPlugin, 'arg1', 'arg2')
 *     })
 *
 * @param {Function} plugin The plugin to apply.
 * @memberOf Index
 */
elasticlunr.Index.prototype.use = function (plugin) {
  // Everything after `plugin` is forwarded to it, preceded by the index itself.
  var extra = Array.prototype.slice.call(arguments, 1);
  plugin.apply(this, [this].concat(extra));
};
/*!
* elasticlunr.DocumentStore
* Copyright (C) 2017 Wei Song
*/
/**
 * elasticlunr.DocumentStore is a simple key-value store for the documents held
 * in an index, plus per-document field-length information.
 *
 * By default the original JSON documents are kept, so they can be used to build
 * search-result snippets. Callers who care more about index size can disable
 * storing; the original documents are then unavailable for snippet generation.
 *
 * @param {Boolean} save Whether the original JSON documents should be stored.
 *   Defaults to true when null or undefined.
 * @constructor
 * @module
 */
elasticlunr.DocumentStore = function (save) {
  var unspecified = (save === null || save === undefined);
  this._save = unspecified ? true : save;
  this.docs = {};
  this.docInfo = {};
  this.length = 0;
};
/**
 * Rebuilds a document store from its serialised form.
 *
 * The `length`, `docs`, `docInfo` and `save` members of the serialised data are
 * adopted as-is (by reference, not copied).
 *
 * @param {Object} serialisedData The serialised document store to load.
 * @return {elasticlunr.DocumentStore}
 */
elasticlunr.DocumentStore.load = function (serialisedData) {
  var restored = new this();

  restored.length = serialisedData.length;
  restored.docs = serialisedData.docs;
  restored.docInfo = serialisedData.docInfo;
  restored._save = serialisedData.save;

  return restored;
};
/**
 * Tells whether this store keeps the original documents.
 *
 * @return {Boolean} The store's save flag.
 */
elasticlunr.DocumentStore.prototype.isDocStored = function () {
  var keepsDocs = this._save;
  return keepsDocs;
};
/**
 * Stores the given doc under the given ref, replacing any doc already stored there.
 *
 * When the store keeps documents, a shallow copy of the doc is saved so it can
 * later be used to generate search snippets; otherwise only the ref is
 * registered (with a null value). `length` grows only for refs not seen before.
 *
 * @param {Integer|String} docRef The key used to store the JSON format doc.
 * @param {Object} doc The JSON format doc.
 */
elasticlunr.DocumentStore.prototype.addDoc = function (docRef, doc) {
  var isNewRef = !this.hasDoc(docRef);
  if (isNewRef) this.length++;

  this.docs[docRef] = (this._save === true) ? clone(doc) : null;
};
/**
 * Retrieves the JSON doc stored under the given key.
 *
 * Returns null when the ref is unknown, and also when the store was configured
 * not to keep documents (in that case null is what was stored).
 *
 * @param {Integer|String} docRef The key to look up in the document store.
 * @return {Object}
 * @memberOf DocumentStore
 */
elasticlunr.DocumentStore.prototype.getDoc = function (docRef) {
  return this.hasDoc(docRef) === false ? null : this.docs[docRef];
};
/**
 * Checks whether the document store contains a key (docRef).
 *
 * Only keys stored on the `docs` object itself count. Names inherited from
 * Object.prototype (e.g. 'toString', 'constructor') are not reported as
 * present, so documents using such refs are counted and stored correctly.
 *
 * @param {Integer|String} docRef The id to look up in the document store.
 * @return {Boolean}
 * @memberOf DocumentStore
 */
elasticlunr.DocumentStore.prototype.hasDoc = function (docRef) {
  // Own-property check: the `in` operator would also match inherited members.
  return Object.prototype.hasOwnProperty.call(this.docs, docRef);
};
/**
 * Removes a document, together with its field-length info, from the store.
 * Unknown refs are ignored.
 *
 * @param {Integer|String} docRef The id to remove from the document store.
 * @memberOf DocumentStore
 */
elasticlunr.DocumentStore.prototype.removeDoc = function (docRef) {
  if (this.hasDoc(docRef)) {
    delete this.docs[docRef];
    delete this.docInfo[docRef];
    this.length--;
  }
};
/**
 * Records the token count of one field of a stored document.
 *
 * Field lengths are kept separately from the documents so that field-length
 * normalisation still works when the original JSON documents are not stored.
 * Nothing happens for a null/undefined ref or a ref that is not in the store.
 *
 * @param {Integer|String} docRef document's id or reference
 * @param {String} fieldName field name
 * @param {Integer} length field length
 */
elasticlunr.DocumentStore.prototype.addFieldLength = function (docRef, fieldName, length) {
  var noRef = (docRef === null || docRef === undefined);
  if (noRef || this.hasDoc(docRef) == false) return;

  // Create the per-document record lazily on first use.
  var info = this.docInfo[docRef];
  if (!info) {
    info = {};
    this.docInfo[docRef] = info;
  }
  info[fieldName] = length;
};
/**
 * Updates the token count of one field of a stored document.
 *
 * Performs the same ref checks as addFieldLength and then delegates to it, so
 * the new length overwrites whatever was recorded before.
 *
 * @param {Integer|String} docRef document's id or reference
 * @param {String} fieldName field name
 * @param {Integer} length field length
 */
elasticlunr.DocumentStore.prototype.updateFieldLength = function (docRef, fieldName, length) {
  var hasRef = (docRef !== null && docRef !== undefined);
  if (hasRef && this.hasDoc(docRef) != false) {
    this.addFieldLength(docRef, fieldName, length);
  }
};
/**
 * Gets the recorded token count of one field of a document.
 *
 * Returns 0 when the ref is null/undefined, when the document is not in the
 * store, when no field lengths have been recorded for the document yet, or
 * when the given field has no recorded length.
 *
 * @param {Integer|String} docRef document id or reference
 * @param {String} fieldName field name
 * @return {Integer} field length
 */
elasticlunr.DocumentStore.prototype.getFieldLength = function (docRef, fieldName) {
  if (docRef === null || docRef === undefined) return 0;
  if (!this.hasDoc(docRef)) return 0;

  // A document can be stored before any field length is recorded for it;
  // without this guard the lookup below would throw on `undefined`.
  var info = this.docInfo[docRef];
  if (info === null || typeof info !== 'object') return 0;

  // Own-property check so inherited members (e.g. 'toString') are never
  // returned as if they were field lengths.
  if (!Object.prototype.hasOwnProperty.call(info, fieldName)) return 0;

  return info[fieldName];
};
/**
 * Returns a plain-object view of the document store for serialisation.
 * The docs and docInfo tables are referenced, not copied.
 *
 * @return {Object} JSON format
 * @memberOf DocumentStore
 */
elasticlunr.DocumentStore.prototype.toJSON = function () {
  var snapshot = {};
  snapshot.docs = this.docs;
  snapshot.docInfo = this.docInfo;
  snapshot.length = this.length;
  snapshot.save = this._save;
  return snapshot;
};
/**
 * Makes a shallow copy of a JSON-style value.
 *
 * Primitives and null are returned unchanged. Arrays are copied into a new
 * array and every other object into a new plain object; only the value's own
 * enumerable properties are copied, and nested values are shared with the
 * original (the copy is not deep).
 *
 * The copy target is created explicitly instead of by calling the value's
 * constructor, so objects without a prototype, instances of classes or
 * constructor functions, and objects that shadow `hasOwnProperty` are handled
 * without throwing.
 *
 * @param {Object} obj object in JSON format
 * @return {Object} copied object
 */
function clone(obj) {
  if (obj === null || typeof obj !== 'object') return obj;

  var copy = Array.isArray(obj) ? [] : {};
  var owns = Object.prototype.hasOwnProperty;

  for (var attr in obj) {
    if (owns.call(obj, attr)) copy[attr] = obj[attr];
  }

  return copy;
}
/*!
* elasticlunr.stemmer
* Copyright (C) 2017 Oliver Nightingale
* Copyright (C) 2017 Wei Song
* Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
*/
/**
 * elasticlunr.stemmer is an English language stemmer; it is a JavaScript
 * implementation of the Porter stemmer taken from http://tartarus.org/~martin
 *
 * The function is built once by an immediately-invoked closure that
 * precompiles all regular expressions, and is then registered with the
 * pipeline under the label 'stemmer'.
 *
 * @module
 * @param {String} str The string to stem
 * @return {String} The stemmed word; words shorter than 3 characters are returned unchanged.
 * @see elasticlunr.Pipeline
 */
elasticlunr.stemmer = (function(){
// Step 2 suffix table: matched suffix -> replacement.
var step2list = {
"ational" : "ate",
"tional" : "tion",
"enci" : "ence",
"anci" : "ance",
"izer" : "ize",
"bli" : "ble",
"alli" : "al",
"entli" : "ent",
"eli" : "e",
"ousli" : "ous",
"ization" : "ize",
"ation" : "ate",
"ator" : "ate",
"alism" : "al",
"iveness" : "ive",
"fulness" : "ful",
"ousness" : "ous",
"aliti" : "al",
"iviti" : "ive",
"biliti" : "ble",
"logi" : "log"
},
// Step 3 suffix table: matched suffix -> replacement (may be empty).
step3list = {
"icate" : "ic",
"ative" : "",
"alize" : "al",
"iciti" : "ic",
"ical" : "ic",
"ful" : "",
"ness" : ""
},
// Building blocks for the "measure" (m) patterns below.
c = "[^aeiou]", // consonant
v = "[aeiouy]", // vowel
C = c + "[^aeiouy]*", // consonant sequence
V = v + "[aeiou]*", // vowel sequence
mgr0 = "^(" + C + ")?" + V + C, // [C]VC... is m>0
meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$", // [C]VC[V] is m=1
mgr1 = "^(" + C + ")?" + V + C + V + C, // [C]VCVC... is m>1
s_v = "^(" + C + ")?" + v; // vowel in stem
// All regular expressions are compiled once, when the closure runs.
var re_mgr0 = new RegExp(mgr0);
var re_mgr1 = new RegExp(mgr1);
var re_meq1 = new RegExp(meq1);
var re_s_v = new RegExp(s_v);
var re_1a = /^(.+?)(ss|i)es$/;
var re2_1a = /^(.+?)([^s])s$/;
var re_1b = /^(.+?)eed$/;
var re2_1b = /^(.+?)(ed|ing)$/;
// Matches the last character; used with replace() to drop it.
var re_1b_2 = /.$/;
var re2_1b_2 = /(at|bl|iz)$/;
// Doubled final consonant other than l, s or z.
var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");
// Whole word is consonant-sequence + vowel + consonant (not w, x or y).
var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$");
var re_1c = /^(.+?[^aeiou])y$/;
var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
var re2_4 = /^(.+?)(s|t)(ion)$/;
var re_5 = /^(.+?)e$/;
var re_5_1 = /ll$/;
var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$");
var porterStemmer = function porterStemmer(w) {
var stem,
suffix,
firstch,
re,
re2,
re3,
re4;
// Words shorter than three characters are returned as they are.
if (w.length < 3) { return w; }
// A leading "y" is upper-cased so that the lower-case character classes
// above do not treat it as a vowel; it is restored at the end.
firstch = w.substr(0,1);
if (firstch == "y") {
w = firstch.toUpperCase() + w.substr(1);
}
// Step 1a
// "...sses"/"...ies" lose their "es"; otherwise a final "s" that follows a
// non-"s" character is dropped.
re = re_1a
re2 = re2_1a;
if (re.test(w)) { w = w.replace(re,"$1$2"); }
else if (re2.test(w)) { w = w.replace(re2,"$1$2"); }
// Step 1b
// "...eed": drop the last character when the stem has m>0.
// "...ed"/"...ing": remove the suffix when the stem contains a vowel, then
// tidy the result (append "e" after at/bl/iz, undouble a final consonant,
// or append "e" to a short consonant-vowel-consonant word).
re = re_1b;
re2 = re2_1b;
if (re.test(w)) {
var fp = re.exec(w);
re = re_mgr0;
if (re.test(fp[1])) {
re = re_1b_2;
w = w.replace(re,"");
}
} else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1];
re2 = re_s_v;
if (re2.test(stem)) {
w = stem;
re2 = re2_1b_2;
re3 = re3_1b_2;
re4 = re4_1b_2;
if (re2.test(w)) { w = w + "e"; }
else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); }
else if (re4.test(w)) { w = w + "e"; }
}
}
// Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
re = re_1c;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
w = stem + "i";
}
// Step 2
// Replace a suffix from step2list when the remaining stem has m>0.
re = re_2;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = re_mgr0;
if (re.test(stem)) {
w = stem + step2list[suffix];
}
}
// Step 3
// Replace a suffix from step3list when the remaining stem has m>0.
re = re_3;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = re_mgr0;
if (re.test(stem)) {
w = stem + step3list[suffix];
}
}
// Step 4
// Remove one of the listed suffixes when the stem has m>1; "ion" is removed
// only after "s" or "t", and that letter is kept as part of the stem.
re = re_4;
re2 = re2_4;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = re_mgr1;
if (re.test(stem)) {
w = stem;
}
} else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1] + fp[2];
re2 = re_mgr1;
if (re2.test(stem)) {
w = stem;
}
}
// Step 5
// Drop a final "e" when the stem has m>1, or m=1 and the stem is not a
// short consonant-vowel-consonant word.
re = re_5;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = re_mgr1;
re2 = re_meq1;
re3 = re3_5;
if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
w = stem;
}
}
// A word ending in "ll" with m>1 loses its last character.
re = re_5_1;
re2 = re_mgr1;
if (re.test(w) && re2.test(w)) {
re = re_1b_2;
w = w.replace(re,"");
}
// and turn initial Y back to y
if (firstch == "y") {
w = firstch.toLowerCase() + w.substr(1);
}
return w;
};
return porterStemmer;
})();
// Make the stemmer available to pipelines under the label 'stemmer'.
elasticlunr.Pipeline.registerFunction(elasticlunr.stemmer, 'stemmer');
/*!
* elasticlunr.stopWordFilter
* Copyright (C) 2017 Oliver Nightingale
* Copyright (C) 2017 Wei Song
*/
/**
 * elasticlunr.stopWordFilter is an English language stop word filter: tokens
 * found in the stop word table are not passed through.
 *
 * It is intended to be used in the Pipeline. A token that does not pass the
 * filter (a stop word, or a falsy token) yields undefined. The table is a
 * plain dictionary, so each check is a single property lookup.
 *
 * @module
 * @param {String} token The token to pass through the filter
 * @return {String} The token, or undefined when it is filtered out.
 * @see elasticlunr.Pipeline
 */
elasticlunr.stopWordFilter = function (token) {
  if (!token) return;

  var isStopWord = elasticlunr.stopWordFilter.stopWords[token] === true;
  if (isStopWord) return;

  return token;
};
/**
 * Removes all stop words by pointing the filter at a fresh, empty table.
 * Useful before installing a fully customised stop word list.
 *
 * @return {null}
 */
elasticlunr.clearStopWords = function () {
  var emptyTable = {};
  elasticlunr.stopWordFilter.stopWords = emptyTable;
};
/**
 * Adds customised stop words to the table used by elasticlunr.stopWordFilter.
 *
 * The default table (elasticlunr.defaultStopWords) is never modified: if the
 * filter is currently using the defaults object itself, the filter is first
 * switched to a private copy. This keeps elasticlunr.resetStopWords able to
 * restore the original defaults after custom words have been added.
 *
 * Anything other than an array is ignored.
 *
 * @params {Array} words customized stop words
 * @return {null}
 */
elasticlunr.addStopWords = function (words) {
  if (words == null || Array.isArray(words) === false) return;

  var filter = elasticlunr.stopWordFilter;
  var defaults = elasticlunr.defaultStopWords;

  // Copy-on-write: the filter's table may be the very same object as the
  // defaults, and writing into it would silently change the defaults too.
  if (filter.stopWords === defaults) {
    var ownTable = {};
    for (var key in defaults) {
      if (Object.prototype.hasOwnProperty.call(defaults, key)) {
        ownTable[key] = defaults[key];
      }
    }
    filter.stopWords = ownTable;
  }

  words.forEach(function (word) {
    filter.stopWords[word] = true;
  });
};
/**
 * Points the stop word filter back at elasticlunr.defaultStopWords.
 * The defaults object itself is assigned, not a copy of it.
 *
 * @return {null}
 */
elasticlunr.resetStopWords = function () {
  var defaults = elasticlunr.defaultStopWords;
  elasticlunr.stopWordFilter.stopWords = defaults;
};
// Default English stop words, stored as a dictionary (word -> true) so that
// elasticlunr.stopWordFilter can test a token with a single property lookup.
// The empty string is included as an entry.
elasticlunr.defaultStopWords = {
"": true,
"a": true,
"able": true,
"about": true,
"across": true,
"after": true,
"all": true,
"almost": true,
"also": true,
"am": true,
"among": true,
"an": true,
"and": true,
"any": true,
"are": true,
"as": true,
"at": true,
"be": true,
"because": true,
"been": true,
"but": true,
"by": true,
"can": true,
"cannot": true,
"could": true,
"dear": true,
"did": true,
"do": true,
"does": true,
"either": true,
"else": true,
"ever": true,
"every": true,
"for": true,
"from": true,
"get": true,
"got": true,
"had": true,
"has": true,
"have": true,
"he": true,
"her": true,
"hers": true,
"him": true,
"his": true,
"how": true,
"however": true,
"i": true,
"if": true,
"in": true,
"into": true,
"is": true,
"it": true,
"its": true,
"just": true,
"least": true,
"let": true,
"like": true,
"likely": true,
"may": true,
"me": true,
"might": true,
"most": true,
"must": true,
"my": true,
"neither": true,
"no": true,
"nor": true,
"not": true,
"of": true,
"off": true,
"often": true,
"on": true,
"only": true,
"or": true,
"other": true,
"our": true,
"own": true,
"rather": true,
"said": true,
"say": true,
"says": true,
"she": true,
"should": true,
"since": true,
"so": true,
"some": true,
"than": true,
"that": true,
"the": true,
"their": true,
"them": true,
"then": true,
"there": true,
"these": true,
"they": true,
"this": true,
"tis": true,
"to": true,
"too": true,
"twas": true,
"us": true,
"wants": true,
"was": true,
"we": true,
"were": true,
"what": true,
"when": true,
"where": true,
"which": true,
"while": true,
"who": true,
"whom": true,
"why": true,
"will": true,
"with": true,
"would": true,
"yet": true,
"you": true,
"your": true
};
// The filter starts out using the defaults object itself (no copy is made),
// so at this point both names refer to the same object.
elasticlunr.stopWordFilter.stopWords = elasticlunr.defaultStopWords;
// Make the filter available to pipelines under the label 'stopWordFilter'.
elasticlunr.Pipeline.registerFunction(elasticlunr.stopWordFilter, 'stopWordFilter');
/*!
* elasticlunr.trimmer
* Copyright (C) 2017 Oliver Nightingale
* Copyright (C) 2017 Wei Song
*/
/**
 * elasticlunr.trimmer is a pipeline function that strips non-word characters
 * (regex class \W) from the beginning and end of a token before it enters
 * the index.
 *
 * This implementation may not work correctly for non-latin characters and
 * should either be removed or adapted for use with languages that use them.
 *
 * @module
 * @param {String} token The token to pass through the filter
 * @return {String}
 * @throws {Error} When the token is null or undefined.
 * @see elasticlunr.Pipeline
 */
elasticlunr.trimmer = function (token) {
  if (token === null || token === undefined) {
    throw new Error('token should not be undefined');
  }

  var withoutLeading = token.replace(/^\W+/, '');
  return withoutLeading.replace(/\W+$/, '');
};

// Make the trimmer available to pipelines under the label 'trimmer'.
elasticlunr.Pipeline.registerFunction(elasticlunr.trimmer, 'trimmer');
/*!
* elasticlunr.InvertedIndex
* Copyright (C) 2017 Wei Song
* Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
*/
/**
 * elasticlunr.InvertedIndex stores, for each token, the documents containing
 * it, and allows them to be looked up efficiently.
 *
 * Tokens are stored character by character in a tree of nodes; every node
 * carries a `docs` table and a document-frequency counter `df`.
 *
 * @constructor
 */
elasticlunr.InvertedIndex = function () {
  var emptyRoot = { docs: {}, df: 0 };
  this.root = emptyRoot;
};
/**
 * Rebuilds an inverted index from its serialised form.
 * The serialised root node is adopted by reference.
 *
 * @param {Object} serialisedData The serialised inverted index to load.
 * @return {elasticlunr.InvertedIndex}
 */
elasticlunr.InvertedIndex.load = function (serialisedData) {
  var restored = new this();
  restored.root = serialisedData.root;
  return restored;
};
/**
 * Adds a {token: tokenInfo} pair to the inverted index.
 * If the document is already recorded for the token, its tokenInfo is replaced.
 *
 * tokenInfo format: { ref: 1, tf: 2}
 * tokenInfo must contain the document's ref and the tf (term frequency) of
 * the token in that document.
 *
 * The token is walked one character at a time, creating missing nodes on the
 * way. By default the walk starts at the root of this inverted index, but any
 * node can be supplied as the starting point.
 *
 * @param {String} token
 * @param {Object} tokenInfo format: { ref: 1, tf: 2}
 * @param {Object} root An optional node at which to start looking for the
 * correct place to enter the doc; by default the root of this
 * elasticlunr.InvertedIndex is used.
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.addToken = function (token, tokenInfo, root) {
  var node = root || this.root;

  for (var i = 0; i < token.length; i++) {
    var ch = token[i];
    if (!(ch in node)) node[ch] = {docs: {}, df: 0};
    node = node[ch];
  }

  var docRef = tokenInfo.ref;
  // A document not yet recorded for this token raises the document frequency;
  // in either case the stored entry is (re)written with the given tf.
  if (!node.docs[docRef]) node.df += 1;
  node.docs[docRef] = {tf: tokenInfo.tf};
};
/**
 * Checks whether a path for the given token exists in this
 * elasticlunr.InvertedIndex. A falsy token is never present.
 *
 * @param {String} token The token to be checked
 * @return {Boolean}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.hasToken = function (token) {
  if (!token) return false;

  var node = this.root;
  for (var i = 0; i < token.length; i++) {
    var next = node[token[i]];
    if (!next) return false;
    node = next;
  }
  return true;
};
/**
 * Retrieves the node reached by walking the given token through the index.
 * Returns null for a falsy token or when the token is not in the index.
 *
 * @param {String} token The token to get the node for.
 * @return {Object}
 * @see InvertedIndex.prototype.get
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getNode = function (token) {
  if (!token) return null;

  var node = this.root;
  for (var i = 0; i < token.length; i++) {
    var next = node[token[i]];
    if (!next) return null;
    node = next;
  }
  return node;
};
/**
 * Retrieves the documents recorded for a given token.
 * Returns a new empty object when the token is not found.
 *
 * @param {String} token The token to get the documents for.
 * @return {Object}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getDocs = function (token) {
  var node = this.getNode(token);
  return node == null ? {} : node.docs;
};
/**
 * Retrieves the term frequency of a token in a given document.
 * Returns 0 when either the token or the docRef is not found.
 *
 * @param {String} token The token to look up.
 * @param {String|Integer} docRef
 * @return {Integer}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getTermFrequency = function (token, docRef) {
  var node = this.getNode(token);
  if (node == null) return 0;

  return (docRef in node.docs) ? node.docs[docRef].tf : 0;
};
/**
 * Retrieves the document frequency of a token, i.e. the `df` counter of the
 * token's node. Returns 0 when the token is not found.
 *
 * @param {String} token The token to look up.
 * @return {Integer}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getDocFreq = function (token) {
  var node = this.getNode(token);
  return node == null ? 0 : node.df;
};
/**
 * Removes a document from the given token's entry in the inverted index and
 * lowers the token's document frequency. Unknown tokens and documents not
 * recorded for the token are ignored.
 *
 * @param {String} token The token to remove the document from.
 * @param {String} ref The ref of the document to remove from the given token.
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.removeToken = function (token, ref) {
  if (!token) return;

  var node = this.getNode(token);
  if (node == null || !(ref in node.docs)) return;

  delete node.docs[ref];
  node.df -= 1;
};
/**
 * Finds every token in the inverted index that starts with the given token
 * (the token itself included) and is recorded for at least one document.
 * Returns an empty array for a null/empty token or when nothing matches.
 *
 * @param {String} token The token to expand.
 * @param {Array} memo Optional accumulator for the results; used by the recursion.
 * @param {Object} root Optional node that corresponds to `token`; used by the recursion.
 * @return {Array}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.expandToken = function (token, memo, root) {
  if (token == null || token == '') return [];

  var found = memo || [];
  var node = root;

  // Top-level call: locate the node for the token first.
  if (node == void 0) {
    node = this.getNode(token);
    if (node == null) return found;
  }

  if (node.df > 0) found.push(token);

  for (var ch in node) {
    // Every key other than the two bookkeeping members is a child character.
    if (ch === 'docs' || ch === 'df') continue;
    this.expandToken(token + ch, found, node[ch]);
  }

  return found;
};
/**
 * Returns a representation of the inverted index ready for serialisation.
 * The root node is referenced, not copied.
 *
 * @return {Object}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.toJSON = function () {
  var serialised = {};
  serialised.root = this.root;
  return serialised;
};
/*!
* elasticlunr.Configuration
* Copyright (C) 2017 Wei Song
*/
/**
* elasticlunr.Configuration is used to analyze the user search configuration.
*
* By elasticlunr.Configuration user could set query-time boosting, boolean model in each field.
*
* Currently configuration supports:
* 1. query-time boosting, user could set how to boost each field.
* 2. boolean model choosing, user could choose which boolean model to use for each field.
* 3. token expansion, user could set token expand to True to improve Recall. Default is False.
*
* Query-time boosting must be configured per field. The "boolean" model can be configured
* either per field or globally, as in the following examples; a field-level "bool" setting
* overrides the global one.
* Token expansion can likewise be configured either per field or globally; a field-level
* setting overrides the global one.
*
* configuration example:
* {
* fields:{
* title: {boost: 2},
* body: {boost: 1}
* },
* bool: "OR"
* }
*
* "bool" field configuration overriding the global configuration, example:
* {
* fields:{
* title: {boost: 2, bool: "AND"},
* body: {boost: 1}
* },
* bool: "OR"
* }
*
* "expand" example:
* {
* fields:{
* title: {boost: 2, bool: "AND"},
* body: {boost: 1}
* },
* bool: "OR",
* expand: true
* }
*
* "expand" example for field category:
* {
* fields:{
* title: {boost: 2, bool: "AND", expand: true},
* body: {boost: 1}
* },
* bool: "OR"
* }
*
* setting the boost to 0 ignores the field (this will only search the title):
* {
* fields:{
* title: {boost: 1},
* body: {boost: 0}
* }
* }
*
* then, user could search with configuration to do query-time boosting.
* idx.search('oracle database', {fields: {title: {boost: 2}, body: {boost: 1}}});
*
*
* @constructor
*
* @param {String} config user configuration
* @param {Array} fields fields of index instance
* @module
*/
elasticlunr.Configuration = function (config, fields) {
  // `fields` is required: every configuration is expressed per index field.
  if (fields == undefined || fields == null) {
    throw new Error('fields should not be null');
  }

  this.config = {};

  // No user configuration was supplied (the normal case for a plain search):
  // the defaults apply. This is not a parse failure, so no warning is emitted.
  if (!config) {
    this.buildDefaultConfig(fields);
    return;
  }

  // `config` is expected to be a JSON string. If it cannot be parsed, or the
  // parsed value cannot be turned into a configuration, warn and fall back to
  // the default configuration.
  try {
    var userConfig = JSON.parse(config);
    this.buildUserConfig(userConfig, fields);
  } catch (error) {
    elasticlunr.utils.warn('user configuration parse failed, will use default configuration');
    this.buildDefaultConfig(fields);
  }
};
/**
 * Builds the default search configuration: every field gets boost 1,
 * boolean model "OR" and no token expansion.
 *
 * @param {Array} fields fields of index instance
 */
elasticlunr.Configuration.prototype.buildDefaultConfig = function (fields) {
  var self = this;
  this.reset();

  fields.forEach(function (field) {
    var settings = {};
    settings.boost = 1;
    settings.bool = "OR";
    settings.expand = false;
    self.config[field] = settings;
  });
};
/**
 * Builds the search configuration from a parsed user configuration.
 *
 * Global `bool` and `expand` values act as defaults for the fields. When the
 * configuration lists fields, only the listed fields that exist in the index
 * are configured (unknown names produce a warning); otherwise every index
 * field is configured with the global values.
 *
 * @param {JSON} config User JSON configuration
 * @param {Array} fields fields of index instance
 */
elasticlunr.Configuration.prototype.buildUserConfig = function (config, fields) {
  var defaultBool = "OR";
  var defaultExpand = false;

  this.reset();

  if ('bool' in config) defaultBool = config['bool'] || defaultBool;
  if ('expand' in config) defaultExpand = config['expand'] || defaultExpand;

  // No per-field section: apply the global settings to every index field.
  if (!('fields' in config)) {
    this.addAllFields2UserConfig(defaultBool, defaultExpand, fields);
    return;
  }

  var requested = config['fields'];
  for (var name in requested) {
    if (!(fields.indexOf(name) > -1)) {
      elasticlunr.utils.warn('field name in user configuration not found in index instance fields');
      continue;
    }

    var settings = requested[name];
    var expand = (settings.expand != undefined) ? settings.expand : defaultExpand;
    // A boost of 0 is a valid setting, so it must not fall back to 1.
    var boost = (settings.boost || settings.boost === 0) ? settings.boost : 1;

    this.config[name] = {
      boost: boost,
      bool: settings.bool || defaultBool,
      expand: expand
    };
  }
};
/**
 * Configures every index field with boost 1 and the given boolean model and
 * expand setting.
 *
 * @param {String} bool Boolean model
 * @param {String} expand Expand model
 * @param {Array} fields fields of index instance
 */
elasticlunr.Configuration.prototype.addAllFields2UserConfig = function (bool, expand, fields) {
  var self = this;

  fields.forEach(function (field) {
    var settings = {};
    settings.boost = 1;
    settings.bool = bool;
    settings.expand = expand;
    self.config[field] = settings;
  });
};
/**
 * Returns the current search configuration (the internal object, not a copy).
 *
 * @return {Object}
 */
elasticlunr.Configuration.prototype.get = function () {
  var current = this.config;
  return current;
};
/**
 * Resets the search configuration to an empty object.
 */
elasticlunr.Configuration.prototype.reset = function () {
  var blank = {};
  this.config = blank;
};
/**
* sorted_set.js is included only to keep elasticlunr.js compatible with lunr-languages.
* Supporting additional languages in elasticlunr.js itself would make the library
* much bigger, which is not good for browser usage.
*
*/
/*!
* lunr.SortedSet
* Copyright (C) 2017 Oliver Nightingale
*/
/**
 * lunr.SortedSets maintain an array of unique values in sorted order.
 *
 * @constructor
 */
lunr.SortedSet = function () {
  this.length = 0;
  this.elements = [];
};
/**
 * Loads a previously serialised sorted set.
 * The given array is adopted as the element list without copying or sorting.
 *
 * @param {Array} serialisedData The serialised set to load.
 * @returns {lunr.SortedSet}
 * @memberOf SortedSet
 */
lunr.SortedSet.load = function (serialisedData) {
  var restored = new this();

  restored.elements = serialisedData;
  restored.length = serialisedData.length;

  return restored;
};
/**
 * Inserts new items into the set, each at the position that keeps the
 * elements ordered. Items already in the set are skipped.
 *
 * @param {Object} The objects to add to this set.
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.add = function () {
  for (var k = 0; k < arguments.length; k++) {
    var candidate = arguments[k];

    // indexOf yields -1 only for items that are not in the set yet.
    if (this.indexOf(candidate) !== -1) continue;

    var position = this.locationFor(candidate);
    this.elements.splice(position, 0, candidate);
  }

  this.length = this.elements.length;
};
/**
 * Converts this sorted set into an array (a copy of the element list).
 *
 * @returns {Array}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.toArray = function () {
  var copy = this.elements.slice();
  return copy;
};
/**
 * Creates a new array with the results of calling the provided function on
 * every element of this sorted set.
 *
 * Delegates to Array.prototype.map and has the same signature.
 *
 * @param {Function} fn The function that is called on each element of the
 * set.
 * @param {Object} ctx An optional object that can be used as the context
 * for the function fn.
 * @returns {Array}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.map = function (fn, ctx) {
  var mapped = this.elements.map(fn, ctx);
  return mapped;
};
/**
 * Executes the provided function once per sorted set element.
 *
 * Delegates to Array.prototype.forEach and has the same signature.
 *
 * @param {Function} fn The function that is called on each element of the
 * set.
 * @param {Object} ctx An optional object that can be used as the context
 * for the function fn.
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.forEach = function (fn, ctx) {
  var items = this.elements;
  return items.forEach(fn, ctx);
};
/**
 * Returns the index at which a given element can be found in the sorted set,
 * or -1 if it is not present. The element list is searched by repeatedly
 * halving the candidate section.
 *
 * @param {Object} elem The object to locate in the sorted set.
 * @returns {Number}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.indexOf = function (elem) {
  var lo = 0;
  var hi = this.elements.length;
  var span = hi - lo;
  var mid = lo + Math.floor(span / 2);
  var probe = this.elements[mid];

  while (span > 1) {
    if (probe === elem) return mid;

    // Narrow the section to the half that can still contain elem.
    if (probe < elem) lo = mid;
    if (probe > elem) hi = mid;

    span = hi - lo;
    mid = lo + Math.floor(span / 2);
    probe = this.elements[mid];
  }

  return probe === elem ? mid : -1;
};
/**
 * Returns the position within the sorted set at which an element should be
 * inserted to maintain the current order of the set.
 *
 * This function assumes that the element to search for does not already exist
 * in the sorted set. When the last inspected element compares neither greater
 * nor smaller than elem (for example when the set is empty), no position is
 * returned (undefined).
 *
 * @param {Object} elem The elem to find the position for in the set
 * @returns {Number}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.locationFor = function (elem) {
  var lo = 0;
  var hi = this.elements.length;
  var span = hi - lo;
  var mid = lo + Math.floor(span / 2);
  var probe = this.elements[mid];

  while (span > 1) {
    // Narrow the section to the half where elem belongs.
    if (probe < elem) lo = mid;
    if (probe > elem) hi = mid;

    span = hi - lo;
    mid = lo + Math.floor(span / 2);
    probe = this.elements[mid];
  }

  // Insert before a larger element, after a smaller one.
  if (probe > elem) return mid;
  if (probe < elem) return mid + 1;
};
/**
 * Creates a new lunr.SortedSet that contains the elements in the intersection
 * of this set and the passed set. Both element lists are walked in parallel,
 * always advancing past the smaller of the two current elements.
 *
 * @param {lunr.SortedSet} otherSet The set to intersect with this set.
 * @returns {lunr.SortedSet}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.intersect = function (otherSet) {
  var common = new lunr.SortedSet();
  var leftLen = this.length;
  var rightLen = otherSet.length;
  var left = this.elements;
  var right = otherSet.elements;
  var li = 0;
  var ri = 0;

  // Stop as soon as either list is exhausted.
  while (!(li > leftLen - 1 || ri > rightLen - 1)) {
    var x = left[li];
    var y = right[ri];

    if (x === y) {
      common.add(x);
      li++;
      ri++;
      continue;
    }

    if (x < y) {
      li++;
      continue;
    }

    if (x > y) {
      ri++;
      continue;
    }
  }

  return common;
};
/**
 * Makes a copy of this set. The copy has its own element array.
 *
 * @returns {lunr.SortedSet}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.clone = function () {
  var duplicate = new lunr.SortedSet();

  duplicate.elements = this.toArray();
  duplicate.length = duplicate.elements.length;

  return duplicate;
};
/**
 * Creates a new lunr.SortedSet that contains the elements in the union
 * of this set and the passed set. The longer set is cloned and the elements
 * of the shorter one are added to the clone.
 *
 * @param {lunr.SortedSet} otherSet The set to union with this set.
 * @returns {lunr.SortedSet}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.union = function (otherSet) {
  var thisIsLonger = this.length >= otherSet.length;
  var longSet = thisIsLonger ? this : otherSet;
  var shortSet = thisIsLonger ? otherSet : this;

  var unionSet = longSet.clone();
  var extras = shortSet.toArray();

  for (var i = 0; i < extras.length; i++) {
    unionSet.add(extras[i]);
  }

  return unionSet;
};
/**
 * Returns a representation of the sorted set ready for serialisation:
 * an array copy of its elements.
 *
 * @returns {Array}
 * @memberOf SortedSet
 */
lunr.SortedSet.prototype.toJSON = function () {
  var asArray = this.toArray();
  return asArray;
};
/**
 * Export the module via AMD, CommonJS or as a browser global.
 * Export code adapted from https://github.com/umdjs/umd/blob/master/returnExports.js
 *
 * The CommonJS branch tests `module` (the object it actually writes to) rather
 * than `exports`, and the global object is resolved explicitly because a bare
 * `this` is undefined when the surrounding code runs in strict mode.
 */
;(function (root, factory) {
  if (typeof define === 'function' && define.amd) {
    // AMD. Register as an anonymous module.
    define(factory);
  } else if (typeof module === 'object' && module.exports) {
    /**
     * Node. Does not work with strict CommonJS, but
     * only CommonJS-like environments that support module.exports,
     * like Node.
     */
    module.exports = factory();
  } else {
    // Browser globals (root is window)
    root.elasticlunr = factory();
  }
}(
  typeof self !== 'undefined' ? self
    : (typeof globalThis !== 'undefined' ? globalThis : this),
  function () {
    /**
     * Just return a value to define the module export.
     * Here the exported value is the elasticlunr function itself.
     */
    return elasticlunr;
  }
))
})();