diff --git a/.gitignore b/.gitignore index 9244199..ec98b51 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ __pycache__ _scratch .env +keys.py diff --git a/elasticlunr.js b/elasticlunr.js deleted file mode 100644 index e43b0cd..0000000 --- a/elasticlunr.js +++ /dev/null @@ -1,2507 +0,0 @@ -/** - * elasticlunr - http://weixsong.github.io - * Lightweight full-text search engine in Javascript for browser search and offline search. - 0.9.5 - * - * Copyright (C) 2017 Oliver Nightingale - * Copyright (C) 2017 Wei Song - * MIT Licensed - * @license - */ - -(function(){ - -/*! - * elasticlunr.js - * Copyright (C) 2017 Oliver Nightingale - * Copyright (C) 2017 Wei Song - */ - -/** - * Convenience function for instantiating a new elasticlunr index and configuring it - * with the default pipeline functions and the passed config function. - * - * When using this convenience function a new index will be created with the - * following functions already in the pipeline: - * - * 1. elasticlunr.trimmer - trim non-word character - * 2. elasticlunr.StopWordFilter - filters out any stop words before they enter the - * index - * 3. elasticlunr.stemmer - stems the tokens before entering the index. - * - * - * Example: - * - * var idx = elasticlunr(function () { - * this.addField('id'); - * this.addField('title'); - * this.addField('body'); - * - * //this.setRef('id'); // default ref is 'id' - * - * this.pipeline.add(function () { - * // some custom pipeline function - * }); - * }); - * - * idx.addDoc({ - * id: 1, - * title: 'Oracle released database 12g', - * body: 'Yestaday, Oracle has released their latest database, named 12g, more robust. this product will increase Oracle profit.' - * }); - * - * idx.addDoc({ - * id: 2, - * title: 'Oracle released annual profit report', - * body: 'Yestaday, Oracle has released their annual profit report of 2015, total profit is 12.5 Billion.' - * }); - * - * # simple search - * idx.search('oracle database'); - * - * # search with query-time boosting - * idx.search('oracle database', {fields: {title: {boost: 2}, body: {boost: 1}}}); - * - * @param {Function} config A function that will be called with the new instance - * of the elasticlunr.Index as both its context and first parameter. It can be used to - * customize the instance of new elasticlunr.Index. - * @namespace - * @module - * @return {elasticlunr.Index} - * - */ -var elasticlunr = function (config) { - var idx = new elasticlunr.Index; - - idx.pipeline.add( - elasticlunr.trimmer, - elasticlunr.stopWordFilter, - elasticlunr.stemmer - ); - - if (config) config.call(idx, idx); - - return idx; -}; - -elasticlunr.version = "0.9.5"; - -// only used this to make elasticlunr.js compatible with lunr-languages -// this is a trick to define a global alias of elasticlunr -lunr = elasticlunr; - -/*! - * elasticlunr.utils - * Copyright (C) 2017 Oliver Nightingale - * Copyright (C) 2017 Wei Song - */ - -/** - * A namespace containing utils for the rest of the elasticlunr library - */ -elasticlunr.utils = {}; - -/** - * Print a warning message to the console. - * - * @param {String} message The message to be printed. - * @memberOf Utils - */ -elasticlunr.utils.warn = (function (global) { - return function (message) { - if (global.console && console.warn) { - console.warn(message); - } - }; -})(this); - -/** - * Convert an object to string. - * - * In the case of `null` and `undefined` the function returns - * an empty string, in all other cases the result of calling - * `toString` on the passed object is returned. - * - * @param {object} obj The object to convert to a string. - * @return {String} string representation of the passed object. - * @memberOf Utils - */ -elasticlunr.utils.toString = function (obj) { - if (obj === void 0 || obj === null) { - return ""; - } - - return obj.toString(); -}; -/*! - * elasticlunr.EventEmitter - * Copyright (C) 2017 Oliver Nightingale - * Copyright (C) 2017 Wei Song - */ - -/** - * elasticlunr.EventEmitter is an event emitter for elasticlunr. - * It manages adding and removing event handlers and triggering events and their handlers. - * - * Each event could has multiple corresponding functions, - * these functions will be called as the sequence that they are added into the event. - * - * @constructor - */ -elasticlunr.EventEmitter = function () { - this.events = {}; -}; - -/** - * Binds a handler function to a specific event(s). - * - * Can bind a single function to many different events in one call. - * - * @param {String} [eventName] The name(s) of events to bind this function to. - * @param {Function} fn The function to call when an event is fired. - * @memberOf EventEmitter - */ -elasticlunr.EventEmitter.prototype.addListener = function () { - var args = Array.prototype.slice.call(arguments), - fn = args.pop(), - names = args; - - if (typeof fn !== "function") throw new TypeError ("last argument must be a function"); - - names.forEach(function (name) { - if (!this.hasHandler(name)) this.events[name] = []; - this.events[name].push(fn); - }, this); -}; - -/** - * Removes a handler function from a specific event. - * - * @param {String} eventName The name of the event to remove this function from. - * @param {Function} fn The function to remove from an event. - * @memberOf EventEmitter - */ -elasticlunr.EventEmitter.prototype.removeListener = function (name, fn) { - if (!this.hasHandler(name)) return; - - var fnIndex = this.events[name].indexOf(fn); - if (fnIndex === -1) return; - - this.events[name].splice(fnIndex, 1); - - if (this.events[name].length == 0) delete this.events[name]; -}; - -/** - * Call all functions that bounded to the given event. - * - * Additional data can be passed to the event handler as arguments to `emit` - * after the event name. - * - * @param {String} eventName The name of the event to emit. - * @memberOf EventEmitter - */ -elasticlunr.EventEmitter.prototype.emit = function (name) { - if (!this.hasHandler(name)) return; - - var args = Array.prototype.slice.call(arguments, 1); - - this.events[name].forEach(function (fn) { - fn.apply(undefined, args); - }, this); -}; - -/** - * Checks whether a handler has ever been stored against an event. - * - * @param {String} eventName The name of the event to check. - * @private - * @memberOf EventEmitter - */ -elasticlunr.EventEmitter.prototype.hasHandler = function (name) { - return name in this.events; -}; -/*! - * elasticlunr.tokenizer - * Copyright (C) 2017 Oliver Nightingale - * Copyright (C) 2017 Wei Song - */ - -/** - * A function for splitting a string into tokens. - * Currently English is supported as default. - * Uses `elasticlunr.tokenizer.seperator` to split strings, you could change - * the value of this property to set how you want strings are split into tokens. - * IMPORTANT: use elasticlunr.tokenizer.seperator carefully, if you are not familiar with - * text process, then you'd better not change it. - * - * @module - * @param {String} str The string that you want to tokenize. - * @see elasticlunr.tokenizer.seperator - * @return {Array} - */ -elasticlunr.tokenizer = function (str) { - if (!arguments.length || str === null || str === undefined) return []; - if (Array.isArray(str)) { - var arr = str.filter(function(token) { - if (token === null || token === undefined) { - return false; - } - - return true; - }); - - arr = arr.map(function (t) { - return elasticlunr.utils.toString(t).toLowerCase(); - }); - - var out = []; - arr.forEach(function(item) { - var tokens = item.split(elasticlunr.tokenizer.seperator); - out = out.concat(tokens); - }, this); - - return out; - } - - return str.toString().trim().toLowerCase().split(elasticlunr.tokenizer.seperator); -}; - -/** - * Default string seperator. - */ -elasticlunr.tokenizer.defaultSeperator = /[\s\-]+/; - -/** - * The sperator used to split a string into tokens. Override this property to change the behaviour of - * `elasticlunr.tokenizer` behaviour when tokenizing strings. By default this splits on whitespace and hyphens. - * - * @static - * @see elasticlunr.tokenizer - */ -elasticlunr.tokenizer.seperator = elasticlunr.tokenizer.defaultSeperator; - -/** - * Set up customized string seperator - * - * @param {Object} sep The customized seperator that you want to use to tokenize a string. - */ -elasticlunr.tokenizer.setSeperator = function(sep) { - if (sep !== null && sep !== undefined && typeof(sep) === 'object') { - elasticlunr.tokenizer.seperator = sep; - } -} - -/** - * Reset string seperator - * - */ -elasticlunr.tokenizer.resetSeperator = function() { - elasticlunr.tokenizer.seperator = elasticlunr.tokenizer.defaultSeperator; -} - -/** - * Get string seperator - * - */ -elasticlunr.tokenizer.getSeperator = function() { - return elasticlunr.tokenizer.seperator; -} -/*! - * elasticlunr.Pipeline - * Copyright (C) 2017 Oliver Nightingale - * Copyright (C) 2017 Wei Song - */ - -/** - * elasticlunr.Pipelines maintain an ordered list of functions to be applied to - * both documents tokens and query tokens. - * - * An instance of elasticlunr.Index will contain a pipeline - * with a trimmer, a stop word filter, an English stemmer. Extra - * functions can be added before or after either of these functions or these - * default functions can be removed. - * - * When run the pipeline, it will call each function in turn. - * - * The output of the functions in the pipeline will be passed to the next function - * in the pipeline. To exclude a token from entering the index the function - * should return undefined, the rest of the pipeline will not be called with - * this token. - * - * For serialisation of pipelines to work, all functions used in an instance of - * a pipeline should be registered with elasticlunr.Pipeline. Registered functions can - * then be loaded. If trying to load a serialised pipeline that uses functions - * that are not registered an error will be thrown. - * - * If not planning on serialising the pipeline then registering pipeline functions - * is not necessary. - * - * @constructor - */ -elasticlunr.Pipeline = function () { - this._queue = []; -}; - -elasticlunr.Pipeline.registeredFunctions = {}; - -/** - * Register a function in the pipeline. - * - * Functions that are used in the pipeline should be registered if the pipeline - * needs to be serialised, or a serialised pipeline needs to be loaded. - * - * Registering a function does not add it to a pipeline, functions must still be - * added to instances of the pipeline for them to be used when running a pipeline. - * - * @param {Function} fn The function to register. - * @param {String} label The label to register this function with - * @memberOf Pipeline - */ -elasticlunr.Pipeline.registerFunction = function (fn, label) { - if (label in elasticlunr.Pipeline.registeredFunctions) { - elasticlunr.utils.warn('Overwriting existing registered function: ' + label); - } - - fn.label = label; - elasticlunr.Pipeline.registeredFunctions[label] = fn; -}; - -/** - * Get a registered function in the pipeline. - * - * @param {String} label The label of registered function. - * @return {Function} - * @memberOf Pipeline - */ -elasticlunr.Pipeline.getRegisteredFunction = function (label) { - if ((label in elasticlunr.Pipeline.registeredFunctions) !== true) { - return null; - } - - return elasticlunr.Pipeline.registeredFunctions[label]; -}; - -/** - * Warns if the function is not registered as a Pipeline function. - * - * @param {Function} fn The function to check for. - * @private - * @memberOf Pipeline - */ -elasticlunr.Pipeline.warnIfFunctionNotRegistered = function (fn) { - var isRegistered = fn.label && (fn.label in this.registeredFunctions); - - if (!isRegistered) { - elasticlunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn); - } -}; - -/** - * Loads a previously serialised pipeline. - * - * All functions to be loaded must already be registered with elasticlunr.Pipeline. - * If any function from the serialised data has not been registered then an - * error will be thrown. - * - * @param {Object} serialised The serialised pipeline to load. - * @return {elasticlunr.Pipeline} - * @memberOf Pipeline - */ -elasticlunr.Pipeline.load = function (serialised) { - var pipeline = new elasticlunr.Pipeline; - - serialised.forEach(function (fnName) { - var fn = elasticlunr.Pipeline.getRegisteredFunction(fnName); - - if (fn) { - pipeline.add(fn); - } else { - throw new Error('Cannot load un-registered function: ' + fnName); - } - }); - - return pipeline; -}; - -/** - * Adds new functions to the end of the pipeline. - * - * Logs a warning if the function has not been registered. - * - * @param {Function} functions Any number of functions to add to the pipeline. - * @memberOf Pipeline - */ -elasticlunr.Pipeline.prototype.add = function () { - var fns = Array.prototype.slice.call(arguments); - - fns.forEach(function (fn) { - elasticlunr.Pipeline.warnIfFunctionNotRegistered(fn); - this._queue.push(fn); - }, this); -}; - -/** - * Adds a single function after a function that already exists in the - * pipeline. - * - * Logs a warning if the function has not been registered. - * If existingFn is not found, throw an Exception. - * - * @param {Function} existingFn A function that already exists in the pipeline. - * @param {Function} newFn The new function to add to the pipeline. - * @memberOf Pipeline - */ -elasticlunr.Pipeline.prototype.after = function (existingFn, newFn) { - elasticlunr.Pipeline.warnIfFunctionNotRegistered(newFn); - - var pos = this._queue.indexOf(existingFn); - if (pos === -1) { - throw new Error('Cannot find existingFn'); - } - - this._queue.splice(pos + 1, 0, newFn); -}; - -/** - * Adds a single function before a function that already exists in the - * pipeline. - * - * Logs a warning if the function has not been registered. - * If existingFn is not found, throw an Exception. - * - * @param {Function} existingFn A function that already exists in the pipeline. - * @param {Function} newFn The new function to add to the pipeline. - * @memberOf Pipeline - */ -elasticlunr.Pipeline.prototype.before = function (existingFn, newFn) { - elasticlunr.Pipeline.warnIfFunctionNotRegistered(newFn); - - var pos = this._queue.indexOf(existingFn); - if (pos === -1) { - throw new Error('Cannot find existingFn'); - } - - this._queue.splice(pos, 0, newFn); -}; - -/** - * Removes a function from the pipeline. - * - * @param {Function} fn The function to remove from the pipeline. - * @memberOf Pipeline - */ -elasticlunr.Pipeline.prototype.remove = function (fn) { - var pos = this._queue.indexOf(fn); - if (pos === -1) { - return; - } - - this._queue.splice(pos, 1); -}; - -/** - * Runs the current list of functions that registered in the pipeline against the - * input tokens. - * - * @param {Array} tokens The tokens to run through the pipeline. - * @return {Array} - * @memberOf Pipeline - */ -elasticlunr.Pipeline.prototype.run = function (tokens) { - var out = [], - tokenLength = tokens.length, - pipelineLength = this._queue.length; - - for (var i = 0; i < tokenLength; i++) { - var token = tokens[i]; - - for (var j = 0; j < pipelineLength; j++) { - token = this._queue[j](token, i, tokens); - if (token === void 0 || token === null) break; - }; - - if (token !== void 0 && token !== null) out.push(token); - }; - - return out; -}; - -/** - * Resets the pipeline by removing any existing processors. - * - * @memberOf Pipeline - */ -elasticlunr.Pipeline.prototype.reset = function () { - this._queue = []; -}; - - /** - * Get the pipeline if user want to check the pipeline. - * - * @memberOf Pipeline - */ - elasticlunr.Pipeline.prototype.get = function () { - return this._queue; - }; - -/** - * Returns a representation of the pipeline ready for serialisation. - * Only serialize pipeline function's name. Not storing function, so when - * loading the archived JSON index file, corresponding pipeline function is - * added by registered function of elasticlunr.Pipeline.registeredFunctions - * - * Logs a warning if the function has not been registered. - * - * @return {Array} - * @memberOf Pipeline - */ -elasticlunr.Pipeline.prototype.toJSON = function () { - return this._queue.map(function (fn) { - elasticlunr.Pipeline.warnIfFunctionNotRegistered(fn); - return fn.label; - }); -}; -/*! - * elasticlunr.Index - * Copyright (C) 2017 Oliver Nightingale - * Copyright (C) 2017 Wei Song - */ - -/** - * elasticlunr.Index is object that manages a search index. It contains the indexes - * and stores all the tokens and document lookups. It also provides the main - * user facing API for the library. - * - * @constructor - */ -elasticlunr.Index = function () { - this._fields = []; - this._ref = 'id'; - this.pipeline = new elasticlunr.Pipeline; - this.documentStore = new elasticlunr.DocumentStore; - this.index = {}; - this.eventEmitter = new elasticlunr.EventEmitter; - this._idfCache = {}; - - this.on('add', 'remove', 'update', (function () { - this._idfCache = {}; - }).bind(this)); -}; - -/** - * Bind a handler to events being emitted by the index. - * - * The handler can be bound to many events at the same time. - * - * @param {String} [eventName] The name(s) of events to bind the function to. - * @param {Function} fn The serialised set to load. - * @memberOf Index - */ -elasticlunr.Index.prototype.on = function () { - var args = Array.prototype.slice.call(arguments); - return this.eventEmitter.addListener.apply(this.eventEmitter, args); -}; - -/** - * Removes a handler from an event being emitted by the index. - * - * @param {String} eventName The name of events to remove the function from. - * @param {Function} fn The serialised set to load. - * @memberOf Index - */ -elasticlunr.Index.prototype.off = function (name, fn) { - return this.eventEmitter.removeListener(name, fn); -}; - -/** - * Loads a previously serialised index. - * - * Issues a warning if the index being imported was serialised - * by a different version of elasticlunr. - * - * @param {Object} serialisedData The serialised set to load. - * @return {elasticlunr.Index} - * @memberOf Index - */ -elasticlunr.Index.load = function (serialisedData) { - if (serialisedData.version !== elasticlunr.version) { - elasticlunr.utils.warn('version mismatch: current ' - + elasticlunr.version + ' importing ' + serialisedData.version); - } - - var idx = new this; - - idx._fields = serialisedData.fields; - idx._ref = serialisedData.ref; - idx.documentStore = elasticlunr.DocumentStore.load(serialisedData.documentStore); - idx.pipeline = elasticlunr.Pipeline.load(serialisedData.pipeline); - idx.index = {}; - for (var field in serialisedData.index) { - idx.index[field] = elasticlunr.InvertedIndex.load(serialisedData.index[field]); - } - - return idx; -}; - -/** - * Adds a field to the list of fields that will be searchable within documents in the index. - * - * Remember that inner index is build based on field, which means each field has one inverted index. - * - * Fields should be added before any documents are added to the index, fields - * that are added after documents are added to the index will only apply to new - * documents added to the index. - * - * @param {String} fieldName The name of the field within the document that should be indexed - * @return {elasticlunr.Index} - * @memberOf Index - */ -elasticlunr.Index.prototype.addField = function (fieldName) { - this._fields.push(fieldName); - this.index[fieldName] = new elasticlunr.InvertedIndex; - return this; -}; - -/** - * Sets the property used to uniquely identify documents added to the index, - * by default this property is 'id'. - * - * This should only be changed before adding documents to the index, changing - * the ref property without resetting the index can lead to unexpected results. - * - * @param {String} refName The property to use to uniquely identify the - * documents in the index. - * @param {Boolean} emitEvent Whether to emit add events, defaults to true - * @return {elasticlunr.Index} - * @memberOf Index - */ -elasticlunr.Index.prototype.setRef = function (refName) { - this._ref = refName; - return this; -}; - -/** - * - * Set if the JSON format original documents are save into elasticlunr.DocumentStore - * - * Defaultly save all the original JSON documents. - * - * @param {Boolean} save Whether to save the original JSON documents. - * @return {elasticlunr.Index} - * @memberOf Index - */ -elasticlunr.Index.prototype.saveDocument = function (save) { - this.documentStore = new elasticlunr.DocumentStore(save); - return this; -}; - -/** - * Add a JSON format document to the index. - * - * This is the way new documents enter the index, this function will run the - * fields from the document through the index's pipeline and then add it to - * the index, it will then show up in search results. - * - * An 'add' event is emitted with the document that has been added and the index - * the document has been added to. This event can be silenced by passing false - * as the second argument to add. - * - * @param {Object} doc The JSON format document to add to the index. - * @param {Boolean} emitEvent Whether or not to emit events, default true. - * @memberOf Index - */ -elasticlunr.Index.prototype.addDoc = function (doc, emitEvent) { - if (!doc) return; - var emitEvent = emitEvent === undefined ? true : emitEvent; - - var docRef = doc[this._ref]; - - this.documentStore.addDoc(docRef, doc); - this._fields.forEach(function (field) { - var fieldTokens = this.pipeline.run(elasticlunr.tokenizer(doc[field])); - this.documentStore.addFieldLength(docRef, field, fieldTokens.length); - - var tokenCount = {}; - fieldTokens.forEach(function (token) { - if (token in tokenCount) tokenCount[token] += 1; - else tokenCount[token] = 1; - }, this); - - for (var token in tokenCount) { - var termFrequency = tokenCount[token]; - termFrequency = Math.sqrt(termFrequency); - this.index[field].addToken(token, { ref: docRef, tf: termFrequency }); - } - }, this); - - if (emitEvent) this.eventEmitter.emit('add', doc, this); -}; - -/** - * Removes a document from the index by doc ref. - * - * To make sure documents no longer show up in search results they can be - * removed from the index using this method. - * - * A 'remove' event is emitted with the document that has been removed and the index - * the document has been removed from. This event can be silenced by passing false - * as the second argument to remove. - * - * If user setting DocumentStore not storing the documents, then remove doc by docRef is not allowed. - * - * @param {String|Integer} docRef The document ref to remove from the index. - * @param {Boolean} emitEvent Whether to emit remove events, defaults to true - * @memberOf Index - */ -elasticlunr.Index.prototype.removeDocByRef = function (docRef, emitEvent) { - if (!docRef) return; - if (this.documentStore.isDocStored() === false) { - return; - } - - if (!this.documentStore.hasDoc(docRef)) return; - var doc = this.documentStore.getDoc(docRef); - this.removeDoc(doc, false); -}; - -/** - * Removes a document from the index. - * This remove operation could work even the original doc is not store in the DocumentStore. - * - * To make sure documents no longer show up in search results they can be - * removed from the index using this method. - * - * A 'remove' event is emitted with the document that has been removed and the index - * the document has been removed from. This event can be silenced by passing false - * as the second argument to remove. - * - * - * @param {Object} doc The document ref to remove from the index. - * @param {Boolean} emitEvent Whether to emit remove events, defaults to true - * @memberOf Index - */ -elasticlunr.Index.prototype.removeDoc = function (doc, emitEvent) { - if (!doc) return; - - var emitEvent = emitEvent === undefined ? true : emitEvent; - - var docRef = doc[this._ref]; - if (!this.documentStore.hasDoc(docRef)) return; - - this.documentStore.removeDoc(docRef); - - this._fields.forEach(function (field) { - var fieldTokens = this.pipeline.run(elasticlunr.tokenizer(doc[field])); - fieldTokens.forEach(function (token) { - this.index[field].removeToken(token, docRef); - }, this); - }, this); - - if (emitEvent) this.eventEmitter.emit('remove', doc, this); -}; - -/** - * Updates a document in the index. - * - * When a document contained within the index gets updated, fields changed, - * added or removed, to make sure it correctly matched against search queries, - * it should be updated in the index. - * - * This method is just a wrapper around `remove` and `add` - * - * An 'update' event is emitted with the document that has been updated and the index. - * This event can be silenced by passing false as the second argument to update. Only - * an update event will be fired, the 'add' and 'remove' events of the underlying calls - * are silenced. - * - * @param {Object} doc The document to update in the index. - * @param {Boolean} emitEvent Whether to emit update events, defaults to true - * @see Index.prototype.remove - * @see Index.prototype.add - * @memberOf Index - */ -elasticlunr.Index.prototype.updateDoc = function (doc, emitEvent) { - var emitEvent = emitEvent === undefined ? true : emitEvent; - - this.removeDocByRef(doc[this._ref], false); - this.addDoc(doc, false); - - if (emitEvent) this.eventEmitter.emit('update', doc, this); -}; - -/** - * Calculates the inverse document frequency for a token within the index of a field. - * - * @param {String} token The token to calculate the idf of. - * @param {String} field The field to compute idf. - * @see Index.prototype.idf - * @private - * @memberOf Index - */ -elasticlunr.Index.prototype.idf = function (term, field) { - var cacheKey = "@" + field + '/' + term; - if (Object.prototype.hasOwnProperty.call(this._idfCache, cacheKey)) return this._idfCache[cacheKey]; - - var df = this.index[field].getDocFreq(term); - var idf = 1 + Math.log(this.documentStore.length / (df + 1)); - this._idfCache[cacheKey] = idf; - - return idf; -}; - -/** - * get fields of current index instance - * - * @return {Array} - */ -elasticlunr.Index.prototype.getFields = function () { - return this._fields.slice(); -}; - -/** - * Searches the index using the passed query. - * Queries should be a string, multiple words are allowed. - * - * If config is null, will search all fields defaultly, and lead to OR based query. - * If config is specified, will search specified with query time boosting. - * - * All query tokens are passed through the same pipeline that document tokens - * are passed through, so any language processing involved will be run on every - * query term. - * - * Each query term is expanded, so that the term 'he' might be expanded to - * 'hello' and 'help' if those terms were already included in the index. - * - * Matching documents are returned as an array of objects, each object contains - * the matching document ref, as set for this index, and the similarity score - * for this document against the query. - * - * @param {String} query The query to search the index with. - * @param {JSON} userConfig The user query config, JSON format. - * @return {Object} - * @see Index.prototype.idf - * @see Index.prototype.documentVector - * @memberOf Index - */ -elasticlunr.Index.prototype.search = function (query, userConfig) { - if (!query) return []; - if (typeof query === 'string') { - query = {any: query}; - } else { - query = JSON.parse(JSON.stringify(query)); - } - - var configStr = null; - if (userConfig != null) { - configStr = JSON.stringify(userConfig); - } - - var config = new elasticlunr.Configuration(configStr, this.getFields()).get(); - - var queryTokens = {}; - var queryFields = Object.keys(query); - - for (var i = 0; i < queryFields.length; i++) { - var key = queryFields[i]; - - queryTokens[key] = this.pipeline.run(elasticlunr.tokenizer(query[key])); - } - - var queryResults = {}; - - for (var field in config) { - var tokens = queryTokens[field] || queryTokens.any; - if (!tokens) { - continue; - } - - var fieldSearchResults = this.fieldSearch(tokens, field, config); - var fieldBoost = config[field].boost; - - for (var docRef in fieldSearchResults) { - fieldSearchResults[docRef] = fieldSearchResults[docRef] * fieldBoost; - } - - for (var docRef in fieldSearchResults) { - if (docRef in queryResults) { - queryResults[docRef] += fieldSearchResults[docRef]; - } else { - queryResults[docRef] = fieldSearchResults[docRef]; - } - } - } - - var results = []; - var result; - for (var docRef in queryResults) { - result = {ref: docRef, score: queryResults[docRef]}; - if (this.documentStore.hasDoc(docRef)) { - result.doc = this.documentStore.getDoc(docRef); - } - results.push(result); - } - - results.sort(function (a, b) { return b.score - a.score; }); - return results; -}; - -/** - * search queryTokens in specified field. - * - * @param {Array} queryTokens The query tokens to query in this field. - * @param {String} field Field to query in. - * @param {elasticlunr.Configuration} config The user query config, JSON format. - * @return {Object} - */ -elasticlunr.Index.prototype.fieldSearch = function (queryTokens, fieldName, config) { - var booleanType = config[fieldName].bool; - var expand = config[fieldName].expand; - var boost = config[fieldName].boost; - var scores = null; - var docTokens = {}; - - // Do nothing if the boost is 0 - if (boost === 0) { - return; - } - - queryTokens.forEach(function (token) { - var tokens = [token]; - if (expand == true) { - tokens = this.index[fieldName].expandToken(token); - } - // Consider every query token in turn. If expanded, each query token - // corresponds to a set of tokens, which is all tokens in the - // index matching the pattern queryToken* . - // For the set of tokens corresponding to a query token, find and score - // all matching documents. Store those scores in queryTokenScores, - // keyed by docRef. - // Then, depending on the value of booleanType, combine the scores - // for this query token with previous scores. If booleanType is OR, - // then merge the scores by summing into the accumulated total, adding - // new document scores are required (effectively a union operator). - // If booleanType is AND, accumulate scores only if the document - // has previously been scored by another query token (an intersection - // operation0. - // Furthermore, since when booleanType is AND, additional - // query tokens can't add new documents to the result set, use the - // current document set to limit the processing of each new query - // token for efficiency (i.e., incremental intersection). - - var queryTokenScores = {}; - tokens.forEach(function (key) { - var docs = this.index[fieldName].getDocs(key); - var idf = this.idf(key, fieldName); - - if (scores && booleanType == 'AND') { - // special case, we can rule out documents that have been - // already been filtered out because they weren't scored - // by previous query token passes. - var filteredDocs = {}; - for (var docRef in scores) { - if (docRef in docs) { - filteredDocs[docRef] = docs[docRef]; - } - } - docs = filteredDocs; - } - // only record appeared token for retrieved documents for the - // original token, not for expaned token. - // beause for doing coordNorm for a retrieved document, coordNorm only care how many - // query token appear in that document. - // so expanded token should not be added into docTokens, if added, this will pollute the - // coordNorm - if (key == token) { - this.fieldSearchStats(docTokens, key, docs); - } - - for (var docRef in docs) { - var tf = this.index[fieldName].getTermFrequency(key, docRef); - var fieldLength = this.documentStore.getFieldLength(docRef, fieldName); - var fieldLengthNorm = 1; - if (fieldLength != 0) { - fieldLengthNorm = 1 / Math.sqrt(fieldLength); - } - - var penality = 1; - if (key != token) { - // currently I'm not sure if this penality is enough, - // need to do verification - penality = (1 - (key.length - token.length) / key.length) * 0.15; - } - - var score = tf * idf * fieldLengthNorm * penality; - - if (docRef in queryTokenScores) { - queryTokenScores[docRef] += score; - } else { - queryTokenScores[docRef] = score; - } - } - }, this); - - scores = this.mergeScores(scores, queryTokenScores, booleanType); - }, this); - - scores = this.coordNorm(scores, docTokens, queryTokens.length); - return scores; -}; - -/** - * Merge the scores from one set of tokens into an accumulated score table. - * Exact operation depends on the op parameter. If op is 'AND', then only the - * intersection of the two score lists is retained. Otherwise, the union of - * the two score lists is returned. For internal use only. - * - * @param {Object} bool accumulated scores. Should be null on first call. - * @param {String} scores new scores to merge into accumScores. - * @param {Object} op merge operation (should be 'AND' or 'OR'). - * - */ - -elasticlunr.Index.prototype.mergeScores = function (accumScores, scores, op) { - if (!accumScores) { - return scores; - } - if (op == 'AND') { - var intersection = {}; - for (var docRef in scores) { - if (docRef in accumScores) { - intersection[docRef] = accumScores[docRef] + scores[docRef]; - } - } - return intersection; - } else { - for (var docRef in scores) { - if (docRef in accumScores) { - accumScores[docRef] += scores[docRef]; - } else { - accumScores[docRef] = scores[docRef]; - } - } - return accumScores; - } -}; - - -/** - * Record the occuring query token of retrieved doc specified by doc field. - * Only for inner user. - * - * @param {Object} docTokens a data structure stores which token appears in the retrieved doc. - * @param {String} token query token - * @param {Object} docs the retrieved documents of the query token - * - */ -elasticlunr.Index.prototype.fieldSearchStats = function (docTokens, token, docs) { - for (var doc in docs) { - if (doc in docTokens) { - docTokens[doc].push(token); - } else { - docTokens[doc] = [token]; - } - } -}; - -/** - * coord norm the score of a doc. - * if a doc contain more query tokens, then the score will larger than the doc - * contains less query tokens. - * - * only for inner use. - * - * @param {Object} results first results - * @param {Object} docs field search results of a token - * @param {Integer} n query token number - * @return {Object} - */ -elasticlunr.Index.prototype.coordNorm = function (scores, docTokens, n) { - for (var doc in scores) { - if (!(doc in docTokens)) continue; - var tokens = docTokens[doc].length; - scores[doc] = scores[doc] * tokens / n; - } - - return scores; -}; - -/** - * Returns a representation of the index ready for serialisation. - * - * @return {Object} - * @memberOf Index - */ -elasticlunr.Index.prototype.toJSON = function () { - var indexJson = {}; - this._fields.forEach(function (field) { - indexJson[field] = this.index[field].toJSON(); - }, this); - - return { - version: elasticlunr.version, - fields: this._fields, - ref: this._ref, - documentStore: this.documentStore.toJSON(), - index: indexJson, - pipeline: this.pipeline.toJSON() - }; -}; - -/** - * Applies a plugin to the current index. - * - * A plugin is a function that is called with the index as its context. - * Plugins can be used to customise or extend the behaviour the index - * in some way. A plugin is just a function, that encapsulated the custom - * behaviour that should be applied to the index. - * - * The plugin function will be called with the index as its argument, additional - * arguments can also be passed when calling use. The function will be called - * with the index as its context. - * - * Example: - * - * var myPlugin = function (idx, arg1, arg2) { - * // `this` is the index to be extended - * // apply any extensions etc here. - * } - * - * var idx = elasticlunr(function () { - * this.use(myPlugin, 'arg1', 'arg2') - * }) - * - * @param {Function} plugin The plugin to apply. - * @memberOf Index - */ -elasticlunr.Index.prototype.use = function (plugin) { - var args = Array.prototype.slice.call(arguments, 1); - args.unshift(this); - plugin.apply(this, args); -}; -/*! - * elasticlunr.DocumentStore - * Copyright (C) 2017 Wei Song - */ - -/** - * elasticlunr.DocumentStore is a simple key-value document store used for storing sets of tokens for - * documents stored in index. - * - * elasticlunr.DocumentStore store original JSON format documents that you could build search snippet by this original JSON document. - * - * user could choose whether original JSON format document should be store, if no configuration then document will be stored defaultly. - * If user care more about the index size, user could select not store JSON documents, then this will has some defects, such as user - * could not use JSON document to generate snippets of search results. - * - * @param {Boolean} save If the original JSON document should be stored. - * @constructor - * @module - */ -elasticlunr.DocumentStore = function (save) { - if (save === null || save === undefined) { - this._save = true; - } else { - this._save = save; - } - - this.docs = {}; - this.docInfo = {}; - this.length = 0; -}; - -/** - * Loads a previously serialised document store - * - * @param {Object} serialisedData The serialised document store to load. - * @return {elasticlunr.DocumentStore} - */ -elasticlunr.DocumentStore.load = function (serialisedData) { - var store = new this; - - store.length = serialisedData.length; - store.docs = serialisedData.docs; - store.docInfo = serialisedData.docInfo; - store._save = serialisedData.save; - - return store; -}; - -/** - * check if current instance store the original doc - * - * @return {Boolean} - */ -elasticlunr.DocumentStore.prototype.isDocStored = function () { - return this._save; -}; - -/** - * Stores the given doc in the document store against the given id. - * If docRef already exist, then update doc. - * - * Document is store by original JSON format, then you could use original document to generate search snippets. - * - * @param {Integer|String} docRef The key used to store the JSON format doc. - * @param {Object} doc The JSON format doc. - */ -elasticlunr.DocumentStore.prototype.addDoc = function (docRef, doc) { - if (!this.hasDoc(docRef)) this.length++; - - if (this._save === true) { - this.docs[docRef] = clone(doc); - } else { - this.docs[docRef] = null; - } -}; - -/** - * Retrieves the JSON doc from the document store for a given key. - * - * If docRef not found, return null. - * If user set not storing the documents, return null. - * - * @param {Integer|String} docRef The key to lookup and retrieve from the document store. - * @return {Object} - * @memberOf DocumentStore - */ -elasticlunr.DocumentStore.prototype.getDoc = function (docRef) { - if (this.hasDoc(docRef) === false) return null; - return this.docs[docRef]; -}; - -/** - * Checks whether the document store contains a key (docRef). - * - * @param {Integer|String} docRef The id to look up in the document store. - * @return {Boolean} - * @memberOf DocumentStore - */ -elasticlunr.DocumentStore.prototype.hasDoc = function (docRef) { - return docRef in this.docs; -}; - -/** - * Removes the value for a key in the document store. - * - * @param {Integer|String} docRef The id to remove from the document store. - * @memberOf DocumentStore - */ -elasticlunr.DocumentStore.prototype.removeDoc = function (docRef) { - if (!this.hasDoc(docRef)) return; - - delete this.docs[docRef]; - delete this.docInfo[docRef]; - this.length--; -}; - -/** - * Add field length of a document's field tokens from pipeline results. - * The field length of a document is used to do field length normalization even without the original JSON document stored. - * - * @param {Integer|String} docRef document's id or reference - * @param {String} fieldName field name - * @param {Integer} length field length - */ -elasticlunr.DocumentStore.prototype.addFieldLength = function (docRef, fieldName, length) { - if (docRef === null || docRef === undefined) return; - if (this.hasDoc(docRef) == false) return; - - if (!this.docInfo[docRef]) this.docInfo[docRef] = {}; - this.docInfo[docRef][fieldName] = length; -}; - -/** - * Update field length of a document's field tokens from pipeline results. - * The field length of a document is used to do field length normalization even without the original JSON document stored. - * - * @param {Integer|String} docRef document's id or reference - * @param {String} fieldName field name - * @param {Integer} length field length - */ -elasticlunr.DocumentStore.prototype.updateFieldLength = function (docRef, fieldName, length) { - if (docRef === null || docRef === undefined) return; - if (this.hasDoc(docRef) == false) return; - - this.addFieldLength(docRef, fieldName, length); -}; - -/** - * get field length of a document by docRef - * - * @param {Integer|String} docRef document id or reference - * @param {String} fieldName field name - * @return {Integer} field length - */ -elasticlunr.DocumentStore.prototype.getFieldLength = function (docRef, fieldName) { - if (docRef === null || docRef === undefined) return 0; - - if (!(docRef in this.docs)) return 0; - if (!(fieldName in this.docInfo[docRef])) return 0; - return this.docInfo[docRef][fieldName]; -}; - -/** - * Returns a JSON representation of the document store used for serialisation. - * - * @return {Object} JSON format - * @memberOf DocumentStore - */ -elasticlunr.DocumentStore.prototype.toJSON = function () { - return { - docs: this.docs, - docInfo: this.docInfo, - length: this.length, - save: this._save - }; -}; - -/** - * Cloning object - * - * @param {Object} object in JSON format - * @return {Object} copied object - */ -function clone(obj) { - if (null === obj || "object" !== typeof obj) return obj; - - var copy = obj.constructor(); - - for (var attr in obj) { - if (obj.hasOwnProperty(attr)) copy[attr] = obj[attr]; - } - - return copy; -} -/*! - * elasticlunr.stemmer - * Copyright (C) 2017 Oliver Nightingale - * Copyright (C) 2017 Wei Song - * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt - */ - -/** - * elasticlunr.stemmer is an english language stemmer, this is a JavaScript - * implementation of the PorterStemmer taken from http://tartarus.org/~martin - * - * @module - * @param {String} str The string to stem - * @return {String} - * @see elasticlunr.Pipeline - */ -elasticlunr.stemmer = (function(){ - var step2list = { - "ational" : "ate", - "tional" : "tion", - "enci" : "ence", - "anci" : "ance", - "izer" : "ize", - "bli" : "ble", - "alli" : "al", - "entli" : "ent", - "eli" : "e", - "ousli" : "ous", - "ization" : "ize", - "ation" : "ate", - "ator" : "ate", - "alism" : "al", - "iveness" : "ive", - "fulness" : "ful", - "ousness" : "ous", - "aliti" : "al", - "iviti" : "ive", - "biliti" : "ble", - "logi" : "log" - }, - - step3list = { - "icate" : "ic", - "ative" : "", - "alize" : "al", - "iciti" : "ic", - "ical" : "ic", - "ful" : "", - "ness" : "" - }, - - c = "[^aeiou]", // consonant - v = "[aeiouy]", // vowel - C = c + "[^aeiouy]*", // consonant sequence - V = v + "[aeiou]*", // vowel sequence - - mgr0 = "^(" + C + ")?" + V + C, // [C]VC... is m>0 - meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$", // [C]VC[V] is m=1 - mgr1 = "^(" + C + ")?" + V + C + V + C, // [C]VCVC... is m>1 - s_v = "^(" + C + ")?" + v; // vowel in stem - - var re_mgr0 = new RegExp(mgr0); - var re_mgr1 = new RegExp(mgr1); - var re_meq1 = new RegExp(meq1); - var re_s_v = new RegExp(s_v); - - var re_1a = /^(.+?)(ss|i)es$/; - var re2_1a = /^(.+?)([^s])s$/; - var re_1b = /^(.+?)eed$/; - var re2_1b = /^(.+?)(ed|ing)$/; - var re_1b_2 = /.$/; - var re2_1b_2 = /(at|bl|iz)$/; - var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$"); - var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$"); - - var re_1c = /^(.+?[^aeiou])y$/; - var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; - - var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; - - var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; - var re2_4 = /^(.+?)(s|t)(ion)$/; - - var re_5 = /^(.+?)e$/; - var re_5_1 = /ll$/; - var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$"); - - var porterStemmer = function porterStemmer(w) { - var stem, - suffix, - firstch, - re, - re2, - re3, - re4; - - if (w.length < 3) { return w; } - - firstch = w.substr(0,1); - if (firstch == "y") { - w = firstch.toUpperCase() + w.substr(1); - } - - // Step 1a - re = re_1a - re2 = re2_1a; - - if (re.test(w)) { w = w.replace(re,"$1$2"); } - else if (re2.test(w)) { w = w.replace(re2,"$1$2"); } - - // Step 1b - re = re_1b; - re2 = re2_1b; - if (re.test(w)) { - var fp = re.exec(w); - re = re_mgr0; - if (re.test(fp[1])) { - re = re_1b_2; - w = w.replace(re,""); - } - } else if (re2.test(w)) { - var fp = re2.exec(w); - stem = fp[1]; - re2 = re_s_v; - if (re2.test(stem)) { - w = stem; - re2 = re2_1b_2; - re3 = re3_1b_2; - re4 = re4_1b_2; - if (re2.test(w)) { w = w + "e"; } - else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); } - else if (re4.test(w)) { w = w + "e"; } - } - } - - // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say) - re = re_1c; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - w = stem + "i"; - } - - // Step 2 - re = re_2; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - suffix = fp[2]; - re = re_mgr0; - if (re.test(stem)) { - w = stem + step2list[suffix]; - } - } - - // Step 3 - re = re_3; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - suffix = fp[2]; - re = re_mgr0; - if (re.test(stem)) { - w = stem + step3list[suffix]; - } - } - - // Step 4 - re = re_4; - re2 = re2_4; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - re = re_mgr1; - if (re.test(stem)) { - w = stem; - } - } else if (re2.test(w)) { - var fp = re2.exec(w); - stem = fp[1] + fp[2]; - re2 = re_mgr1; - if (re2.test(stem)) { - w = stem; - } - } - - // Step 5 - re = re_5; - if (re.test(w)) { - var fp = re.exec(w); - stem = fp[1]; - re = re_mgr1; - re2 = re_meq1; - re3 = re3_5; - if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) { - w = stem; - } - } - - re = re_5_1; - re2 = re_mgr1; - if (re.test(w) && re2.test(w)) { - re = re_1b_2; - w = w.replace(re,""); - } - - // and turn initial Y back to y - - if (firstch == "y") { - w = firstch.toLowerCase() + w.substr(1); - } - - return w; - }; - - return porterStemmer; -})(); - -elasticlunr.Pipeline.registerFunction(elasticlunr.stemmer, 'stemmer'); -/*! - * elasticlunr.stopWordFilter - * Copyright (C) 2017 Oliver Nightingale - * Copyright (C) 2017 Wei Song - */ - -/** - * elasticlunr.stopWordFilter is an English language stop words filter, any words - * contained in the stop word list will not be passed through the filter. - * - * This is intended to be used in the Pipeline. If the token does not pass the - * filter then undefined will be returned. - * Currently this StopwordFilter using dictionary to do O(1) time complexity stop word filtering. - * - * @module - * @param {String} token The token to pass through the filter - * @return {String} - * @see elasticlunr.Pipeline - */ -elasticlunr.stopWordFilter = function (token) { - if (token && elasticlunr.stopWordFilter.stopWords[token] !== true) { - return token; - } -}; - -/** - * Remove predefined stop words - * if user want to use customized stop words, user could use this function to delete - * all predefined stopwords. - * - * @return {null} - */ -elasticlunr.clearStopWords = function () { - elasticlunr.stopWordFilter.stopWords = {}; -}; - -/** - * Add customized stop words - * user could use this function to add customized stop words - * - * @params {Array} words customized stop words - * @return {null} - */ -elasticlunr.addStopWords = function (words) { - if (words == null || Array.isArray(words) === false) return; - - words.forEach(function (word) { - elasticlunr.stopWordFilter.stopWords[word] = true; - }, this); -}; - -/** - * Reset to default stop words - * user could use this function to restore default stop words - * - * @return {null} - */ -elasticlunr.resetStopWords = function () { - elasticlunr.stopWordFilter.stopWords = elasticlunr.defaultStopWords; -}; - -elasticlunr.defaultStopWords = { - "": true, - "a": true, - "able": true, - "about": true, - "across": true, - "after": true, - "all": true, - "almost": true, - "also": true, - "am": true, - "among": true, - "an": true, - "and": true, - "any": true, - "are": true, - "as": true, - "at": true, - "be": true, - "because": true, - "been": true, - "but": true, - "by": true, - "can": true, - "cannot": true, - "could": true, - "dear": true, - "did": true, - "do": true, - "does": true, - "either": true, - "else": true, - "ever": true, - "every": true, - "for": true, - "from": true, - "get": true, - "got": true, - "had": true, - "has": true, - "have": true, - "he": true, - "her": true, - "hers": true, - "him": true, - "his": true, - "how": true, - "however": true, - "i": true, - "if": true, - "in": true, - "into": true, - "is": true, - "it": true, - "its": true, - "just": true, - "least": true, - "let": true, - "like": true, - "likely": true, - "may": true, - "me": true, - "might": true, - "most": true, - "must": true, - "my": true, - "neither": true, - "no": true, - "nor": true, - "not": true, - "of": true, - "off": true, - "often": true, - "on": true, - "only": true, - "or": true, - "other": true, - "our": true, - "own": true, - "rather": true, - "said": true, - "say": true, - "says": true, - "she": true, - "should": true, - "since": true, - "so": true, - "some": true, - "than": true, - "that": true, - "the": true, - "their": true, - "them": true, - "then": true, - "there": true, - "these": true, - "they": true, - "this": true, - "tis": true, - "to": true, - "too": true, - "twas": true, - "us": true, - "wants": true, - "was": true, - "we": true, - "were": true, - "what": true, - "when": true, - "where": true, - "which": true, - "while": true, - "who": true, - "whom": true, - "why": true, - "will": true, - "with": true, - "would": true, - "yet": true, - "you": true, - "your": true -}; - -elasticlunr.stopWordFilter.stopWords = elasticlunr.defaultStopWords; - -elasticlunr.Pipeline.registerFunction(elasticlunr.stopWordFilter, 'stopWordFilter'); -/*! - * elasticlunr.trimmer - * Copyright (C) 2017 Oliver Nightingale - * Copyright (C) 2017 Wei Song - */ - -/** - * elasticlunr.trimmer is a pipeline function for trimming non word - * characters from the begining and end of tokens before they - * enter the index. - * - * This implementation may not work correctly for non latin - * characters and should either be removed or adapted for use - * with languages with non-latin characters. - * - * @module - * @param {String} token The token to pass through the filter - * @return {String} - * @see elasticlunr.Pipeline - */ -elasticlunr.trimmer = function (token) { - if (token === null || token === undefined) { - throw new Error('token should not be undefined'); - } - - return token - .replace(/^\W+/, '') - .replace(/\W+$/, ''); -}; - -elasticlunr.Pipeline.registerFunction(elasticlunr.trimmer, 'trimmer'); -/*! - * elasticlunr.InvertedIndex - * Copyright (C) 2017 Wei Song - * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt - */ - -/** - * elasticlunr.InvertedIndex is used for efficiently storing and - * lookup of documents that contain a given token. - * - * @constructor - */ -elasticlunr.InvertedIndex = function () { - this.root = { docs: {}, df: 0 }; -}; - -/** - * Loads a previously serialised inverted index. - * - * @param {Object} serialisedData The serialised inverted index to load. - * @return {elasticlunr.InvertedIndex} - */ -elasticlunr.InvertedIndex.load = function (serialisedData) { - var idx = new this; - idx.root = serialisedData.root; - - return idx; -}; - -/** - * Adds a {token: tokenInfo} pair to the inverted index. - * If the token already exist, then update the tokenInfo. - * - * tokenInfo format: { ref: 1, tf: 2} - * tokenInfor should contains the document's ref and the tf(token frequency) of that token in - * the document. - * - * By default this function starts at the root of the current inverted index, however - * it can start at any node of the inverted index if required. - * - * @param {String} token - * @param {Object} tokenInfo format: { ref: 1, tf: 2} - * @param {Object} root An optional node at which to start looking for the - * correct place to enter the doc, by default the root of this elasticlunr.InvertedIndex - * is used. - * @memberOf InvertedIndex - */ -elasticlunr.InvertedIndex.prototype.addToken = function (token, tokenInfo, root) { - var root = root || this.root, - idx = 0; - - while (idx <= token.length - 1) { - var key = token[idx]; - - if (!(key in root)) root[key] = {docs: {}, df: 0}; - idx += 1; - root = root[key]; - } - - var docRef = tokenInfo.ref; - if (!root.docs[docRef]) { - // if this doc not exist, then add this doc - root.docs[docRef] = {tf: tokenInfo.tf}; - root.df += 1; - } else { - // if this doc already exist, then update tokenInfo - root.docs[docRef] = {tf: tokenInfo.tf}; - } -}; - -/** - * Checks whether a token is in this elasticlunr.InvertedIndex. - * - * - * @param {String} token The token to be checked - * @return {Boolean} - * @memberOf InvertedIndex - */ -elasticlunr.InvertedIndex.prototype.hasToken = function (token) { - if (!token) return false; - - var node = this.root; - - for (var i = 0; i < token.length; i++) { - if (!node[token[i]]) return false; - node = node[token[i]]; - } - - return true; -}; - -/** - * Retrieve a node from the inverted index for a given token. - * If token not found in this InvertedIndex, return null. - * - * - * @param {String} token The token to get the node for. - * @return {Object} - * @see InvertedIndex.prototype.get - * @memberOf InvertedIndex - */ -elasticlunr.InvertedIndex.prototype.getNode = function (token) { - if (!token) return null; - - var node = this.root; - - for (var i = 0; i < token.length; i++) { - if (!node[token[i]]) return null; - node = node[token[i]]; - } - - return node; -}; - -/** - * Retrieve the documents of a given token. - * If token not found, return {}. - * - * - * @param {String} token The token to get the documents for. - * @return {Object} - * @memberOf InvertedIndex - */ -elasticlunr.InvertedIndex.prototype.getDocs = function (token) { - var node = this.getNode(token); - if (node == null) { - return {}; - } - - return node.docs; -}; - -/** - * Retrieve term frequency of given token in given docRef. - * If token or docRef not found, return 0. - * - * - * @param {String} token The token to get the documents for. - * @param {String|Integer} docRef - * @return {Integer} - * @memberOf InvertedIndex - */ -elasticlunr.InvertedIndex.prototype.getTermFrequency = function (token, docRef) { - var node = this.getNode(token); - - if (node == null) { - return 0; - } - - if (!(docRef in node.docs)) { - return 0; - } - - return node.docs[docRef].tf; -}; - -/** - * Retrieve the document frequency of given token. - * If token not found, return 0. - * - * - * @param {String} token The token to get the documents for. - * @return {Object} - * @memberOf InvertedIndex - */ -elasticlunr.InvertedIndex.prototype.getDocFreq = function (token) { - var node = this.getNode(token); - - if (node == null) { - return 0; - } - - return node.df; -}; - -/** - * Remove the document identified by document's ref from the token in the inverted index. - * - * - * @param {String} token Remove the document from which token. - * @param {String} ref The ref of the document to remove from given token. - * @memberOf InvertedIndex - */ -elasticlunr.InvertedIndex.prototype.removeToken = function (token, ref) { - if (!token) return; - var node = this.getNode(token); - - if (node == null) return; - - if (ref in node.docs) { - delete node.docs[ref]; - node.df -= 1; - } -}; - -/** - * Find all the possible suffixes of given token using tokens currently in the inverted index. - * If token not found, return empty Array. - * - * @param {String} token The token to expand. - * @return {Array} - * @memberOf InvertedIndex - */ -elasticlunr.InvertedIndex.prototype.expandToken = function (token, memo, root) { - if (token == null || token == '') return []; - var memo = memo || []; - - if (root == void 0) { - root = this.getNode(token); - if (root == null) return memo; - } - - if (root.df > 0) memo.push(token); - - for (var key in root) { - if (key === 'docs') continue; - if (key === 'df') continue; - this.expandToken(token + key, memo, root[key]); - } - - return memo; -}; - -/** - * Returns a representation of the inverted index ready for serialisation. - * - * @return {Object} - * @memberOf InvertedIndex - */ -elasticlunr.InvertedIndex.prototype.toJSON = function () { - return { - root: this.root - }; -}; - -/*! - * elasticlunr.Configuration - * Copyright (C) 2017 Wei Song - */ - - /** - * elasticlunr.Configuration is used to analyze the user search configuration. - * - * By elasticlunr.Configuration user could set query-time boosting, boolean model in each field. - * - * Currently configuration supports: - * 1. query-time boosting, user could set how to boost each field. - * 2. boolean model chosing, user could choose which boolean model to use for each field. - * 3. token expandation, user could set token expand to True to improve Recall. Default is False. - * - * Query time boosting must be configured by field category, "boolean" model could be configured - * by both field category or globally as the following example. Field configuration for "boolean" - * will overwrite global configuration. - * Token expand could be configured both by field category or golbally. Local field configuration will - * overwrite global configuration. - * - * configuration example: - * { - * fields:{ - * title: {boost: 2}, - * body: {boost: 1} - * }, - * bool: "OR" - * } - * - * "bool" field configuation overwrite global configuation example: - * { - * fields:{ - * title: {boost: 2, bool: "AND"}, - * body: {boost: 1} - * }, - * bool: "OR" - * } - * - * "expand" example: - * { - * fields:{ - * title: {boost: 2, bool: "AND"}, - * body: {boost: 1} - * }, - * bool: "OR", - * expand: true - * } - * - * "expand" example for field category: - * { - * fields:{ - * title: {boost: 2, bool: "AND", expand: true}, - * body: {boost: 1} - * }, - * bool: "OR" - * } - * - * setting the boost to 0 ignores the field (this will only search the title): - * { - * fields:{ - * title: {boost: 1}, - * body: {boost: 0} - * } - * } - * - * then, user could search with configuration to do query-time boosting. - * idx.search('oracle database', {fields: {title: {boost: 2}, body: {boost: 1}}}); - * - * - * @constructor - * - * @param {String} config user configuration - * @param {Array} fields fields of index instance - * @module - */ -elasticlunr.Configuration = function (config, fields) { - var config = config || ''; - - if (fields == undefined || fields == null) { - throw new Error('fields should not be null'); - } - - this.config = {}; - - var userConfig; - try { - userConfig = JSON.parse(config); - this.buildUserConfig(userConfig, fields); - } catch (error) { - elasticlunr.utils.warn('user configuration parse failed, will use default configuration'); - this.buildDefaultConfig(fields); - } -}; - -/** - * Build default search configuration. - * - * @param {Array} fields fields of index instance - */ -elasticlunr.Configuration.prototype.buildDefaultConfig = function (fields) { - this.reset(); - fields.forEach(function (field) { - this.config[field] = { - boost: 1, - bool: "OR", - expand: false - }; - }, this); -}; - -/** - * Build user configuration. - * - * @param {JSON} config User JSON configuratoin - * @param {Array} fields fields of index instance - */ -elasticlunr.Configuration.prototype.buildUserConfig = function (config, fields) { - var global_bool = "OR"; - var global_expand = false; - - this.reset(); - if ('bool' in config) { - global_bool = config['bool'] || global_bool; - } - - if ('expand' in config) { - global_expand = config['expand'] || global_expand; - } - - if ('fields' in config) { - for (var field in config['fields']) { - if (fields.indexOf(field) > -1) { - var field_config = config['fields'][field]; - var field_expand = global_expand; - if (field_config.expand != undefined) { - field_expand = field_config.expand; - } - - this.config[field] = { - boost: (field_config.boost || field_config.boost === 0) ? field_config.boost : 1, - bool: field_config.bool || global_bool, - expand: field_expand - }; - } else { - elasticlunr.utils.warn('field name in user configuration not found in index instance fields'); - } - } - } else { - this.addAllFields2UserConfig(global_bool, global_expand, fields); - } -}; - -/** - * Add all fields to user search configuration. - * - * @param {String} bool Boolean model - * @param {String} expand Expand model - * @param {Array} fields fields of index instance - */ -elasticlunr.Configuration.prototype.addAllFields2UserConfig = function (bool, expand, fields) { - fields.forEach(function (field) { - this.config[field] = { - boost: 1, - bool: bool, - expand: expand - }; - }, this); -}; - -/** - * get current user configuration - */ -elasticlunr.Configuration.prototype.get = function () { - return this.config; -}; - -/** - * reset user search configuration. - */ -elasticlunr.Configuration.prototype.reset = function () { - this.config = {}; -}; -/** - * sorted_set.js is added only to make elasticlunr.js compatible with lunr-languages. - * if elasticlunr.js support different languages by default, this will make elasticlunr.js - * much bigger that not good for browser usage. - * - */ - - -/*! - * lunr.SortedSet - * Copyright (C) 2017 Oliver Nightingale - */ - -/** - * lunr.SortedSets are used to maintain an array of uniq values in a sorted - * order. - * - * @constructor - */ -lunr.SortedSet = function () { - this.length = 0 - this.elements = [] -} - -/** - * Loads a previously serialised sorted set. - * - * @param {Array} serialisedData The serialised set to load. - * @returns {lunr.SortedSet} - * @memberOf SortedSet - */ -lunr.SortedSet.load = function (serialisedData) { - var set = new this - - set.elements = serialisedData - set.length = serialisedData.length - - return set -} - -/** - * Inserts new items into the set in the correct position to maintain the - * order. - * - * @param {Object} The objects to add to this set. - * @memberOf SortedSet - */ -lunr.SortedSet.prototype.add = function () { - var i, element - - for (i = 0; i < arguments.length; i++) { - element = arguments[i] - if (~this.indexOf(element)) continue - this.elements.splice(this.locationFor(element), 0, element) - } - - this.length = this.elements.length -} - -/** - * Converts this sorted set into an array. - * - * @returns {Array} - * @memberOf SortedSet - */ -lunr.SortedSet.prototype.toArray = function () { - return this.elements.slice() -} - -/** - * Creates a new array with the results of calling a provided function on every - * element in this sorted set. - * - * Delegates to Array.prototype.map and has the same signature. - * - * @param {Function} fn The function that is called on each element of the - * set. - * @param {Object} ctx An optional object that can be used as the context - * for the function fn. - * @returns {Array} - * @memberOf SortedSet - */ -lunr.SortedSet.prototype.map = function (fn, ctx) { - return this.elements.map(fn, ctx) -} - -/** - * Executes a provided function once per sorted set element. - * - * Delegates to Array.prototype.forEach and has the same signature. - * - * @param {Function} fn The function that is called on each element of the - * set. - * @param {Object} ctx An optional object that can be used as the context - * @memberOf SortedSet - * for the function fn. - */ -lunr.SortedSet.prototype.forEach = function (fn, ctx) { - return this.elements.forEach(fn, ctx) -} - -/** - * Returns the index at which a given element can be found in the - * sorted set, or -1 if it is not present. - * - * @param {Object} elem The object to locate in the sorted set. - * @returns {Number} - * @memberOf SortedSet - */ -lunr.SortedSet.prototype.indexOf = function (elem) { - var start = 0, - end = this.elements.length, - sectionLength = end - start, - pivot = start + Math.floor(sectionLength / 2), - pivotElem = this.elements[pivot] - - while (sectionLength > 1) { - if (pivotElem === elem) return pivot - - if (pivotElem < elem) start = pivot - if (pivotElem > elem) end = pivot - - sectionLength = end - start - pivot = start + Math.floor(sectionLength / 2) - pivotElem = this.elements[pivot] - } - - if (pivotElem === elem) return pivot - - return -1 -} - -/** - * Returns the position within the sorted set that an element should be - * inserted at to maintain the current order of the set. - * - * This function assumes that the element to search for does not already exist - * in the sorted set. - * - * @param {Object} elem The elem to find the position for in the set - * @returns {Number} - * @memberOf SortedSet - */ -lunr.SortedSet.prototype.locationFor = function (elem) { - var start = 0, - end = this.elements.length, - sectionLength = end - start, - pivot = start + Math.floor(sectionLength / 2), - pivotElem = this.elements[pivot] - - while (sectionLength > 1) { - if (pivotElem < elem) start = pivot - if (pivotElem > elem) end = pivot - - sectionLength = end - start - pivot = start + Math.floor(sectionLength / 2) - pivotElem = this.elements[pivot] - } - - if (pivotElem > elem) return pivot - if (pivotElem < elem) return pivot + 1 -} - -/** - * Creates a new lunr.SortedSet that contains the elements in the intersection - * of this set and the passed set. - * - * @param {lunr.SortedSet} otherSet The set to intersect with this set. - * @returns {lunr.SortedSet} - * @memberOf SortedSet - */ -lunr.SortedSet.prototype.intersect = function (otherSet) { - var intersectSet = new lunr.SortedSet, - i = 0, j = 0, - a_len = this.length, b_len = otherSet.length, - a = this.elements, b = otherSet.elements - - while (true) { - if (i > a_len - 1 || j > b_len - 1) break - - if (a[i] === b[j]) { - intersectSet.add(a[i]) - i++, j++ - continue - } - - if (a[i] < b[j]) { - i++ - continue - } - - if (a[i] > b[j]) { - j++ - continue - } - }; - - return intersectSet -} - -/** - * Makes a copy of this set - * - * @returns {lunr.SortedSet} - * @memberOf SortedSet - */ -lunr.SortedSet.prototype.clone = function () { - var clone = new lunr.SortedSet - - clone.elements = this.toArray() - clone.length = clone.elements.length - - return clone -} - -/** - * Creates a new lunr.SortedSet that contains the elements in the union - * of this set and the passed set. - * - * @param {lunr.SortedSet} otherSet The set to union with this set. - * @returns {lunr.SortedSet} - * @memberOf SortedSet - */ -lunr.SortedSet.prototype.union = function (otherSet) { - var longSet, shortSet, unionSet - - if (this.length >= otherSet.length) { - longSet = this, shortSet = otherSet - } else { - longSet = otherSet, shortSet = this - } - - unionSet = longSet.clone() - - for(var i = 0, shortSetElements = shortSet.toArray(); i < shortSetElements.length; i++){ - unionSet.add(shortSetElements[i]) - } - - return unionSet -} - -/** - * Returns a representation of the sorted set ready for serialisation. - * - * @returns {Array} - * @memberOf SortedSet - */ -lunr.SortedSet.prototype.toJSON = function () { - return this.toArray() -} - /** - * export the module via AMD, CommonJS or as a browser global - * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js - */ - ;(function (root, factory) { - if (typeof define === 'function' && define.amd) { - // AMD. Register as an anonymous module. - define(factory) - } else if (typeof exports === 'object') { - /** - * Node. Does not work with strict CommonJS, but - * only CommonJS-like enviroments that support module.exports, - * like Node. - */ - module.exports = factory() - } else { - // Browser globals (root is window) - root.elasticlunr = factory() - } - }(this, function () { - /** - * Just return a value to define the module export. - * This example returns an object, but the module - * can return a function as the exported value. - */ - return elasticlunr - })) -})(); diff --git a/nasg.py b/nasg.py index 6890383..bc1c2a5 100644 --- a/nasg.py +++ b/nasg.py @@ -14,7 +14,7 @@ import re import imghdr import logging import asyncio -import json +import sqlite3 from shutil import copy2 as cp from math import ceil from urllib.parse import urlparse @@ -28,7 +28,6 @@ import markdown from feedgen.feed import FeedGenerator from bleach import clean from emoji import UNICODE_EMOJI -from py_mini_racer import py_mini_racer import exiftool import settings @@ -445,15 +444,12 @@ class Singular(MarkdownDoc): @property def corpus(self): - return { - 'url': self.url, - 'title': self.title, - 'body': "\n".join([ + return "\n".join([ + self.title, self.name, self.summary, self.content, ]) - } async def render(self): if self.exists: @@ -1036,39 +1032,72 @@ class Category(dict): self.render_feed() self.ping_websub() - class Search(object): def __init__(self): - self.js = py_mini_racer.MiniRacer() - with open('elasticlunr.js') as f: - self.js.eval(f.read()) - - self.js.eval(""" - var index = elasticlunr(); - index.addField('title'); - index.addField('body'); - index.setRef('url'); - - """) - # index.saveDocument(false); - - @property - def fpath(self): - return os.path.join( + self.fpath = os.path.join( settings.paths.get('build'), - 'search.json' + 'search.sqlite' + ) + self.db = sqlite3.connect(self.fpath) + self.db.execute('PRAGMA auto_vacuum = INCREMENTAL;') + self.db.execute('PRAGMA journal_mode = MEMORY;') + self.db.execute('PRAGMA temp_store = MEMORY;') + self.db.execute('PRAGMA locking_mode = NORMAL;') + self.db.execute('PRAGMA synchronous = FULL;') + self.db.execute('PRAGMA encoding = "UTF-8";') + self.db.execute(''' + CREATE VIRTUAL TABLE IF NOT EXISTS data USING fts4( + url, + mtime, + name, + title, + category, + content, + notindexed=category, + notindexed=url, + notindexed=mtime, + tokenize=porter + )''' ) - def add(self, data): - self.js.eval(""" - index.addDoc(%s); - """ % ( - json.dumps(data) + def __exit__(self): + self.db.commit() + self.db.execute('PRAGMA auto_vacuum;') + self.db.close() + + def append(self, url, mtime, name, title, category, content): + # TODO: delete if mtime differs + mtime = int(mtime) + self.db.execute(''' + INSERT OR IGNORE INTO data + (url, mtime, name, title, category, content) + VALUES (?,?,?,?,?,?); + ''', ( + url, + mtime, + name, + title, + category, + content )) - def save(self): - with open(self.fpath, 'wt') as f: - f.write(json.dumps(self.js.eval("index.toJSON()"))) + async def render(self): + r = J2.get_template('Search.j2.php').render({ + 'post': {}, + 'site': settings.site, + 'author': settings.author, + 'meta': settings.meta, + 'licence': settings.licence, + 'tips': settings.tips, + 'labels': settings.labels + }) + target = os.path.join( + settings.paths.get('build'), + 'search.php' + ) + with open(target, 'wt') as f: + logging.info("rendering to %s", target) + f.write(r) def make(): @@ -1104,9 +1133,18 @@ def make(): for i in post.images.values(): worker.append(i.downsize()) worker.append(post.render()) - search.add(post.corpus) sitemap[post.url] = post.mtime + search.append( + url=post.url, + mtime=post.mtime, + name=post.name, + title=post.title, + category=post.category, + content=post.content + ) + search.__exit__() + worker.append(search.render()) for category in categories.values(): worker.append(category.render()) @@ -1128,8 +1166,7 @@ def make(): with open(t, 'wt') as f: f.write("\n".join(sorted(sitemap.keys()))) - # dump search index - search.save() + end = int(round(time.time() * 1000)) logging.info('process took %d ms' % (end - start)) diff --git a/templates/Search.j2.php b/templates/Search.j2.php new file mode 100644 index 0000000..7ddc108 --- /dev/null +++ b/templates/Search.j2.php @@ -0,0 +1,33 @@ +{% extends "base.j2.html" %} +{% block lang %}{% endblock %} +{% block title %}Search results for: {% endblock %} +{% block content %} +
+
+

Search results for:

+
+prepare(" + SELECT + url, category, title, snippet(data, '', '', '[...]', 5, 24) + FROM + data + WHERE + data MATCH :q + ORDER BY + category +"); +$sql->bindValue(':q', $q); +$results = $sql->execute(); + +printf("
"); +while ($row = $results->fetchArray(SQLITE3_ASSOC)) { + printf('
%s
%s
', $row['url'], $row['title'], $row["snippet(data, '', '', '[...]', 5, 24)"]); + +} +printf("
"); +?> +
+{% endblock %} diff --git a/templates/base.j2.html b/templates/base.j2.html index 9c0e659..85fab57 100644 --- a/templates/base.j2.html +++ b/templates/base.j2.html @@ -76,14 +76,12 @@ -