Optimize search

This commit is contained in:
Thibaut 2014-11-02 19:46:00 -05:00
parent 50c18b2f89
commit a3aa03bc3d
2 changed files with 115 additions and 97 deletions

View file

@ -1,8 +1,99 @@
#
# Match functions
#
# The dot: the scoring functions below treat it as the boundary character
# inside the strings being matched.
SEPARATOR = '.'
# Shared scratch state for the match functions, all initialized to null by the
# single chained assignment below. The match functions take no arguments; they
# read and write these variables directly.
query = # search query (assigned in Searcher#setup)
queryLength = # length of the query (assigned in Searcher#setup)
value = # string currently being matched (assigned in Searcher#matchChunk)
valueLength = # length of the string currently being matched
matcher = # current match function
fuzzyRegexp = # query fuzzy regexp
index = # position of the query in the string being matched
lastIndex = # last position of the query in the string being matched
match = # regexp match data
score = # score for the current match
separators = # counter
i = null # cursor
# Exact (substring) matcher. Works on the shared match state: reads `query`
# and `value`, sets `index` and `lastIndex`, and returns the score of the
# best-scoring occurrence of the query, or undefined when there is no
# acceptable match.
`function exactMatch() {`
index = value.indexOf(query)
return unless index >= 0

lastIndex = value.lastIndexOf(query)
# Single occurrence: nothing to compare.
return scoreExactMatch() if index is lastIndex

# The query occurs more than once: score the first and the last occurrence and
# keep the best. scoreExactMatch() returns undefined for a position it rejects
# (a single-character query in the middle of a word), and Math.max() yields
# NaN as soon as one argument is undefined, which used to discard a perfectly
# valid score for the other occurrence. Coerce each side to 0 first, and map a
# final 0 back to undefined so "no match" keeps a single representation.
# `index = lastIndex` moves the shared cursor to the last occurrence before the
# second scoring call; it is always >= 1 here, hence truthy.
return Math.max(scoreExactMatch() or 0, ((index = lastIndex) and scoreExactMatch()) or 0) or undefined
`}`
# Scores the occurrence of `query` located at `index` in `value`, using the
# shared match state. Returns a score of at least 1, or undefined when a
# single-character query is neither at the start of the string nor right
# after a dot.
`function scoreExactMatch() {`
# Start from 100 and take off one point per character the query leaves uncovered.
score = 100 - (valueLength - queryLength)

if index > 0
  if value.charAt(index - 1) isnt SEPARATOR
    # A one-character query only counts at the very beginning of the string
    # or directly after a dot.
    return if queryLength is 1
    # Walk back to the closest preceding dot (or past the start of the string).
    i = index - 2
    while i >= 0 and value.charAt(i) isnt SEPARATOR
      i--
    # (1) One point off per unmatched character between that dot (or the
    #     beginning of the string) and the query.
    score -= index - i
    # (2) One point off per unmatched character after the query.
    score -= valueLength - queryLength - index
  else
    # Directly after a dot: same score as a match at the beginning of the
    # string, minus one.
    score += index - 1

  # One point off per dot in front of the query, ignoring the dot that
  # immediately precedes it.
  separators = 0
  i = index - 2
  until i < 0
    separators++ if value.charAt(i) is SEPARATOR
    i--
  score -= separators

# Five points off per dot found after the query.
separators = 0
i = valueLength - queryLength - index - 1
until i < 0
  separators++ if value.charAt(index + queryLength + i) is SEPARATOR
  i--
score -= separators * 5

# A match never scores below 1.
return Math.max(1, score)
`}`
# Fuzzy matcher. Runs `fuzzyRegexp` against `value` (shared match state) and
# returns a score banded by where the match sits in the string, or undefined
# when the value is not longer than the query, already contains the query
# verbatim, or does not match the regexp.
`function fuzzyMatch() {`
return if valueLength <= queryLength
return if value.indexOf(query) >= 0
return unless match = fuzzyRegexp.exec(value)

switch
  # Match at the beginning of the string or right after a dot: 66..100.
  when match.index is 0 or value.charAt(match.index - 1) is SEPARATOR
    return Math.max(66, 100 - match[0].length)
  # Match running up to the end of the string: 33..67.
  when match.index + match[0].length is valueLength
    return Math.max(33, 67 - match[0].length)
  # Match somewhere in the middle of the string: 1..34.
  else
    return Math.max(1, 34 - match[0].length)
`}`
#
# Searchers
#
class app.Searcher
$.extend @prototype, Events
CHUNK_SIZE = 20000
SEPARATOR = '.'
DEFAULTS =
max_results: app.config.max_results
@ -11,34 +102,36 @@ class app.Searcher
constructor: (options = {}) ->
  # Layer the caller's options over DEFAULTS, using a fresh object literal as
  # the extend target.
  @options = $.extend({}, DEFAULTS, options)
find: (data, attr, query) ->
find: (data, attr, q) ->
@kill()
@data = data
@attr = attr
@query = query
@query = q
@setup()
if @isValid() then @match() else @end()
return
setup: ->
@query = @normalizeQuery @query
@queryLength = @query.length
query = @query = @normalizeQuery(@query)
queryLength = query.length
@dataLength = @data.length
@matchers = ['exactMatch']
@matchers = [exactMatch]
@totalResults = 0
@setupFuzzy()
return
setupFuzzy: ->
if @queryLength >= @options.fuzzy_min_length
@fuzzyRegexp = @queryToFuzzyRegexp @query
@matchers.push 'fuzzyMatch'
if queryLength >= @options.fuzzy_min_length
fuzzyRegexp = @queryToFuzzyRegexp(query)
@matchers.push(fuzzyMatch)
else
fuzzyRegexp = null
return
isValid: ->
@queryLength > 0
queryLength > 0
end: ->
@triggerResults [] unless @totalResults
@ -53,9 +146,8 @@ class app.Searcher
return
free: ->
@data = @attr = @query = @queryLength = @dataLength =
@fuzzyRegexp = @matchers = @totalResults = @scoreMap =
@cursor = @matcher = @timeout = null
@data = @attr = @dataLength = @matchers = @matcher = @query =
@totalResults = @scoreMap = @cursor = @timeout = null
return
match: =>
@ -82,8 +174,11 @@ class app.Searcher
return
matchChunk: ->
matcher = @matcher
for [0...@chunkSize()]
if score = @[@matcher](@data[@cursor][@attr])
value = @data[@cursor][@attr]
valueLength = value.length
if score = matcher()
@addResult @data[@cursor], score
@cursor++
return
@ -131,83 +226,6 @@ class app.Searcher
chars[i] = $.escapeRegexp(char) for char, i in chars
new RegExp chars.join('.*?') # abc -> /a.*?b.*?c.*?/
#
# Match functions
#
index = # position of the query in the string being matched
lastIndex = # last position of the query in the string being matched
match = # regexp match data
score = # score for the current match
separators = # counter
i = null # cursor
# Exact (substring) matcher: returns the score of the best-scoring occurrence
# of @query in `value`, or undefined when there is no acceptable match.
exactMatch: (value) ->
  index = value.indexOf @query
  return unless index >= 0

  lastIndex = value.lastIndexOf @query
  # Single occurrence: nothing to compare.
  return @scoreExactMatch(value, index) if index is lastIndex

  # Several occurrences: score the first and the last and keep the best.
  # scoreExactMatch returns undefined for a position it rejects (a
  # single-character query in the middle of a word), and Math.max() yields NaN
  # as soon as one argument is undefined, which used to discard a valid score
  # for the other occurrence. Coerce each side to 0, and map a final 0 back to
  # undefined so "no match" keeps a single representation.
  Math.max(@scoreExactMatch(value, index) or 0, @scoreExactMatch(value, lastIndex) or 0) or undefined
# Scores the occurrence of @query located at `index` in `value`. Returns a
# score of at least 1, or undefined when a single-character query is neither
# at the start of the string nor right after a dot.
scoreExactMatch: (value, index) ->
  # Start from 100 and take off one point per character the query leaves uncovered.
  score = 100 - (value.length - @queryLength)

  if index > 0
    if value[index - 1] isnt SEPARATOR
      # A one-character query only counts at the very beginning of the string
      # or directly after a dot.
      return if @queryLength is 1
      # Walk back to the closest preceding dot (or past the start of the string).
      i = index - 2
      while i >= 0 and value[i] isnt SEPARATOR
        i--
      # (1) One point off per unmatched character between that dot (or the
      #     beginning of the string) and the query.
      score -= index - i
      # (2) One point off per unmatched character after the query.
      score -= value.length - @queryLength - index
    else
      # Directly after a dot: same score as a match at the beginning of the
      # string, minus one.
      score += index - 1

    # One point off per dot in front of the query, ignoring the dot that
    # immediately precedes it.
    separators = 0
    i = index - 2
    until i < 0
      separators++ if value[i] is SEPARATOR
      i--
    score -= separators

  # Five points off per dot found after the query.
  separators = 0
  i = value.length - @queryLength - index - 1
  until i < 0
    separators++ if value[index + @queryLength + i] is SEPARATOR
    i--
  score -= separators * 5

  # A match never scores below 1.
  Math.max(1, score)
# Fuzzy matcher. Runs @fuzzyRegexp against `value` and returns a score banded
# by where the match sits in the string, or undefined when the value is not
# longer than the query, already contains the query verbatim, or does not
# match the regexp.
fuzzyMatch: (value) ->
  return if value.length <= @queryLength
  return if value.indexOf(@query) >= 0
  return unless match = @fuzzyRegexp.exec(value)

  switch
    # Match at the beginning of the string or right after a dot: 66..100.
    when match.index is 0 or value[match.index - 1] is SEPARATOR
      Math.max(66, 100 - match[0].length)
    # Match running up to the end of the string: 33..67.
    when match.index + match[0].length is value.length
      Math.max(33, 67 - match[0].length)
    # Match somewhere in the middle of the string: 1..34.
    else
      Math.max(1, 34 - match[0].length)
class app.SynchronousSearcher extends app.Searcher
match: =>
if @matcher

View file

@ -35,15 +35,15 @@ app.Searcher = ->
_match = @match.bind(@)
@match = =>
if @matcher
console.timeEnd @matcher
if @matcher is 'exactMatch'
for entries, score in @scoreMap by -1 when entries
console.log '' + score + ': ' + entries.map((entry) -> entry.text).join("\n ")
console.timeEnd @matcher.name
if @matcher.name is 'exactMatch'
for entries, score in @scoreMap by -1 when entries
console.log '' + score + ': ' + entries.map((entry) -> entry.text).join("\n ")
_match()
_setupMatcher = @setupMatcher.bind(@)
@setupMatcher = ->
console.time @matcher
console.time @matcher.name
_setupMatcher()
_end = @end.bind(@)
@ -56,7 +56,7 @@ app.Searcher = ->
_kill = @kill.bind(@)
@kill = ->
if @timeout
console.timeEnd @matcher if @matcher
console.timeEnd @matcher.name if @matcher
console.groupEnd()
console.timeEnd 'Total'
console.warn 'Killed'