1
0
mirror of https://github.com/go-gitea/gitea synced 2024-11-07 09:15:53 +01:00

Make repository indexer tokenize by camel case selectable

This commit is contained in:
Guillermo Prandi 2019-08-03 19:15:48 -03:00
parent 6c0c5c5310
commit a8d4e40af5
4 changed files with 28 additions and 12 deletions

@ -296,6 +296,9 @@ REPO_INDEXER_ENABLED = false
REPO_INDEXER_PATH = indexers/repos.bleve
UPDATE_BUFFER_LEN = 20
MAX_FILE_SIZE = 1048576
; Break camel case names into separate words for indexing.
; It's imperative to delete any previous indexes from REPO_INDEXER_PATH after changing this setting.
REPO_INDEXER_CAMEL_CASE = true
[admin]
; Disallow regular (non-admin) users from creating organizations.

@ -179,6 +179,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`.
- `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search.
- `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request.
- `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed.
- `REPO_INDEXER_CAMEL_CASE`: **true**: When `REPO_INDEXER_CAMEL_CASE` is true, repository indexer will break camel case into words, so thisCameCaseName will be indexed as this, camel, case, name. It's imperative to delete any previous indexes from REPO_INDEXER_PATH after changing this setting.
## Security (`security`)

@ -107,11 +107,20 @@ func createRepoIndexer(path string, latestVersion int) error {
mapping := bleve.NewIndexMapping()
if err = addUnicodeNormalizeTokenFilter(mapping); err != nil {
return err
} else if err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
}
var tokenFilters []string
if setting.Indexer.RepoUseCamelCaseTokenizer {
tokenFilters = []string{unicodeNormalizeName, camelcase.Name, lowercase.Name, unique.Name}
} else {
tokenFilters = []string{unicodeNormalizeName, lowercase.Name, unique.Name}
}
if err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name, unique.Name},
"token_filters": tokenFilters,
}); err != nil {
return err
}

@ -19,16 +19,17 @@ const (
var (
// Indexer settings
Indexer = struct {
IssueType string
IssuePath string
RepoIndexerEnabled bool
RepoPath string
UpdateQueueLength int
MaxIndexerFileSize int64
IssueQueueType string
IssueQueueDir string
IssueQueueConnStr string
IssueQueueBatchNumber int
IssueType string
IssuePath string
RepoIndexerEnabled bool
RepoPath string
UpdateQueueLength int
MaxIndexerFileSize int64
IssueQueueType string
IssueQueueDir string
IssueQueueConnStr string
IssueQueueBatchNumber int
RepoUseCamelCaseTokenizer bool
}{
IssueType: "bleve",
IssuePath: "indexers/issues.bleve",
@ -53,6 +54,7 @@ func newIndexerService() {
}
Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
Indexer.RepoUseCamelCaseTokenizer = sec.Key("REPO_INDEXER_CAMEL_CASE").MustBool(true)
Indexer.IssueQueueType = sec.Key("ISSUE_INDEXER_QUEUE_TYPE").MustString(LevelQueueType)
Indexer.IssueQueueDir = sec.Key("ISSUE_INDEXER_QUEUE_DIR").MustString(path.Join(AppDataPath, "indexers/issues.queue"))
Indexer.IssueQueueConnStr = sec.Key("ISSUE_INDEXER_QUEUE_CONN_STR").MustString(path.Join(AppDataPath, ""))