fix: cleanup word index
This commit is contained in:
parent
1fa7a78169
commit
5a6c083dd2
@ -19,14 +19,14 @@ import { ICommand } from '../../../model/shared/common.dto';
|
||||
import { ObjRemoveHashtagsCommand } from '../hashtag/obj-remove-hashtags.command';
|
||||
import { ObjSnapshotDto } from '../../../model/obj/obj-snapshot.dto';
|
||||
import { ObjectStoreKeys } from '../../../keys/object.store.keys';
|
||||
import { WordNlp } from '../../../text/nlp/word.nlp';
|
||||
import { WordIndex } from '../../../text/index/word.index';
|
||||
|
||||
export class ObjRemoveSnapshotContentCommand implements ICommand<Promise<void>> {
|
||||
constructor(private snapshot: ObjSnapshotDto, private id: number) {}
|
||||
async execute(): Promise<void> {
|
||||
const key = `${ObjectStoreKeys.CONTENT_ID}:${this.snapshot.contentId}`;
|
||||
|
||||
await WordNlp.removeFlat(this.snapshot.words, this.id);
|
||||
await WordIndex.removeFlat(this.snapshot.words, this.id);
|
||||
|
||||
await new ObjRemoveHashtagsCommand(this.id, this.snapshot.hashtags).execute();
|
||||
|
||||
|
@ -24,7 +24,7 @@ import { ObjAddIdCommand } from '../obj/id/obj-add-id.command';
|
||||
import { ObjNextIdCommand } from '../obj/id/obj-next-id.command';
|
||||
import { ObjPagePinDto } from '../../model/obj/obj-pin.dto';
|
||||
import { ObjectStoreKeys } from '../../keys/object.store.keys';
|
||||
import { WordNlp } from '../../text/nlp/word.nlp';
|
||||
import { WordIndex } from '../../text/index/word.index';
|
||||
import { fnConsoleLog } from '../../fn/console.fn';
|
||||
|
||||
export class PinAddCommand implements ICommand<Promise<ObjDto<ObjPagePinDto>>> {
|
||||
@ -51,7 +51,7 @@ export class PinAddCommand implements ICommand<Promise<ObjDto<ObjPagePinDto>>> {
|
||||
}
|
||||
};
|
||||
|
||||
await WordNlp.indexFlat(this.pin.snapshot.words, id);
|
||||
await WordIndex.indexFlat(this.pin.snapshot.words, id);
|
||||
|
||||
const key = `${ObjectStoreKeys.OBJECT_ID}:${id}`;
|
||||
|
||||
|
@ -21,7 +21,7 @@ import { ObjAddIdCommand } from '../obj/id/obj-add-id.command';
|
||||
import { ObjNextIdCommand } from '../obj/id/obj-next-id.command';
|
||||
import { ObjSnapshotDto } from '../../model/obj/obj-snapshot.dto';
|
||||
import { ObjectStoreKeys } from '../../keys/object.store.keys';
|
||||
import { WordNlp } from '../../text/nlp/word.nlp';
|
||||
import { WordIndex } from '../../text/index/word.index';
|
||||
|
||||
export class PageElementSnapshotAddCommand implements ICommand<Promise<void>> {
|
||||
constructor(private dto: ObjSnapshotDto) {}
|
||||
@ -45,7 +45,7 @@ export class PageElementSnapshotAddCommand implements ICommand<Promise<void>> {
|
||||
}
|
||||
};
|
||||
|
||||
await WordNlp.indexFlat(this.dto.words, id);
|
||||
await WordIndex.indexFlat(this.dto.words, id);
|
||||
|
||||
const key = `${ObjectStoreKeys.OBJECT_ID}:${id}`;
|
||||
await BrowserStorageWrapper.set(key, dto);
|
||||
|
@ -21,7 +21,7 @@ import { ObjAddIdCommand } from '../obj/id/obj-add-id.command';
|
||||
import { ObjNextIdCommand } from '../obj/id/obj-next-id.command';
|
||||
import { ObjSnapshotDto } from '../../model/obj/obj-snapshot.dto';
|
||||
import { ObjectStoreKeys } from '../../keys/object.store.keys';
|
||||
import { WordNlp } from '../../text/nlp/word.nlp';
|
||||
import { WordIndex } from '../../text/index/word.index';
|
||||
|
||||
export class PageSnapshotAddCommand implements ICommand<Promise<void>> {
|
||||
constructor(private dto: ObjSnapshotDto) {}
|
||||
@ -44,7 +44,7 @@ export class PageSnapshotAddCommand implements ICommand<Promise<void>> {
|
||||
encrypted: false
|
||||
}
|
||||
};
|
||||
await WordNlp.indexFlat(this.dto.words, id);
|
||||
await WordIndex.indexFlat(this.dto.words, id);
|
||||
|
||||
const key = `${ObjectStoreKeys.OBJECT_ID}:${id}`;
|
||||
await BrowserStorageWrapper.set(key, dto);
|
||||
|
@ -14,7 +14,7 @@
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
export class ConstraintsNlp {
|
||||
export class ConstraintsWord {
|
||||
static readonly PUNCT_CHARS = new Set([
|
||||
' ',
|
||||
'\n',
|
@ -15,21 +15,18 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
import { BrowserStorageWrapper } from '../../service/browser.storage.wrapper';
|
||||
import { ConstraintsNlp } from './constraints.nlp';
|
||||
import { ConstraintsWord } from './constraints.word';
|
||||
import { ObjectStoreKeys } from '../../keys/object.store.keys';
|
||||
import { fnConsoleLog } from '../../fn/console.fn';
|
||||
|
||||
export class WordNlp {
|
||||
private static flatSet = new Set<string>();
|
||||
|
||||
export class WordIndex {
|
||||
static toWordList(sentence: string): string[] {
|
||||
const words: string[] = [];
|
||||
let word = '';
|
||||
let key = '';
|
||||
for (let i = 0; i < sentence.length; i++) {
|
||||
key = sentence.charAt(i).toLowerCase();
|
||||
if (ConstraintsNlp.KEY_MAP[key]) key = ConstraintsNlp.KEY_MAP[key];
|
||||
if (ConstraintsNlp.PUNCT_CHARS.has(key)) {
|
||||
if (ConstraintsWord.KEY_MAP[key]) key = ConstraintsWord.KEY_MAP[key];
|
||||
if (ConstraintsWord.PUNCT_CHARS.has(key)) {
|
||||
if (word.trim().length > 0) words.push(word.trim());
|
||||
word = '';
|
||||
continue;
|
||||
@ -40,27 +37,19 @@ export class WordNlp {
|
||||
}
|
||||
|
||||
static indexFlat = async (words: string[], id: number): Promise<void> => {
|
||||
const a = Date.now();
|
||||
for (const word of words) {
|
||||
await this.saveStorage(word, id);
|
||||
await this.saveWord(word);
|
||||
}
|
||||
fnConsoleLog('indexed', Array.from(this.flatSet), 'count', this.flatSet.size, 'in', Date.now() - a);
|
||||
this.flatSet.clear();
|
||||
};
|
||||
|
||||
static removeFlat = async (words: string[], id: number): Promise<void> => {
|
||||
const a = Date.now();
|
||||
for (const word of words) {
|
||||
await this.removeStorage(word, id);
|
||||
}
|
||||
fnConsoleLog('removed', Array.from(this.flatSet), 'count', this.flatSet.size, 'in', Date.now() - a);
|
||||
};
|
||||
|
||||
private static removeStorage = async (value: string, id: number) => {
|
||||
if (this.flatSet.has(value)) return;
|
||||
this.flatSet.add(value);
|
||||
|
||||
const key = `${ObjectStoreKeys.SEARCH_INDEX}:${value}`;
|
||||
const arr = await BrowserStorageWrapper.get<number[]>(key);
|
||||
if (!arr) return;
|
||||
@ -78,10 +67,6 @@ export class WordNlp {
|
||||
};
|
||||
|
||||
private static saveStorage = async (value: string, id: number) => {
|
||||
// skip existing
|
||||
if (this.flatSet.has(value)) return;
|
||||
this.flatSet.add(value);
|
||||
|
||||
const key = `${ObjectStoreKeys.SEARCH_INDEX}:${value}`;
|
||||
let arr = await BrowserStorageWrapper.get<number[]>(key);
|
||||
if (arr) {
|
@ -16,7 +16,7 @@
|
||||
*/
|
||||
import { DetectLanguage } from '../../common/text/detect-language';
|
||||
import { StopWordRemove } from '../../common/text/stop-word/stop-word-remove';
|
||||
import { WordNlp } from '../../common/text/nlp/word.nlp';
|
||||
import { WordIndex } from '../../common/text/index/word.index';
|
||||
import { fnConsoleLog } from '../../common/fn/console.fn';
|
||||
|
||||
export class AutoTagMediator {
|
||||
@ -46,7 +46,7 @@ export class AutoTagMediator {
|
||||
fnConsoleLog('LANGUAGE', language);
|
||||
let tagList = [];
|
||||
for (const keyword of keywords) {
|
||||
const words = WordNlp.toWordList(keyword);
|
||||
const words = WordIndex.toWordList(keyword);
|
||||
for (const word of words) {
|
||||
if (word.length <= 1) continue;
|
||||
tagList.push(word);
|
||||
|
Loading…
Reference in New Issue
Block a user