fix: cleanup word index

This commit is contained in:
Michal Szczepanski 2023-04-24 16:44:55 +02:00
parent 1fa7a78169
commit 5a6c083dd2
7 changed files with 15 additions and 30 deletions

@ -19,14 +19,14 @@ import { ICommand } from '../../../model/shared/common.dto';
import { ObjRemoveHashtagsCommand } from '../hashtag/obj-remove-hashtags.command';
import { ObjSnapshotDto } from '../../../model/obj/obj-snapshot.dto';
import { ObjectStoreKeys } from '../../../keys/object.store.keys';
import { WordNlp } from '../../../text/nlp/word.nlp';
import { WordIndex } from '../../../text/index/word.index';
export class ObjRemoveSnapshotContentCommand implements ICommand<Promise<void>> {
constructor(private snapshot: ObjSnapshotDto, private id: number) {}
async execute(): Promise<void> {
const key = `${ObjectStoreKeys.CONTENT_ID}:${this.snapshot.contentId}`;
await WordNlp.removeFlat(this.snapshot.words, this.id);
await WordIndex.removeFlat(this.snapshot.words, this.id);
await new ObjRemoveHashtagsCommand(this.id, this.snapshot.hashtags).execute();

@ -24,7 +24,7 @@ import { ObjAddIdCommand } from '../obj/id/obj-add-id.command';
import { ObjNextIdCommand } from '../obj/id/obj-next-id.command';
import { ObjPagePinDto } from '../../model/obj/obj-pin.dto';
import { ObjectStoreKeys } from '../../keys/object.store.keys';
import { WordNlp } from '../../text/nlp/word.nlp';
import { WordIndex } from '../../text/index/word.index';
import { fnConsoleLog } from '../../fn/console.fn';
export class PinAddCommand implements ICommand<Promise<ObjDto<ObjPagePinDto>>> {
@ -51,7 +51,7 @@ export class PinAddCommand implements ICommand<Promise<ObjDto<ObjPagePinDto>>> {
}
};
await WordNlp.indexFlat(this.pin.snapshot.words, id);
await WordIndex.indexFlat(this.pin.snapshot.words, id);
const key = `${ObjectStoreKeys.OBJECT_ID}:${id}`;

@ -21,7 +21,7 @@ import { ObjAddIdCommand } from '../obj/id/obj-add-id.command';
import { ObjNextIdCommand } from '../obj/id/obj-next-id.command';
import { ObjSnapshotDto } from '../../model/obj/obj-snapshot.dto';
import { ObjectStoreKeys } from '../../keys/object.store.keys';
import { WordNlp } from '../../text/nlp/word.nlp';
import { WordIndex } from '../../text/index/word.index';
export class PageElementSnapshotAddCommand implements ICommand<Promise<void>> {
constructor(private dto: ObjSnapshotDto) {}
@ -45,7 +45,7 @@ export class PageElementSnapshotAddCommand implements ICommand<Promise<void>> {
}
};
await WordNlp.indexFlat(this.dto.words, id);
await WordIndex.indexFlat(this.dto.words, id);
const key = `${ObjectStoreKeys.OBJECT_ID}:${id}`;
await BrowserStorageWrapper.set(key, dto);

@ -21,7 +21,7 @@ import { ObjAddIdCommand } from '../obj/id/obj-add-id.command';
import { ObjNextIdCommand } from '../obj/id/obj-next-id.command';
import { ObjSnapshotDto } from '../../model/obj/obj-snapshot.dto';
import { ObjectStoreKeys } from '../../keys/object.store.keys';
import { WordNlp } from '../../text/nlp/word.nlp';
import { WordIndex } from '../../text/index/word.index';
export class PageSnapshotAddCommand implements ICommand<Promise<void>> {
constructor(private dto: ObjSnapshotDto) {}
@ -44,7 +44,7 @@ export class PageSnapshotAddCommand implements ICommand<Promise<void>> {
encrypted: false
}
};
await WordNlp.indexFlat(this.dto.words, id);
await WordIndex.indexFlat(this.dto.words, id);
const key = `${ObjectStoreKeys.OBJECT_ID}:${id}`;
await BrowserStorageWrapper.set(key, dto);

@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
export class ConstraintsNlp {
export class ConstraintsWord {
static readonly PUNCT_CHARS = new Set([
' ',
'\n',

@ -15,21 +15,18 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { BrowserStorageWrapper } from '../../service/browser.storage.wrapper';
import { ConstraintsNlp } from './constraints.nlp';
import { ConstraintsWord } from './constraints.word';
import { ObjectStoreKeys } from '../../keys/object.store.keys';
import { fnConsoleLog } from '../../fn/console.fn';
export class WordNlp {
private static flatSet = new Set<string>();
export class WordIndex {
static toWordList(sentence: string): string[] {
const words: string[] = [];
let word = '';
let key = '';
for (let i = 0; i < sentence.length; i++) {
key = sentence.charAt(i).toLowerCase();
if (ConstraintsNlp.KEY_MAP[key]) key = ConstraintsNlp.KEY_MAP[key];
if (ConstraintsNlp.PUNCT_CHARS.has(key)) {
if (ConstraintsWord.KEY_MAP[key]) key = ConstraintsWord.KEY_MAP[key];
if (ConstraintsWord.PUNCT_CHARS.has(key)) {
if (word.trim().length > 0) words.push(word.trim());
word = '';
continue;
@ -40,27 +37,19 @@ export class WordNlp {
}
/**
 * Indexes every word in `words` for the object identified by `id`.
 *
 * Words are processed strictly one after another: for each word this awaits
 * `saveStorage(word, id)` and then `saveWord(word)`. Afterwards the contents
 * and size of `flatSet` are logged together with the elapsed milliseconds,
 * and `flatSet` is emptied.
 *
 * NOTE(review): this span was taken from a rendered diff without +/- markers,
 * so some of the statements below may exist only in the pre-commit or only in
 * the post-commit version of the file — confirm against the real source.
 *
 * @param words words to index
 * @param id object id passed through to `saveStorage`
 */
static indexFlat = async (words: string[], id: number): Promise<void> => {
// Start timestamp; only used for the elapsed-time value in the log call below.
const a = Date.now();
for (const word of words) {
await this.saveStorage(word, id);
await this.saveWord(word);
}
fnConsoleLog('indexed', Array.from(this.flatSet), 'count', this.flatSet.size, 'in', Date.now() - a);
// saveStorage skips values already present in flatSet, so empty it here to
// let the next call start from a clean set.
this.flatSet.clear();
};
/**
 * Removes the object identified by `id` from the index entry of every word in
 * `words`, by awaiting `removeStorage(word, id)` for each word in order.
 * Afterwards the contents and size of `flatSet` are logged together with the
 * elapsed milliseconds.
 *
 * NOTE(review): `removeStorage` adds each value to `flatSet` and returns early
 * for values already in it, but nothing here clears `flatSet` (unlike
 * `indexFlat`). Entries therefore appear to persist after this call and could
 * cause a later `indexFlat`/`removeFlat` to skip the same words — confirm.
 *
 * NOTE(review): this span was taken from a rendered diff without +/- markers,
 * so some of the statements below may exist only in the pre-commit or only in
 * the post-commit version of the file — confirm against the real source.
 *
 * @param words words whose index entries should drop `id`
 * @param id object id passed through to `removeStorage`
 */
static removeFlat = async (words: string[], id: number): Promise<void> => {
// Start timestamp; only used for the elapsed-time value in the log call below.
const a = Date.now();
for (const word of words) {
await this.removeStorage(word, id);
}
fnConsoleLog('removed', Array.from(this.flatSet), 'count', this.flatSet.size, 'in', Date.now() - a);
};
private static removeStorage = async (value: string, id: number) => {
if (this.flatSet.has(value)) return;
this.flatSet.add(value);
const key = `${ObjectStoreKeys.SEARCH_INDEX}:${value}`;
const arr = await BrowserStorageWrapper.get<number[]>(key);
if (!arr) return;
@ -78,10 +67,6 @@ export class WordNlp {
};
private static saveStorage = async (value: string, id: number) => {
// skip existing
if (this.flatSet.has(value)) return;
this.flatSet.add(value);
const key = `${ObjectStoreKeys.SEARCH_INDEX}:${value}`;
let arr = await BrowserStorageWrapper.get<number[]>(key);
if (arr) {

@ -16,7 +16,7 @@
*/
import { DetectLanguage } from '../../common/text/detect-language';
import { StopWordRemove } from '../../common/text/stop-word/stop-word-remove';
import { WordNlp } from '../../common/text/nlp/word.nlp';
import { WordIndex } from '../../common/text/index/word.index';
import { fnConsoleLog } from '../../common/fn/console.fn';
export class AutoTagMediator {
@ -46,7 +46,7 @@ export class AutoTagMediator {
fnConsoleLog('LANGUAGE', language);
let tagList = [];
for (const keyword of keywords) {
const words = WordNlp.toWordList(keyword);
const words = WordIndex.toWordList(keyword);
for (const word of words) {
if (word.length <= 1) continue;
tagList.push(word);