feat: add removed-elements cache for page compute

This commit is contained in:
Michal Szczepanski 2023-10-15 10:41:26 +02:00
parent 3ca91da1de
commit b8616dc362
21 changed files with 135 additions and 186 deletions

38
package-lock.json generated

@ -29,7 +29,6 @@
"marked": "^4.2.5",
"nanoid": "^4.0.0",
"pako": "^2.1.0",
"parse5": "^7.1.2",
"pdfjs-dist": "^3.10.111",
"prosemirror-commands": "^1.3.1",
"prosemirror-dropcursor": "^1.6.1",
@ -21207,28 +21206,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"dependencies": {
"entities": "^4.4.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5/node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/path-exists": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@ -35973,21 +35950,6 @@
"lines-and-columns": "^1.1.6"
}
},
"parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"requires": {
"entities": "^4.4.0"
},
"dependencies": {
"entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw=="
}
}
},
"path-exists": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",

@ -78,7 +78,6 @@
"marked": "^4.2.5",
"nanoid": "^4.0.0",
"pako": "^2.1.0",
"parse5": "^7.1.2",
"pdfjs-dist": "^3.10.111",
"prosemirror-commands": "^1.3.1",
"prosemirror-dropcursor": "^1.6.1",

@ -16,7 +16,7 @@
*/
import { ObjVideoDataDto } from '../../../../model/obj/page-snapshot.dto';
import { PinEditModel } from '../../model/pin-edit.model';
import { XpathFactory } from '../../../../factory/xpath.factory';
import { XpathFactory } from '@pinmenote/page-compute';
import { applyStylesToElement } from '../../../../style.utils';
import { fnVideoSecondsTime } from '../../../../fn/fn-date-format';

@ -17,7 +17,7 @@
import { HtmlComponent } from '../../model/pin-view.model';
import { PinEditModel } from '../../model/pin-edit.model';
import { PinUpdateCommand } from '../../../../command/pin/pin-update.command';
import { XpathFactory } from '../../../../factory/xpath.factory';
import { XpathFactory } from '@pinmenote/page-compute';
import { applyStylesToElement } from '../../../../style.utils';
import { fnConsoleLog } from '../../../../fn/fn-console';
import { iconButtonStyles } from '../../styles/icon-button.styles';

@ -14,8 +14,16 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { DefaultTreeAdapterMap } from 'parse5/dist/tree-adapters/default';
import { parse } from 'parse5';
/**
 * Parses `value` with parse5's `parse` and hands the result back typed as
 * `DefaultTreeAdapterMap`.
 *
 * NOTE(review): the result is only cast, never validated — confirm that callers
 * really expect the adapter map type rather than the parsed document node.
 */
export const fnParse5 = (value: string): DefaultTreeAdapterMap => {
  const parsed: unknown = parse(value);
  return parsed as DefaultTreeAdapterMap;
};
import { ObjRectangleDto } from '../model/obj/obj-utils.dto';
/**
 * Helpers for measuring DOM elements.
 */
export class ElementSizeFactory {
  /**
   * Reads the bounding client rectangle of `ref` and returns its position and
   * size with every component rounded to the nearest integer.
   *
   * @param ref element to measure
   * @returns rectangle with rounded `x`, `y`, `width` and `height`
   */
  static computeRect = (ref: HTMLElement): ObjRectangleDto => {
    const { x, y, width, height } = ref.getBoundingClientRect();
    return {
      x: Math.round(x),
      y: Math.round(y),
      width: Math.round(width),
      height: Math.round(height)
    };
  };
}

@ -1,117 +0,0 @@
/*
* This file is part of the pinmenote-extension distribution (https://github.com/pinmenote/pinmenote-extension).
* Copyright (c) 2023 Michal Szczepanski.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { ObjRectangleDto } from '../model/obj/obj-utils.dto';
/**
 * One step of a generated XPath string: the lowercased node name and its
 * 1-based position among the same-named child nodes of its parent.
 */
interface XPathTag {
index: number;
tagName: string;
}
/**
 * One step of a node path: the node itself and its 1-based position among the
 * same-named child nodes of its parent.
 */
interface XPathNode {
index: number;
node: Node;
}
// Matches a bracketed segment such as `[2]` (non-greedy, up to the first `]`);
// the global flag lets `replaceAll` strip every such segment from a path step.
const XPATH_INDEX_REGEX = new RegExp('(\\[)(.*?])', 'g');
/**
 * Builds positional XPath expressions (`/html/body/tag[n]/...`) for DOM nodes and
 * resolves such expressions against a document or a plain node tree.
 */
export class XpathFactory {
  /**
   * Builds an absolute positional XPath for `element` by walking up its
   * `parentElement` chain.
   *
   * The topmost collected step is dropped and replaced by the fixed `/html/body`
   * prefix. When no step remains below that prefix (the node has no parent
   * element, or only one ancestor level was collected) the bare prefix
   * `/html/body` is returned instead of a path with a trailing slash.
   *
   * @param element node to describe
   * @returns XPath string such as `/html/body/div[2]/span[1]`
   */
  static newXPathString(element: Node): string {
    let child = element;
    let parent = child.parentElement;
    const path: XPathTag[] = [];
    while (parent) {
      const index = XpathFactory.findNodeNameIndex(Array.from(parent.childNodes), child);
      path.push({
        index,
        tagName: child.nodeName.toLowerCase()
      });
      child = parent;
      parent = parent.parentElement;
    }
    path.reverse();
    // The first step is already covered by the hard-coded `/html/body` prefix.
    path.splice(0, 1);
    if (path.length === 0) return '/html/body';
    return `/html/body/${path.map((p) => `${p.tagName}[${p.index}]`).join('/')}`;
  }

  /**
   * Collects `element` and each ancestor that has a parent element, ordered from
   * the outermost to `element` itself, together with each node's 1-based index
   * among same-named children of its parent.
   *
   * @param element node to describe
   * @returns path entries, outermost first
   */
  static newXPathNode(element: Node): XPathNode[] {
    let child = element;
    let parent = child.parentElement;
    const path: XPathNode[] = [];
    while (parent) {
      const index = XpathFactory.findNodeNameIndex(Array.from(parent.childNodes), child);
      path.push({
        index,
        node: child
      });
      child = parent;
      parent = parent.parentElement;
    }
    return path.reverse();
  }

  /**
   * Returns the 1-based position of `child` among the entries of `nodes` that
   * share its `nodeName`; counting stops once `child` itself is reached.
   */
  private static findNodeNameIndex(nodes: ChildNode[], child: Node): number {
    let index = 1;
    for (const node of nodes) {
      if (node === child) break;
      if (node.nodeName === child.nodeName) index++;
    }
    return index;
  }

  /**
   * Resolves a positional path (for example `/html/body/div[2]`) against a plain
   * node tree whose nodes expose `childNodes` and whose element nodes expose a
   * lowercase `tagName`.
   *
   * For each step the n-th child with a matching `tagName` is selected; a step
   * without an index selects the first match. Previously the search stopped at
   * the first matching child even when a later sibling was requested, so any
   * index above 1 resolved to the wrong node.
   *
   * @param path positional path starting with `/`
   * @param tree root node to start from
   * @returns the matching node, or `undefined` when any step cannot be resolved
   */
  static evaluateTree(path: string, tree: any): object | undefined {
    const steps = path.split('/');
    // Drop the empty entry produced by the leading '/'.
    steps.shift();
    let subtree = tree;
    for (const step of steps) {
      const m = step.match(XPATH_INDEX_REGEX);
      let tag = step.toLowerCase();
      let index = 0;
      if (m) {
        tag = step.replaceAll(XPATH_INDEX_REGEX, '').toLowerCase();
        // XPath positions are 1-based; convert to the number of matches to skip.
        index = parseInt(m[0].substring(1, m[0].length - 1), 10) - 1;
      }
      let found = false;
      for (const child of subtree.childNodes ?? []) {
        if (child.tagName !== tag) continue;
        if (index === 0) {
          subtree = child;
          found = true;
          break;
        }
        // Not the requested sibling yet; a non-numeric or non-positive index
        // never reaches 0 here and therefore resolves to undefined.
        index--;
      }
      if (!found) return undefined;
    }
    return subtree;
  }

  /**
   * Evaluates `path` against `document` and returns the first-ordered-node result.
   */
  static newXPathResult(document: Document, path: string): XPathResult {
    return document.evaluate(path, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE);
  }

  /**
   * Returns the bounding client rectangle of `ref` with all components rounded
   * to the nearest integer.
   */
  static computeRect = (ref: HTMLElement): ObjRectangleDto => {
    const rect = ref.getBoundingClientRect();
    return {
      x: Math.round(rect.x),
      y: Math.round(rect.y),
      width: Math.round(rect.width),
      height: Math.round(rect.height)
    };
  };
}

@ -23,3 +23,16 @@ export const fnSha256 = (value: string): string => {
export const fnSha256Object = (value: any): string => {
return sha256(JSON.stringify(value));
};
/**
 * Computes a signed 32-bit hash of `value` by folding each UTF-16 code unit
 * into the running hash as `hash * 31 + codeUnit`, truncated to 32 bits after
 * every step.
 *
 * @param value string to hash
 * @returns signed 32-bit integer; `0` for the empty string
 */
export const fnHash = (value: string): number => {
  let result = 0;
  for (let pos = 0; pos < value.length; pos++) {
    const code = value.charCodeAt(pos);
    // (result << 5) - result is result * 31; `| 0` keeps it a 32-bit integer.
    result = ((result << 5) - result + code) | 0;
  }
  return result;
};

@ -15,7 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { ObjVideoDataDto } from '../model/obj/page-snapshot.dto';
import { XpathFactory } from '../factory/xpath.factory';
import { XpathFactory } from '@pinmenote/page-compute';
export const fnResolveVideoTime = (video?: ObjVideoDataDto): void => {
if (!video) return;

@ -18,7 +18,7 @@ import { ICommand } from '../../../common/model/shared/common.dto';
import { ObjDto } from '../../../common/model/obj/obj.dto';
import { ObjPinDto } from '../../../common/model/obj/obj-pin.dto';
import { PinComponentAddCommand } from './pin-component-add.command';
import { XpathFactory } from '../../../common/factory/xpath.factory';
import { XpathFactory } from '@pinmenote/page-compute';
import { fnConsoleLog } from '../../../common/fn/fn-console';
import { fnIsElementHidden } from '../../../common/fn/fn-is-element-hidden';

@ -19,7 +19,7 @@ import { ObjDto } from '../../../common/model/obj/obj.dto';
import { ObjPinDto } from '../../../common/model/obj/obj-pin.dto';
import { PinComponentAddCommand } from './pin-component-add.command';
import { PinPendingStore } from '../../store/pin-pending.store';
import { XpathFactory } from '../../../common/factory/xpath.factory';
import { XpathFactory } from '@pinmenote/page-compute';
import { fnIsElementHidden } from '../../../common/fn/fn-is-element-hidden';
export class PinAddXpathCommand implements ICommand<boolean> {

@ -23,6 +23,7 @@ import { PageSegmentAddCommand } from '../../../common/command/snapshot/segment/
import { TinyDispatcher } from '@pinmenote/tiny-dispatcher';
import { fnConsoleLog } from '../../../common/fn/fn-console';
import { fnIframeIndex } from '../../../common/fn/fn-iframe-index';
import { DocumentStore } from '../../store/document.store';
export class ContentFetchIframeCommand implements ICommand<Promise<void>> {
private savedHash = new Set<string>();
@ -33,6 +34,7 @@ export class ContentFetchIframeCommand implements ICommand<Promise<void>> {
fnConsoleLog('ContentFetchIframeCommand->execute', this.href, this.uid, 'children', document.body.children.length);
const snapshot = await PageCompute.compute(
document.body,
DocumentStore.getInstance().cache,
this.contentCallback,
IFrameStore.getInstance(),
[],

@ -21,6 +21,7 @@ import { IFrameStore } from '../../store/iframe.store';
import { PageSegmentAddCommand } from '../../../common/command/snapshot/segment/page-segment-add.command';
import { TinyDispatcher } from '@pinmenote/tiny-dispatcher';
import { fnConsoleLog } from '../../../common/fn/fn-console';
import { DocumentStore } from '../../store/document.store';
export class ContentPageSegmentSaveCommand implements ICommand<Promise<string>> {
private savedHash = new Set<string>();
@ -33,6 +34,7 @@ export class ContentPageSegmentSaveCommand implements ICommand<Promise<string>>
const snapshot = await PageCompute.compute(
this.element,
DocumentStore.getInstance().cache,
this.contentCallback,
IFrameStore.getInstance(),
skipAttributes,

@ -23,17 +23,18 @@ import {
import { AutoTagMediator } from '../../mediator/auto-tag.mediator';
import { ContentPageSegmentSaveCommand } from './content-page-segment-save.command';
import { ContentPageSegmentSaveImageCommand } from './content-page-segment-save-image.command';
import { ElementSizeFactory } from '../../../common/factory/element-size.factory';
import { ICommand } from '../../../common/model/shared/common.dto';
import { ImageResizeFactory } from '../../../common/factory/image-resize.factory';
import { ObjRectangleDto } from '../../../common/model/obj/obj-utils.dto';
import { ObjUrlDto } from '../../../common/model/obj/obj.dto';
import { PageSkipAttribute } from '@pinmenote/page-compute';
import { PinStore } from '../../store/pin.store';
import { ScreenshotFactory } from '../../../common/factory/screenshot.factory';
import { SettingsConfig } from '../../../common/environment';
import { XpathFactory } from '../../../common/factory/xpath.factory';
import { XpathFactory } from '@pinmenote/page-compute';
import { fnConsoleLog } from '../../../common/fn/fn-console';
import { fnSha256Object } from '../../../common/fn/fn-hash';
import { ImageResizeFactory } from '../../../common/factory/image-resize.factory';
export class ContentPageSnapshotCreateCommand implements ICommand<Promise<PageSnapshotDto>> {
constructor(
@ -56,7 +57,7 @@ export class ContentPageSnapshotCreateCommand implements ICommand<Promise<PageSn
rect = { x: 0, y: 0, width: window.innerWidth, height: window.innerHeight };
} else {
title = this.element.innerText.substring(0, 100) || document.title || this.url.origin;
rect = this.canvas ? this.canvas.rect : XpathFactory.computeRect(this.element);
rect = this.canvas ? this.canvas.rect : ElementSizeFactory.computeRect(this.element);
xpath = XpathFactory.newXPathString(this.element);
isPartial = true;
}

@ -36,10 +36,13 @@ import { UrlFactory } from '../common/factory/url.factory';
import { fnUid } from '../common/fn/fn-uid';
import { environmentConfig } from '../common/environment';
import { LoginExtensionCommand } from './command/login/login-extension.command';
import { DocumentStore } from './store/document.store';
class PinMeScript {
private href: string;
private timeoutId = 0;
private mutations: MutationObserver;
private doc: DocumentStore = DocumentStore.getInstance();
constructor(private readonly uid: string, private ms: number) {
this.href = UrlFactory.normalizeHref(window.location.href);
@ -47,6 +50,9 @@ class PinMeScript {
// @see iframe/iframe-script.ts for iframe
ContentMessageHandler.start(this.href);
this.mutations = new MutationObserver(this.handleMutations);
this.mutations.observe(document.documentElement || document.body, { childList: true, subtree: true });
fnConsoleLog('PinMeScript->constructor', this.href, 'referrer', document.referrer);
document.addEventListener('visibilitychange', this.handleVisibilityChange);
@ -76,6 +82,16 @@ class PinMeScript {
});
};
/**
 * Mutation observer callback: for every `childList` record, forwards each
 * removed node together with the record's target to the document store.
 */
private handleMutations = (mutationList: MutationRecord[]) => {
  mutationList
    .filter((record) => record.type === 'childList')
    .forEach((record) => {
      record.removedNodes.forEach((gone) => {
        this.doc.remove(gone, record.target);
      });
    });
};
private handleVisibilityChange = async (): Promise<void> => {
fnConsoleLog('PinMeScript->handleVisibilityChange', this.uid);
if (await this.invalidateContentScript()) {

@ -16,11 +16,12 @@
*/
import { ObjPinDto, PinBorderDataDto, PinIframeDto } from '../../common/model/obj/obj-pin.dto';
import { ContentSettingsStore } from '../store/content-settings.store';
import { ElementSizeFactory } from '../../common/factory/element-size.factory';
import { ObjRectangleDto } from '../../common/model/obj/obj-utils.dto';
import { ObjUrlDto } from '../../common/model/obj/obj.dto';
import { PageCanvasDto } from '../../common/model/obj/page-snapshot.dto';
import { ScreenshotFactory } from '../../common/factory/screenshot.factory';
import { XpathFactory } from '../../common/factory/xpath.factory';
import { XpathFactory } from '@pinmenote/page-compute';
import { fnIframeIndex } from '../../common/fn/fn-iframe-index';
import { fnSha256 } from '../../common/fn/fn-hash';
@ -33,7 +34,7 @@ export class PinFactory {
baseUrl?: ObjUrlDto,
canvas?: PageCanvasDto
): Promise<ObjPinDto> => {
const rect = canvas ? canvas.rect : XpathFactory.computeRect(ref);
const rect = canvas ? canvas.rect : ElementSizeFactory.computeRect(ref);
const screenshot = await ScreenshotFactory.takeScreenshot(
document,
window,

@ -0,0 +1,72 @@
/*
* This file is part of the pinmenote-extension distribution (https://github.com/pinmenote/pinmenote-extension).
* Copyright (c) 2023 Michal Szczepanski.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import { PageTreeCache } from '@pinmenote/page-compute';
import { XpathFactory } from '@pinmenote/page-compute';
import { fnConsoleLog } from '../../common/fn/fn-console';
// Lowercase node names for which DocumentStore.remove skips a removal when either
// the removed node or the node it was removed from matches.
const IGNORED = ['head', 'html', 'title', 'script', 'style', 'header', 'svg'];
/**
 * Singleton that remembers the markup of nodes removed from the page, keyed by
 * the XPath of the node they were removed from.
 */
export class DocumentStore {
  private static instance: DocumentStore;

  private elements: PageTreeCache = {};

  /** Returns the shared instance, creating it on first use. */
  static getInstance(): DocumentStore {
    if (!this.instance) this.instance = new DocumentStore();
    return this.instance;
  }

  /** The cache of removed markup collected so far. */
  get cache(): PageTreeCache {
    return this.elements;
  }

  /**
   * Records a node that was removed from `target`.
   *
   * Nothing is stored for comment nodes, for nodes or targets whose name is in
   * `IGNORED`, for direct removals from `document.body`, or for nodes without
   * inner HTML. An existing entry for the same target XPath is only replaced
   * when the new inner HTML is strictly longer.
   *
   * @param node the removed node
   * @param target the node it was removed from
   */
  remove(node: Node, target: Node) {
    const COMMENT_NODE = 8;
    if (node.nodeType === COMMENT_NODE) return;

    const isIgnored = (candidate: Node): boolean => IGNORED.includes(candidate.nodeName.toLowerCase());
    if (isIgnored(target) || isIgnored(node)) return;
    if (target === document.body) return;

    const el = node as HTMLElement;
    const html = el.innerHTML;
    if (!html) return;

    const xpath = XpathFactory.newXPathString(target);
    fnConsoleLog('DocumentStore->remove', xpath, el);
    /* Experimental - TODO fix
       twitter - <div data-testid="cellInnerDiv" style="transform: translateY(5660px); position: absolute; width: 100%;">
       - fix sort by translateY position
    */
    const cached = this.elements[xpath];
    // Keep the existing entry unless the new markup is strictly longer.
    if (cached && cached.html.length >= html.length) return;

    const attrs = this.computeAttrs(el.tagName.toLowerCase(), Array.from(el.attributes));
    this.elements[xpath] = { xpath, html, attrs };
  }

  /**
   * Serialises attributes as space-separated `name="value"` pairs (bare `name`
   * when the value is empty), replacing `"` in values with `&quot;`. The `value`
   * attribute of `input` and `textarea` elements is left out.
   */
  private computeAttrs = (tagName: string, attributes: Attr[]): string => {
    const isFormField = tagName === 'input' || tagName === 'textarea';
    const parts = attributes
      .filter((attr) => !(isFormField && attr.name === 'value'))
      .map((attr) => {
        const escaped = attr.value.replaceAll('"', '&quot;');
        return escaped ? `${attr.name}="${escaped}"` : attr.name;
      });
    return parts.join(' ').trimEnd();
  };
}

@ -20,7 +20,6 @@ import Button from '@mui/material/Button';
import { MainFooterButton } from './main-footer.button';
import { PopupActiveTabStore } from '../../store/popup-active-tab.store';
import Typography from '@mui/material/Typography';
import Link from '@mui/material/Link';
export const ConnectionErrorComponent: FunctionComponent = () => {
const isExtension = PopupActiveTabStore.isExtension;

@ -23,7 +23,7 @@ import { ObjTitleFactory } from '../../../common/factory/obj-title.factory';
interface Props {
obj?: ObjDto;
removeCallback: (obj?: ObjDto) => void;
removeCallback: (obj?: ObjDto) => Promise<void>;
}
export const ObjRemoveComponent: FunctionComponent<Props> = (props) => {
@ -32,9 +32,10 @@ export const ObjRemoveComponent: FunctionComponent<Props> = (props) => {
let title = '';
if (props.obj) title = ObjTitleFactory.computeTitle(props.obj);
const handleRemove = () => {
const handleRemove = async () => {
setIsRemoving(true);
props.removeCallback(props.obj);
await props.removeCallback(props.obj);
setIsRemoving(false);
};
return (
<div style={{ display: 'flex' }}>

@ -19,7 +19,7 @@ import { ObjPinDto } from '../../../common/model/obj/obj-pin.dto';
import { ObjDto } from '../../../common/model/obj/obj.dto';
import { PinComponent } from '../../../common/components/pin/pin.component';
import { SettingsStore } from '../../store/settings.store';
import { XpathFactory } from '../../../common/factory/xpath.factory';
import { XpathFactory } from '@pinmenote/page-compute';
import { fnConsoleLog } from '../../../common/fn/fn-console';
export class PreviewPinRenderer {

@ -35,7 +35,6 @@ import {
LICENSE_NOBLE_HASHES_CURVES,
LICENSE_NODE_LANGUAGE_DETECT,
LICENSE_PAKO,
LICENSE_PARSE5,
LICENSE_PINMENOTE_MIT,
LICENSE_PROSEMIRROR,
LICENSE_REACT,
@ -172,12 +171,6 @@ const ELEMENTS: OpensourceElement[] = [
license: LICENSE_MIT,
url: 'https://github.com/nodeca/pako'
},
{
title: 'parse5',
licenseHeader: LICENSE_PARSE5,
license: LICENSE_MIT,
url: 'https://github.com/inikulin/parse5'
},
{
title: 'pdfjs-dist',
license: LICENSE_APACHE_20,

@ -100,9 +100,6 @@ export const LICENSE_PAKO = `(The MIT License)
Copyright (C) 2014-2017 by Vitaly Puzrin and Andrei Tuputcyn
`;
export const LICENSE_PARSE5 = `Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
`;
export const LICENSE_REACT = `MIT License
Copyright (c) Meta Platforms, Inc. and affiliates.