From 07c396fd667a4a1d0b6d2159d6f25c5e10091e6d Mon Sep 17 00:00:00 2001
From: josc146
Date: Sat, 25 Mar 2023 08:59:46 +0800
Subject: [PATCH] feat: getCoreContentText for any websites

---
 src/utils/get-core-content-text.mjs | 67 +++++++++++++++++++++++++++++
 src/utils/index.mjs                 |  1 +
 2 files changed, 68 insertions(+)
 create mode 100644 src/utils/get-core-content-text.mjs

diff --git a/src/utils/get-core-content-text.mjs b/src/utils/get-core-content-text.mjs
new file mode 100644
index 0000000..9f1af09
--- /dev/null
+++ b/src/utils/get-core-content-text.mjs
@@ -0,0 +1,67 @@
+/**
+ * Returns the area of an element's bounding client rect.
+ * @param {Element} e - element to measure
+ * @returns {number} width multiplied by height of the rect
+ */
+function getArea(e) {
+  const rect = e.getBoundingClientRect()
+  return rect.width * rect.height
+}
+
+/**
+ * Searches `e` and its descendant elements for the one with the largest area
+ * that is still smaller than 80% of the area of `e` itself.
+ * @param {Element} e - root of the subtree to search
+ * @returns {Element|null} the best match, or null when no element has a
+ *   non-zero area below the limit
+ */
+function findLargestElement(e) {
+  const limitedArea = 0.8 * getArea(e)
+  let maxArea = 0
+  let largestElement = null
+
+  function traverseDOM(node) {
+    if (node.nodeType !== Node.ELEMENT_NODE) return
+
+    const area = getArea(node)
+    if (area > maxArea && area < limitedArea) {
+      maxArea = area
+      largestElement = node
+    }
+
+    Array.from(node.children).forEach(traverseDOM)
+  }
+
+  traverseDOM(e)
+  return largestElement
+}
+
+/**
+ * Returns the cleaned-up text of the element picked as the page's core content.
+ *
+ * Picks the largest element under `document.body`, then prefers the largest
+ * element inside it when that one covers more than half of its area.
+ * @returns {string|undefined} the cleaned-up text, or undefined when no
+ *   candidate element is found
+ */
+export function getCoreContentText() {
+  // Guard before measuring: findLargestElement dereferences its argument.
+  if (!document.body) return
+
+  const largestElement = findLargestElement(document.body)
+  // Must be checked before the second search, which would throw on null.
+  if (!largestElement) return
+
+  const secondLargestElement = findLargestElement(largestElement)
+  const useSecond =
+    secondLargestElement && getArea(secondLargestElement) > 0.5 * getArea(largestElement)
+  const coreElement = useSecond ? secondLargestElement : largestElement
+
+  // NOTE(review): as written, the ' ' pattern removes every space; confirm
+  // this is the intended cleanup before relying on word boundaries.
+  return coreElement.textContent
+    .trim()
+    .replaceAll(' ', '')
+    .replaceAll('\n\n', '')
+    .replaceAll(',,', '')
+}
diff --git a/src/utils/index.mjs b/src/utils/index.mjs
index bdcfdd8..b645e06 100644
--- a/src/utils/index.mjs
+++ b/src/utils/index.mjs
@@ -4,6 +4,7 @@ export * from './ends-with-question-mark'
 export * from './fetch-sse'
 export * from './get-client-position'
 export * from './get-conversation-pairs'
+export * from './get-core-content-text'
 export * from './get-possible-element-by-query-selector'
 export * from './init-session'
 export * from './is-firefox.mjs'