feat: add getCoreContentText for any website

This commit is contained in:
josc146
2023-03-25 08:59:46 +08:00
parent 14d380f947
commit 07c396fd66
2 changed files with 46 additions and 0 deletions
+45
View File
@@ -0,0 +1,45 @@
/**
 * Compute the area of an element's bounding client rectangle.
 *
 * @param {Element} e - Object exposing `getBoundingClientRect()`.
 * @returns {number} The rectangle's width multiplied by its height.
 */
function getArea(e) {
  const { width, height } = e.getBoundingClientRect()
  return width * height
}
/**
 * Find the element with the largest bounding-box area in the subtree rooted at `e`,
 * considering only elements whose area is strictly below 80% of the root's own area.
 *
 * Elements are visited parent-first, children in order; because the comparison is a
 * strict `>`, the first element seen wins when several share the same area.
 *
 * @param {Element|null|undefined} e - Root of the subtree to search.
 * @returns {Element|null} The largest qualifying element, or `null` when `e` is
 *   missing or no element satisfies the area bound.
 */
function findLargestElement(e) {
  // A missing root has no subtree: report "nothing found" instead of throwing in getArea.
  if (!e) return null

  // Upper bound that keeps near-full-size wrappers of the root from being selected.
  const limitedArea = 0.8 * getArea(e)
  let maxArea = 0
  let largestElement = null

  function traverseDOM(node) {
    if (node.nodeType !== Node.ELEMENT_NODE) return
    const area = getArea(node)
    if (area > maxArea && area < limitedArea) {
      maxArea = area
      largestElement = node
    }
    for (const child of node.children) traverseDOM(child)
  }

  traverseDOM(e)
  return largestElement
}
/**
 * Extract the text of a large content element of the current page.
 *
 * Runs `findLargestElement` on `document.body`, then again inside the result. If the
 * inner result covers more than half the area of the outer one, its `textContent`
 * is used; otherwise the outer element's `textContent` is used. The chosen text is
 * trimmed, then every `' '`, `'\n\n'` and `',,'` occurrence is removed, in that order.
 *
 * @returns {string|undefined} The cleaned-up text, or `undefined` when no element
 *   was found under `document.body`.
 */
export function getCoreContentText() {
  const largestElement = findLargestElement(document.body)
  // Bail out before searching inside a missing element, which would otherwise throw.
  if (!largestElement) return

  const secondLargestElement = findLargestElement(largestElement)
  console.log(largestElement)
  console.log(secondLargestElement)

  let ret
  if (secondLargestElement && getArea(secondLargestElement) > 0.5 * getArea(largestElement)) {
    ret = secondLargestElement.textContent
    console.log('use second')
  } else {
    ret = largestElement.textContent
    console.log('use first')
  }
  // NOTE(review): replaceAll(' ', '') strips every single space, which also joins
  // space-separated words. Confirm this is intended (vs. collapsing runs of spaces).
  return ret.trim().replaceAll(' ', '').replaceAll('\n\n', '').replaceAll(',,', '')
}
+1
View File
@@ -4,6 +4,7 @@ export * from './ends-with-question-mark'
export * from './fetch-sse'
export * from './get-client-position'
export * from './get-conversation-pairs'
export * from './get-core-content-text'
export * from './get-possible-element-by-query-selector'
export * from './init-session'
export * from './is-firefox.mjs'