Skip to content

Commit

Permalink
Merge pull request #50 from imjohnbo/treewalker
Browse files Browse the repository at this point in the history
Fix: target correct link when multiple matches are present
  • Loading branch information
srt32 authored Apr 21, 2022
2 parents 2906188 + d3ec435 commit 287767d
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 50 deletions.
87 changes: 49 additions & 38 deletions src/paste-markdown-html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ export function uninstall(el: HTMLElement): void {
el.removeEventListener('paste', onPaste)
}

type MarkdownTransformer = (element: HTMLElement | HTMLAnchorElement, args: string[]) => string

function onPaste(event: ClipboardEvent) {
const transfer = event.clipboardData
// if there is no clipboard data, or
Expand All @@ -20,65 +18,78 @@ function onPaste(event: ClipboardEvent) {
if (!(field instanceof HTMLTextAreaElement)) return

// Get the plaintext and html version of clipboard contents
let text = transfer.getData('text/plain')
let plaintext = transfer.getData('text/plain')
const textHTML = transfer.getData('text/html')
// Replace Unicode equivalent of "&nbsp" with a space
const textHTMLClean = textHTML.replace(/\u00A0/g, ' ')
const textHTMLClean = textHTML.replace(/\u00A0/g, ' ').replace(/\uC2A0/g, ' ')
if (!textHTML) return

text = text.trim()
if (!text) return
plaintext = plaintext.trim()
if (!plaintext) return

// Generate DOM tree from HTML string
const parser = new DOMParser()
const doc = parser.parseFromString(textHTMLClean, 'text/html')
const walker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_ELEMENT)

const a = doc.getElementsByTagName('a')
const markdown = transform(a, text, linkify as MarkdownTransformer)
const markdown = convertToMarkdown(plaintext, walker)

// If no changes made by transforming
if (markdown === text) return
if (markdown === plaintext) return

event.stopPropagation()
event.preventDefault()

insertText(field, markdown)
}

// Build a markdown string from a DOM tree and plaintext
function transform(
elements: HTMLCollectionOf<HTMLElement>,
text: string,
transformer: MarkdownTransformer,
...args: string[]
): string {
const markdownParts = []
for (const element of elements) {
const textContent = element.textContent || ''
const {part, index} = trimAfter(text, textContent)
if (index >= 0) {
markdownParts.push(part.replace(textContent, transformer(element, args)))
text = text.slice(index)
// Rewrites "plaintext" so that text belonging to links in the pasted HTML is replaced by the
// string linkify() returns for that link.
//
// plaintext: the text to rewrite; it is copied into "markdown" and searched left to right.
// walker: TreeWalker positioned on the parsed HTML; nodes are visited starting at
//         walker.firstChild() and advanced with walker.nextNode().
// Returns the rewritten string, or the untouched "plaintext" when the visited-node counter
// equals NODE_LIMIT.
function convertToMarkdown(plaintext: string, walker: TreeWalker): string {
let currentNode = walker.firstChild()
let markdown = plaintext
// Cursor into "markdown": matches are only searched for at or after this offset, so text that
// was already consumed (or already turned into a link) is never matched again
let markdownIgnoreBeforeIndex = 0
// Number of nodes visited so far
let index = 0
// Upper bound on the number of nodes the loop below will visit
const NODE_LIMIT = 10000

// Walk through the DOM tree
while (currentNode && index < NODE_LIMIT) {
index++
// Links contribute their full textContent; any other node contributes only the wholeText of
// its first child (empty string when that is missing)
const text = isLink(currentNode) ? currentNode.textContent || '' : (currentNode.firstChild as Text)?.wholeText || ''

// No need to transform whitespace
if (isEmptyString(text)) {
currentNode = walker.nextNode()
continue
}

// Find the index where "text" is found in "markdown" _after_ "markdownIgnoreBeforeIndex"
const markdownFoundIndex = markdown.indexOf(text, markdownIgnoreBeforeIndex)

// Nodes whose text is not found are skipped without moving the cursor
if (markdownFoundIndex >= 0) {
if (isLink(currentNode)) {
const markdownLink = linkify(currentNode)
// Transform 'example link plus more text' into 'example [link](example link) plus more text'
// Method: 'example [link](example link) plus more text' = 'example ' + '[link](example link)' + ' plus more text'
markdown =
markdown.slice(0, markdownFoundIndex) + markdownLink + markdown.slice(markdownFoundIndex + text.length)
// Continue searching after the inserted link so its label/URL cannot be matched again
markdownIgnoreBeforeIndex = markdownFoundIndex + markdownLink.length
} else {
// Plain text: just move the cursor past the matched occurrence
markdownIgnoreBeforeIndex = markdownFoundIndex + text.length
}
}

currentNode = walker.nextNode()
}
// NOTE(review): from here through the trimAfter header below, the lines look like removed
// (pre-change) lines of the rendered diff interleaved with the new function — "markdownParts"
// is not declared anywhere in convertToMarkdown. Confirm against the actual file.
markdownParts.push(text)
return markdownParts.join('')
}

// Trim text at index of last character of the first occurrence of "search" and
// return a new string with the substring until the index
// Example: trimAfter('Hello world', 'world') => {part: 'Hello world', index: 11}
// Example: trimAfter('Hello world', 'bananas') => {part: '', index: -1}
function trimAfter(text: string, search = ''): {part: string; index: number} {
let index = text.indexOf(search)
if (index === -1) return {part: '', index}
// Unless we hit the node limit, we should have processed all nodes
// NOTE(review): "index" also equals NODE_LIMIT when exactly NODE_LIMIT nodes were visited and
// the walk finished normally; in that case the plaintext is returned too — confirm intended.
return index === NODE_LIMIT ? plaintext : markdown
}

index += search.length
// Reports whether "text" carries no visible content: true for a falsy value
// (such as the empty string) and for a string that is blank once trimmed.
function isEmptyString(text: string): boolean {
  if (!text) return true
  const trimmed = text.trim()
  return trimmed.length === 0
}

return {
part: text.substring(0, index),
index
}
// Type guard: true only when "node" has a tag name equal to "a" (compared
// case-insensitively) and also carries an "href" attribute.
function isLink(node: Node): node is HTMLAnchorElement {
  const element = node as HTMLElement
  // Nodes without a tagName (or with a different tag) are never links
  if (element.tagName?.toLowerCase() !== 'a') return false
  return element.hasAttribute('href')
}

function hasHTML(transfer: DataTransfer): boolean {
Expand Down
34 changes: 22 additions & 12 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ describe('paste-markdown', function () {

it('turns mixed html content containing several links into appropriate markdown', function () {
// eslint-disable-next-line github/unescaped-html-literal
const sentence = `<meta charset='utf-8'><meta charset="utf-8">
const sentence = `<meta charset='utf-8'>
<b style="font-weight:normal;"><p dir="ltr"><span>This is a </span>
<a href="https://github.com/"><span>link</span></a><span> and </span>
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ"><span>another link</span></a></p>
<a href="https://github.com/">link</a><span> and </span>
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ">another link</a></p>
<br /><a href="https://github.com/"><span>Link</span></a><span> at the beginning, link at the </span>
<a href="https://github.com/"><span>end</span></a></b>`
// eslint-disable-next-line i18n-text/no-en
Expand Down Expand Up @@ -186,19 +186,29 @@ describe('paste-markdown', function () {

it('leaves plaintext links alone', function () {
// eslint-disable-next-line github/unescaped-html-literal
const sentence = `<meta charset='utf-8'><meta charset="utf-8">
const sentence = `<meta charset='utf-8'>
<b style="font-weight:normal;"><p dir="ltr"><span>This is a </span>
<a href="https://github.com/"><span>https://github.com</span></a><span> and </span>
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ"><span>another link</span></a></p>
<br /><a href="https://github.com/"><span>Link</span></a><span> at the beginning, link at the </span>
<a href="https://github.com/"><span>https://github.com/</span></a></b>`
<a href="https://github.com/">link</a><span> and </span>
<a href="https://www.youtube.com/watch?v=dQw4w9WgXcQ">another link</a></p>
<br /><a href="https://github.com/">Link</a><span> at the beginning, link at the </span>
<a href="https://github.com/"><span>end</span></a></b>`
/* eslint-disable i18n-text/no-en */
const plaintextSentence =
'This is a https://github.com and another link\n\nLink at the beginning, link at the https://github.com/'
const plaintextSentence = 'This is a link and another link\n\nLink at the beginning, link at the end'
/* eslint-enable i18n-text/no-en */
const markdownSentence =
'This is a https://github.com/ and [another link](https://www.youtube.com/watch?v=dQw4w9WgXcQ)\n\n' +
'[Link](https://github.com/) at the beginning, link at the https://github.com/'
'This is a [link](https://github.com/) and [another link](https://www.youtube.com/watch?v=dQw4w9WgXcQ)\n\n' +
'[Link](https://github.com/) at the beginning, link at the [end](https://github.com/)'

paste(textarea, {'text/html': sentence, 'text/plain': plaintextSentence})
assert.equal(textarea.value, markdownSentence)
})

it('finds the right link when identical labels are present', function () {
// eslint-disable-next-line github/unescaped-html-literal
const sentence = `<meta charset='utf-8'><span>example<span> </span>
</span><a href="https://example.com/">example</a>`
const plaintextSentence = 'example example'
const markdownSentence = 'example [example](https://example.com/)'

paste(textarea, {'text/html': sentence, 'text/plain': plaintextSentence})
assert.equal(textarea.value, markdownSentence)
Expand Down

0 comments on commit 287767d

Please sign in to comment.