-
Notifications
You must be signed in to change notification settings - Fork 131
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #131 from JohannesKaufmann/improve-nested-lists
improve-nested-lists
- Loading branch information
Showing
8 changed files
with
305 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package domutils | ||
|
||
import ( | ||
"context" | ||
"strings" | ||
|
||
"github.com/JohannesKaufmann/dom" | ||
"golang.org/x/net/html" | ||
"golang.org/x/net/html/atom" | ||
) | ||
|
||
// MoveListItems moves non-"li" nodes into the previous "li" nodes. | ||
func MoveListItems(ctx context.Context, n *html.Node) { | ||
if n.Type == html.ElementNode && (n.Data == "ol" || n.Data == "ul") { | ||
var previousLi *html.Node | ||
|
||
// Collect children to avoid modifying the slice while iterating. | ||
children := dom.AllChildNodes(n) | ||
|
||
for _, child := range children { | ||
if child.Type == html.ElementNode && child.Data == "li" { | ||
previousLi = child | ||
} else if child.Type == html.TextNode && strings.TrimSpace(child.Data) == "" { | ||
// Skip the node, probably just formatting of code | ||
} else { | ||
// We expect that inside an "ol"/"ul" there are *only* "li" nodes. | ||
// But sometimes that is not the case... | ||
|
||
if previousLi != nil { | ||
// There is a previous "li" node, | ||
// so we move this content into the other "li" node. | ||
n.RemoveChild(child) | ||
|
||
previousLi.AppendChild(child) | ||
} else { | ||
// There is no previous "li" node, | ||
// so we wrap this node with it's own "li" node. | ||
|
||
newNode := &html.Node{ | ||
Type: html.ElementNode, | ||
DataAtom: atom.Li, | ||
Data: "li", | ||
} | ||
previousLi = dom.WrapNode(child, newNode) | ||
} | ||
} | ||
} | ||
} | ||
|
||
for c := n.FirstChild; c != nil; c = c.NextSibling { | ||
MoveListItems(ctx, c) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
package domutils | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
|
||
"github.com/JohannesKaufmann/html-to-markdown/v2/internal/tester" | ||
) | ||
|
||
// TestMoveListItems is a table-driven test: each case parses an HTML snippet, | ||
// runs MoveListItems on the parsed document and compares the resulting tree | ||
// against an expected textual representation. | ||
func TestMoveListItems(t *testing.T) { | ||
// Each case holds a description (used as the subtest name), the HTML input | ||
// and the expected tree representation after MoveListItems has run. | ||
runs := []struct { | ||
desc string | ||
input string | ||
expected string | ||
}{ | ||
{ | ||
desc: "not needed in normal list", | ||
input: "<div><ul><li>A</li><li>B</li><li>C</li></ul></div>", | ||
expected: ` | ||
├─body | ||
│ ├─div | ||
│ │ ├─ul | ||
│ │ │ ├─li | ||
│ │ │ │ ├─#text "A" | ||
│ │ │ ├─li | ||
│ │ │ │ ├─#text "B" | ||
│ │ │ ├─li | ||
│ │ │ │ ├─#text "C" | ||
`, | ||
}, | ||
{ | ||
desc: "#text moves into the previous li", | ||
input: "<ul><li>A</li>B</ul>", | ||
expected: ` | ||
├─body | ||
│ ├─ul | ||
│ │ ├─li | ||
│ │ │ ├─#text "A" | ||
│ │ │ ├─#text "B" | ||
`, | ||
}, | ||
{ | ||
desc: "div moves into the previous li", | ||
input: "<ul><li>A</li><div>B</div></ul>", | ||
expected: ` | ||
├─body | ||
│ ├─ul | ||
│ │ ├─li | ||
│ │ │ ├─#text "A" | ||
│ │ │ ├─div | ||
│ │ │ │ ├─#text "B" | ||
`, | ||
}, | ||
{ | ||
desc: "ol moves into the previous li", | ||
input: "<ul><li>A</li><ol><li>B</li></ol></ul>", | ||
expected: ` | ||
├─body | ||
│ ├─ul | ||
│ │ ├─li | ||
│ │ │ ├─#text "A" | ||
│ │ │ ├─ol | ||
│ │ │ │ ├─li | ||
│ │ │ │ │ ├─#text "B" | ||
`, | ||
}, | ||
{ | ||
desc: "no existing li", | ||
input: "<ul><span>A</span><span>B</span></ul>", | ||
expected: ` | ||
├─body | ||
│ ├─ul | ||
│ │ ├─li | ||
│ │ │ ├─span | ||
│ │ │ │ ├─#text "A" | ||
│ │ │ ├─span | ||
│ │ │ │ ├─#text "B" | ||
`, | ||
}, | ||
{ | ||
desc: "basic moved list", | ||
input: ` | ||
<ol> | ||
<li>One</li> | ||
<li>Two</li> | ||
<ol> | ||
<li>Two point one</li> | ||
<li>Two point two</li> | ||
</ol> | ||
</ol> | ||
`, | ||
expected: ` | ||
├─body | ||
│ ├─ol | ||
│ │ ├─#text "\n\t" | ||
│ │ ├─li | ||
│ │ │ ├─#text "One" | ||
│ │ ├─#text "\n\t" | ||
│ │ ├─li | ||
│ │ │ ├─#text "Two" | ||
│ │ │ ├─ol | ||
│ │ │ │ ├─#text "\n\t\t" | ||
│ │ │ │ ├─li | ||
│ │ │ │ │ ├─#text "Two point one" | ||
│ │ │ │ ├─#text "\n\t\t" | ||
│ │ │ │ ├─li | ||
│ │ │ │ │ ├─#text "Two point two" | ||
│ │ │ │ ├─#text "\n\t" | ||
│ │ ├─#text "\n\t" | ||
│ │ ├─#text "\n" | ||
`, | ||
}, | ||
} | ||
// Run every case as its own subtest, named after its description. | ||
for _, run := range runs { | ||
t.Run(run.desc, func(t *testing.T) { | ||
doc := tester.Parse(t, run.input, "") | ||
|
||
// No real context is needed by these cases, so a placeholder is passed. | ||
MoveListItems(context.TODO(), doc) | ||
|
||
// Compare the modified tree with the expected representation. | ||
tester.ExpectRepresentation(t, doc, "output", run.expected) | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -79,7 +79,8 @@ Wir freuen uns über eine [Mail](mailto:[email protected]?body=Hello%0AJohannes)! | |
<!--list with link--> | ||
|
||
- [a(b)\[c\]](/page.html) | ||
- [a\]](/page.html) | ||
|
||
[a\]](/page.html) | ||
|
||
<!--TODO: list with paragraph--> | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.