diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
index 565ad5b..1615bb6 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
@@ -452,6 +452,19 @@ private static string GetCardUrl(string url)
             return UrlBookToCard().Replace(url, "$1card$2$3");
         }
 
+        // NOTE(review): presumably these accumulate the paragraph text and the script line while children are processed - confirm once ProcessChildren is implemented.
+        private SplittedLineBuilder _paragraphLineBuilder = new();
+        private SplittedLineBuilder _scriptLineBuilder = new();
+
+        /// <summary>
+        /// Processes an element according to its children.
+        /// </summary>
+        /// <param name="element">The element to process.</param>
+        internal void ProcessChildren(IElement element)
+        {
+            // TODO: not implemented yet; ProcessChildrenTest in ScrapingAozoraServiceTest describes the expected behavior.
+        }
+
         [System.Text.RegularExpressions.GeneratedRegex(@"(https://www\.aozora\.gr\.jp/cards/\d{6}/)files/(\d{1,})_\d{1,}(\.html)")]
         private static partial System.Text.RegularExpressions.Regex UrlBookToCard();
 
diff --git a/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs b/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
index 31c5876..251e2af 100644
--- a/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
+++ b/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
@@ -5,37 +5,132 @@
 using KoeBook.Epub.Services;
 using System.Runtime.CompilerServices;
 using System.Linq;
+using System.Net.Http;
 
 namespace KoeBook.Test.Epub;
 
 public class ScrapingAozoraServiceTest
 {
-        private static readonly EpubDocument EmptySingleParagraph = new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph()] }] }] };
-
-        public static object[][] TestCases()
-        {
-            (string, EpubDocument, EpubDocument)[] cases = [
-                // レイアウト
-                // 1.1 改丁
-                (ToMainText(@"[#改丁]"), EmptySingleParagraph , new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph() { Text = "[#改丁]", ScriptLine = new Core.Models.ScriptLine("", "", "") }] }] }] }),
-            ];
-            return cases.Select(c => new object[] { c.Item1, c.Item2 }).ToArray();
-        }
-
-        /// <summary>
-        /// <paramref name="text"/>を"<div class="main_text"></div>"で囲む
-        /// </summary>
-        /// <param name="text">divタグで囲むhtmlの要素</param>
-        /// <returns>divタグで囲まれた<paramref name="text"/></returns>
-        private static string ToMainText(string text)
-        {
-            var builder = new StringBuilder();
-            builder.Append(@"<div class=""main_text"">");
-            builder.Append(text);
-            builder.Append("</div>");
-            return builder.ToString();
-        }
+    private static readonly EpubDocument EmptySingleParagraph = CreateEmptySingleParagraph();
+
+    /// <summary>
+    /// Supplies the cases for <see cref="ProcessChildrenTest"/>: the input HTML, the document before processing and the expected document.
+    /// </summary>
+    public static object[][] ProcessChildrenTestCases()
+    {
+        (string Html, EpubDocument Initial, EpubDocument Expected)[] cases = [
+            // Layout
+            // 1.1 Kaicho annotation
+            (
+                ToMainText(@"[#改丁]"),
+                CreateEmptySingleParagraph(),
+                new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph() { Text = "[#改丁]", ScriptLine = new Core.Models.ScriptLine("", "", "") }] }] }] }
+            ),
+        ];
+
+        // All three tuple members are passed on because the theory declares three parameters.
+        return cases.Select(c => new object[] { c.Html, c.Initial, c.Expected }).ToArray();
+    }
+
+    /// <summary>
+    /// Creates a document with one chapter, one section and a single empty paragraph.
+    /// </summary>
+    /// <returns>A new instance on every call, so a test that mutates it cannot affect another one.</returns>
+    private static EpubDocument CreateEmptySingleParagraph()
+    {
+        return new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph()] }] }] };
+    }
+
+    /// <summary>
+    /// Wraps <paramref name="text"/> in a <c>div</c> element with the <c>main_text</c> class.
+    /// </summary>
+    /// <param name="text">The HTML fragment to wrap.</param>
+    /// <returns><paramref name="text"/> enclosed in the <c>div</c> element.</returns>
+    private static string ToMainText(string text)
+    {
+        return $"""<div class="main_text">{text}</div>""";
+    }
+
+    [Theory]
+    [MemberData(nameof(ProcessChildrenTestCases))]
+    public async Task ProcessChildrenTest(string html, EpubDocument initial, EpubDocument expected)
+    {
+        var config = Configuration.Default.WithDefaultLoader();
+        using var context = BrowsingContext.New(config);
+        var doc = await context.OpenAsync(request => request.Content(html));
+        var mainText = doc.QuerySelector(".main_text");
+        Assert.NotNull(mainText);
+        var scraper = new ScrapingAozoraService(new SplitBraceService(), new ScrapingClientService(new HttpClientFactoryStub(), TimeProvider.System));
+        scraper._document() = initial;
+
+        scraper.ProcessChildren(mainText);
+
+        Assert.True(HaveSameText(scraper._document(), expected));
+    }
+
+    /// <summary>
+    /// Determines whether two documents have the same content; their Guid is not compared.
+    /// </summary>
+    /// <param name="document">The document to check.</param>
+    /// <param name="comparison">The document to compare it with.</param>
+    /// <returns><see langword="true"/> if the titles, authors, CSS classes, chapters, sections and elements match; otherwise <see langword="false"/>.</returns>
+    private static bool HaveSameText(EpubDocument document, EpubDocument comparison)
+    {
+        // Return on the first mismatch so that a later comparison cannot overwrite an earlier failure.
+        // NOTE(review): SequenceEqual assumes CssClasses is a sequence whose items compare by value - confirm.
+        if (document.Title != comparison.Title
+            || document.Author != comparison.Author
+            || !document.CssClasses.SequenceEqual(comparison.CssClasses)
+            || document.Chapters.Count() != comparison.Chapters.Count())
+        {
+            return false;
+        }
+
+        foreach (var (selfChapter, comparisonChapter) in document.Chapters.Zip(comparison.Chapters))
+        {
+            // Zip stops at the shorter sequence, so the lengths are compared explicitly.
+            if (selfChapter.Title != comparisonChapter.Title
+                || selfChapter.Sections.Count() != comparisonChapter.Sections.Count())
+            {
+                return false;
+            }
+
+            foreach (var (selfSection, comparisonSection) in selfChapter.Sections.Zip(comparisonChapter.Sections))
+            {
+                if (selfSection.Title != comparisonSection.Title
+                    || selfSection.Elements.Count() != comparisonSection.Elements.Count())
+                {
+                    return false;
+                }
+
+                // Paragraphs are matched on their text only (ScriptLine is ignored); other elements fall back to Equals.
+                var sameElements = selfSection.Elements
+                    .Zip(comparisonSection.Elements)
+                    .All(pair => pair switch
+                    {
+                        (Paragraph self, Paragraph other) => self.Text == other.Text,
+                        _ => Equals(pair.First, pair.Second),
+                    });
+                if (!sameElements)
+                {
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    }
+
+    /// <summary>
+    /// <see cref="IHttpClientFactory"/> for the tests; every call returns the same <see cref="HttpClient"/>.
+    /// </summary>
+    private sealed class HttpClientFactoryStub : IHttpClientFactory
+    {
+        private static readonly HttpClient SharedClient = new();
+
+        public HttpClient CreateClient(string name) => SharedClient;
+    }
 
     [Theory]
     [InlineData("", "")]
     public async Task TextProcess(string input, string expected)
@@ -87,4 +182,10 @@ file static class ScrapingAozora
 
     [UnsafeAccessor(UnsafeAccessorKind.StaticMethod)]
     public static extern (List<int> contentsIds, bool hasChapter, bool hasSection) LoadToc(ScrapingAozoraService? _, IDocument doc, EpubDocument epubDocument);
+
+    /// <summary>
+    /// Gives the tests a reference to the <c>_document</c> field of <see cref="ScrapingAozoraService"/>.
+    /// </summary>
+    [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "_document")]
+    public static extern ref EpubDocument _document(this ScrapingAozoraService scraper);
 }