Skip to content

Commit

Permalink
TYPO3-Documentation#49 Adapt documentation indexing for new content s…
Browse files Browse the repository at this point in the history
…tructure

A new PHP-based documentation rendering has been implemented, and it works well with the current code. However, minor changes in the rendered HTML file structure made some adjustments necessary.

When checking HTML files for content to be indexed, the old rendering method used the crawler to search for elements by the tag and attribute `div[itemprop="articleBody"]`. In contrast, the new rendering uses the `article` tag. Additionally, in the old rendering, snippets were generated based on `div.section` elements, whereas the new rendering matches them by `section` tags.

These updates are made in the `ParseDocumentationHTMLService` class. This class also determines whether the file content was generated with the old or new rendering method by checking the meta attribute `generator`. The new rendering produces:

`<meta content="phpdocumentor/guides" name="generator">`

This commit also includes:

- Fixes for minor issues in the code so that it works as intended in a local setup.
- Code cleanup for improved readability and maintenance.
- Additions and updates to various unit tests.
- Updates to assets configuration.

Resolves issue: TYPO3-Documentation#49
  • Loading branch information
Marcin Sągol committed Dec 14, 2023
1 parent 07626d2 commit fa028ef
Show file tree
Hide file tree
Showing 36 changed files with 5,709 additions and 2,539 deletions.
6 changes: 6 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@
],
"post-update-cmd": [
"@auto-scripts"
],
"ci:test:unit": [
"php bin/phpunit -c phpunit.xml.dist"
],
"fix:php:cs-fixer": [
"php-cs-fixer fix src -v --using-cache no"
]
},
"conflict": {
Expand Down
22 changes: 10 additions & 12 deletions config/services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,20 @@ parameters:
assets:
css:
header:
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/css/theme.css'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/css/webfonts.css'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/css/fontawesome.css'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.9.0/css/theme.css'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.9.0/css/webfonts.css'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.9.0/css/fontawesome.css'
footer:
js:
header:
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/js/modernizr.min.js'

footer:
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/js/jquery.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/js/underscore.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/js/doctools.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/js/popper.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/js/bootstrap.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/js/autocomplete.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.5.1/js/theme.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.9.0/js/jquery.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.9.0/js/underscore.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.9.0/js/doctools.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.9.0/js/popper.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.9.0/js/bootstrap.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.9.0/js/autocomplete.min.js'
- 'https://typo3.azureedge.net/typo3documentation/theme/sphinx_typo3_theme/4.9.0/js/theme.min.js'

services:
# default configuration for services in *this* file
Expand Down
21 changes: 12 additions & 9 deletions src/Command/SnippetImporter.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,13 @@ protected function execute(InputInterface $input, OutputInterface $output): int
return Command::SUCCESS;
}

protected function importManual($manual, $input)
protected function importManual($manual, $input): void
{
$this->io->section('Importing ' . $this->makePathRelative(
$input->getOption('rootPath'),
$manual->getAbsolutePath()
) . ' - sit tight.');
) . ' ...');
$this->importer->deleteManual($manual);

$this->importer->importManual($manual);
}

Expand All @@ -122,28 +121,32 @@ private function getManuals(InputInterface $input, OutputInterface $output): Fin
return $folders;
}

private function makePathRelative(string $base, string $path)
private function makePathRelative(string $base, string $path): string
{
return str_replace(rtrim($base, '/') . '/', '', $path);
$normalizedBase = rtrim($base, DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;
if (str_starts_with($path, $normalizedBase)) {
return substr($path, strlen($normalizedBase));
}
return $path;
}

private function formatMilliseconds(int $milliseconds): string
{
$t = round($milliseconds / 1000);
$t = intdiv($milliseconds, 1000);
return sprintf('%02d:%02d:%02d', (int)($t / 3600), (int)($t / 60) % 60, $t % 60);
}

public function startProgress(Event $event)
public function startProgress(Event $event): void
{
$this->io->progressStart($event->getFiles()->count());
}

public function advanceProgress(Event $event)
public function advanceProgress(Event $event): void
{
$this->io->progressAdvance();
}

public function finishProgress(Event $event)
public function finishProgress(Event $event): void
{
$this->io->progressFinish();
}
Expand Down
3 changes: 3 additions & 0 deletions src/Dto/Manual.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ public function getFilesWithSections(): Finder
/**
* TYPO3 Core Changelogs are treated as submanuals from typo3/cms-core manual
*
* Changelogs from other packages are not treated as submanuals, because they
* can have a different structure and will be treated as normal manual pages.
*
* @return array<Manual>
*/
public function getSubManuals(): array
Expand Down
13 changes: 5 additions & 8 deletions src/Dto/SearchDemand.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

use Symfony\Component\HttpFoundation\Request;

class SearchDemand
readonly class SearchDemand
{
public function __construct(protected string $query, protected int $page, protected array $filters)
public function __construct(private string $query, private int $page, private array $filters)
{
}

public static function createFromRequest(Request $request): SearchDemand
{
$requestFilters = $request->query->get('filters');
$requestFilters = $request->query->all()['filters'] ?? [];
$filters = [];
if (!empty($requestFilters)) {
foreach ($requestFilters as $filter => $value) {
Expand All @@ -33,11 +33,8 @@ public static function createFromRequest(Request $request): SearchDemand
}
$page = (int)$request->query->get('page', '1');
$query = $request->query->get('q', '');
return new self(
$query,
max($page, 1),
$filters,
);

return new self($query, max($page, 1), $filters);
}

public function getQuery(): string
Expand Down
2 changes: 1 addition & 1 deletion src/Helper/VersionSorter.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class VersionSorter
{
public static function sortVersions(array $versions, string $direction = 'asc'): array
{
usort($versions, function ($a, $b) {
usort($versions, static function ($a, $b) {
if ($a === 'main') {
return 1;
}
Expand Down
3 changes: 1 addition & 2 deletions src/Repository/ElasticRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,9 @@ public function suggest(SearchDemand $searchDemand): array
$elasticaResultSet = $search->search();
$results = $elasticaResultSet->getResults();

$out = [
return [
'results' => $results,
];
return $out;
}

/**
Expand Down
25 changes: 9 additions & 16 deletions src/Service/DirectoryFinderService.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,20 @@

class DirectoryFinderService
{
public function __construct(private readonly array $allowedPaths, private readonly array $excludedDirectories)
{
public function __construct(
private readonly array $allowedPaths,
private readonly array $excludedDirectories
) {
}

/**
* Finds all directories containing documentation under rootPath (DOCS_ROOT_PATH)
* taking into account 'allowed_paths' and 'excluded_directories'
*
* @return Finder
*/
public function getAllManualDirectories(string $rootPath): Finder
{
$allowedPathsRegexs = $this->wrapValuesWithPregDelimiters($this->allowedPaths);

$finder = $this->getDirectoriesByPath($rootPath);
return $finder->path($allowedPathsRegexs);
return $this->getDirectoriesByPath($rootPath)->path($allowedPathsRegexs);
}

/**
Expand All @@ -30,13 +28,13 @@ public function getAllManualDirectories(string $rootPath): Finder
*
* @throws \InvalidArgumentException
*/
public function getDirectoriesByPath(string $docRootPath, string $packagePath=''): Finder
public function getDirectoriesByPath(string $docRootPath, string $packagePath = ''): Finder
{
$combinedPath = $docRootPath . ($packagePath ? '/' . $packagePath : '');

$finder = new Finder();

// checks if given path is already a manual, as finder only checks subfolders
// If the path is a manual, use append; otherwise, set up the usual directory search
if ($combinedPath !== $docRootPath && $this->objectsFileExists($combinedPath)) {
$finder->append([$combinedPath]);
} else {
Expand All @@ -49,7 +47,7 @@ public function getDirectoriesByPath(string $docRootPath, string $packagePath=''
return $finder;
}

private function getFolderFilter()
private function getFolderFilter(): \Closure
{
$self = $this;
return static function (\SplFileInfo $file) use ($self) {
Expand All @@ -64,14 +62,9 @@ private function objectsFileExists(string $path): bool

/**
* Wraps array values with regular expression delimiters
*
* @return array
*/
private function wrapValuesWithPregDelimiters(array $regexs): array
{
array_walk($regexs, function (&$value, $key) {
$value = '#' . $value . '#';
});
return $regexs;
return array_map(static fn ($value) => "#{$value}#", $regexs);
}
}
43 changes: 21 additions & 22 deletions src/Service/ParseDocumentationHTMLService.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,34 @@

class ParseDocumentationHTMLService
{
private bool $newRendering = true;

public function getSectionsFromFile(SplFileInfo $file): array
{
return $this->getSections($file->getContents());
$fileContents = $file->getContents();
$crawler = new Crawler($fileContents);
$this->newRendering = $crawler->filterXPath("//meta[@name='generator' and @content='phpdocumentor/guides']")->count();

return $this->getSections($crawler);
}

private function getSections(string $html): array
private function getSections(Crawler $html): array
{
$crawler = new Crawler($html);
$sections = $crawler->filter('div[itemprop="articleBody"]');

if ($sections->count() === 0) {
return [];
}
$sections = $html->filter($this->newRendering ? 'article' : 'div[itemprop="articleBody"]');

return $this->getAllSections($sections);
return $sections->count() === 0 ? [] : $this->getAllSections($sections);
}

/**
* When multiple sections are present, including nested sections,
* the process iterates over each section to fetch its content snippet.
* However, child sections are excluded from this content retrieval;
* instead, they are treated as distinct sections individually.
*/
private function getAllSections(Crawler $sections): array
{
$sectionPieces = [];
foreach ($sections->filter('div.section') as $section) {
foreach ($sections->filter($this->newRendering ? 'section' : 'div.section') as $section) {
$foundHeadline = $this->findHeadline($section);
if ($foundHeadline === []) {
continue;
Expand Down Expand Up @@ -56,20 +63,16 @@ private function findHeadline(\DOMElement $section): array
$crawler = new Crawler($section);
$headline = $crawler->filter('h1, h2, h3, h4, h5, h6')->getNode(0);

if (($headline instanceof \DOMElement) === false) {
return [];
}

return [
return $headline instanceof \DOMElement ? [
'headlineText' => filter_var(htmlspecialchars($headline->textContent), FILTER_UNSAFE_RAW, FILTER_FLAG_STRIP_HIGH),
'node' => $headline,
];
] : [];
}

private function stripSubSectionsIfAny(\DOMElement $section): \DOMElement
{
$crawler = new Crawler($section);
$subSections = $crawler->filter('div.section div.section');
$subSections = $crawler->filter($this->newRendering ? 'section section' : 'div.section div.section');
if ($subSections->count() === 0) {
return $section;
}
Expand Down Expand Up @@ -105,10 +108,6 @@ private function stripCodeExamples(\DOMElement $section): \DOMElement

private function sanitizeString(string $input): string
{
$pattern = [
'/\s\s+/',
];
$regexBuildName = preg_replace($pattern, ' ', $input);
return trim($regexBuildName);
return trim(preg_replace('/\s\s+/', ' ', $input));
}
}
12 changes: 0 additions & 12 deletions symfony.lock
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,6 @@
"cweagans/composer-patches": {
"version": "1.7.0"
},
"doctrine/annotations": {
"version": "1.0",
"recipe": {
"repo": "github.com/symfony/recipes",
"branch": "master",
"version": "1.0",
"ref": "cb4152ebcadbe620ea2261da1a1c5a9b8cea7672"
}
},
"doctrine/lexer": {
"version": "v1.0.1"
},
"elasticsearch/elasticsearch": {
"version": "v5.3.0"
},
Expand Down
19 changes: 10 additions & 9 deletions tests/Unit/Command/SnippetImporterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ public function rootPathIsUsedFromConfiguration(): void
$directoryFinder = $this->prophesize(DirectoryFinderService::class);

$finder = $this->prophesize(Finder::class);
$finder->hasResults()->shouldBeCalledTimes(2)->willReturn(false);
$finder->hasResults()->willReturn(true);
$finder->getIterator()->willReturn(new \AppendIterator());
$directoryFinder
->getAllManualDirectories('_docsDefault')
->shouldBeCalledTimes(1)
Expand All @@ -52,8 +53,8 @@ public function rootPathCanBeDefinedViaOption(): void
$directoryFinder = $this->prophesize(DirectoryFinderService::class);

$finder = $this->prophesize(Finder::class);
$finder->hasResults()->shouldBeCalledTimes(2)->willReturn(false);

$finder->hasResults()->willReturn(true);
$finder->getIterator()->willReturn(new \AppendIterator());
$directoryFinder
->getAllManualDirectories('_docsCustom')
->shouldBeCalledTimes(1)
Expand Down Expand Up @@ -85,7 +86,7 @@ public function callsImportProcedureManualForAllReturnedManuals(): void
$folder2->__toString()->willReturn('_docsFolder/c/typo3/manual-2/master/en-us');

$finder = new Finder();
$finder->Append([$folder->reveal(), $folder2->reveal()]);
$finder->append([$folder->reveal(), $folder2->reveal()]);

$importer->importManual(Argument::which('getTitle', 'typo3/manual-1'))->shouldBeCalledTimes(1);
$importer->deleteManual(Argument::which('getTitle', 'typo3/manual-1'))->shouldBeCalledTimes(1);
Expand Down Expand Up @@ -118,13 +119,13 @@ public function importsOnlyProvidedPackage(): void
$folder->__toString()->willReturn('_docsFolder/c/typo3/cms-core/master/en-us');

$finder = new Finder();
$finder->Append([$folder->reveal()]);
$finder->append([$folder->reveal()]);

$directoryFinder = $this->prophesize(DirectoryFinderService::class);
$directoryFinder->getDirectoriesByPath(
'_docsDefault',
'c/typo3/cms-core/master/en-us'
)->willReturn($finder)->shouldBeCalledTimes(1);
$directoryFinder
->getDirectoriesByPath('_docsDefault', 'c/typo3/cms-core/master/en-us')
->willReturn($finder)
->shouldBeCalledTimes(1);

$importer->deleteManual(Argument::which('getTitle', 'typo3/cms-core'))->shouldBeCalledTimes(1);
$importer->importManual(Argument::which('getTitle', 'typo3/cms-core'))->shouldBeCalledTimes(1);
Expand Down
2 changes: 1 addition & 1 deletion tests/Unit/Controller/SearchControllerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public function searchActionAssignsQueryToTemplate(): void
/**
* @test
*/
public function searchActionAssignsResultsToTemplate(): never
public function searchActionAssignsResultsToTemplate(): void
{
self::markTestIncomplete('Need to move repository to DI and replace by mock');

Expand Down
Loading

0 comments on commit fa028ef

Please sign in to comment.