Skip to content

Commit

Permalink
Added method to parse language from HTML as 'html-lang' key + tests for
Browse files Browse the repository at this point in the history
  • Loading branch information
gRegorLove committed May 8, 2016
1 parent 0ccc493 commit 9dbb991
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 1 deletion.
35 changes: 34 additions & 1 deletion Mf2/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,35 @@ public function innerText($el, $implied=false) {
return ($out === '') ? NULL : $out;
}

/**
 * Determine the language that applies to an element.
 *
 * Resolution order:
 *  1. the element's own lang attribute;
 *  2. otherwise the lang of the nearest ancestor element (resolved recursively);
 *  3. once the <html> element is reached without a lang attribute, the content
 *     of a <meta http-equiv="Content-Language"> element found via $this->xpath.
 *
 * @param DOMElement $el element whose language should be resolved
 * @access public
 * @return string the trimmed language value, or '' when none can be determined
 */
public function language(DOMElement $el)
{
    // element has a lang attribute; use it
    if ($el->hasAttribute('lang')) {
        return trim($el->getAttribute('lang'));
    }

    if ($el->tagName === 'html') {
        // we're at the <html> element and no lang; check <meta> http-equiv Content-Language
        foreach ($this->xpath->query('.//meta[@http-equiv]') as $node) {
            if (
                $node->hasAttribute('http-equiv')
                && $node->hasAttribute('content')
                && strtolower($node->getAttribute('http-equiv')) === 'content-language'
            ) {
                return trim($node->getAttribute('content'));
            }
        }
    } elseif ($el->parentNode instanceof DOMElement) {
        // Only climb while the parent is an element: the parent of the root
        // element is the DOMDocument (and a detached element has no parent),
        // neither of which satisfies this method's DOMElement type hint.
        return $this->language($el->parentNode);
    }

    return '';
} # end method language()

// TODO: figure out if this has problems with sms: and geo: URLs
public function resolveUrl($url) {
// If the URL is seriously malformed it’s probably beyond the scope of this
Expand Down Expand Up @@ -741,7 +770,8 @@ public function parseE(\DOMElement $e) {

return array(
'html' => $html,
'value' => unicodeTrim($this->innerText($e))
'value' => unicodeTrim($this->innerText($e)),
'html-lang' => $this->language($e)
);
}

Expand Down Expand Up @@ -1000,6 +1030,9 @@ public function parseH(\DOMElement $e) {
$return['url'][] = $this->resolveUrl($url);
}

// Language
$return['html-lang'] = $this->language($e);

// Make sure things are in alphabetical order
sort($mfTypes);

Expand Down
97 changes: 97 additions & 0 deletions tests/Mf2/ParseLanguageTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
<?php

/**
* Tests of the language parsing methods within Mf2\Parser
*/

namespace Mf2\Parser\Test;

use Mf2\Parser;
use Mf2;
use PHPUnit_Framework_TestCase;

/**
 * Verifies that the parser exposes the resolved language of each microformat
 * under the 'html-lang' property, covering the lang attribute on the element
 * itself, inheritance from ancestors, and the <meta> Content-Language fallback.
 */
class ParseLanguageTest extends PHPUnit_Framework_TestCase {

    public function setUp() {
        date_default_timezone_set('Europe/London');
    }

    /**
     * Parse an HTML string and return the top-level 'items' of the result.
     *
     * @param string $input HTML document to parse
     * @return array the 'items' list from the parser output
     */
    private function parseItems($input)
    {
        $parser = new Parser($input);
        $result = $parser->parse();

        return $result['items'];
    } # end method parseItems()

    /**
     * Test with only <html lang>
     */
    public function testHtmlLangOnly()
    {
        $items = $this->parseItems('<html lang="en"> <div class="h-entry">This test is in English.</div> </html>');

        $this->assertSame('en', $items[0]['properties']['html-lang']);
    } # end method testHtmlLangOnly()

    /**
     * Test with only h-entry lang
     */
    public function testHEntryLangOnly()
    {
        $items = $this->parseItems('<html> <div class="h-entry" lang="en">This test is in English.</div> </html>');

        $this->assertSame('en', $items[0]['properties']['html-lang']);
    } # end method testHEntryLangOnly()

    /**
     * Test with different <html lang> and h-entry lang; the h-entry's own
     * lang takes precedence.
     */
    public function testHtmlAndHEntryLang()
    {
        $items = $this->parseItems('<html lang="en"> <div class="h-entry" lang="es">Esta prueba está en español.</div> </html>');

        $this->assertSame('es', $items[0]['properties']['html-lang']);
    } # end method testHtmlAndHEntryLang()

    /**
     * Test with different <html lang>, h-entry lang, and h-entry without lang,
     * which should inherit from the <html lang>
     */
    public function testMultiLanguageInheritance()
    {
        $items = $this->parseItems('<html lang="en"> <div class="h-entry">This test is in English.</div> <div class="h-entry" lang="es">Esta prueba está en español.</div> </html>');

        $this->assertSame('en', $items[0]['properties']['html-lang']);
        $this->assertSame('es', $items[1]['properties']['html-lang']);
    } # end method testMultiLanguageInheritance()

    /**
     * Test feed with .h-feed lang which contains multiple h-entries of different languages
     * (or none specified), which should inherit from the .h-feed lang.
     */
    public function testMultiLanguageFeed()
    {
        // The h-feed <div> is explicitly closed so the fixture does not depend
        // on the HTML parser's recovery from unbalanced markup.
        $items = $this->parseItems('<html> <div class="h-feed" lang="en"> <h1 class="p-name">Test Feed</h1> <div class="h-entry">This test is in English.</div> <div class="h-entry" lang="es">Esta prueba está en español.</div> <div class="h-entry" lang="fr">Ce test est en français.</div> </div> </html>');

        $feed = $items[0];

        $this->assertSame('en', $feed['properties']['html-lang']);
        $this->assertSame('en', $feed['children'][0]['properties']['html-lang']);
        $this->assertSame('es', $feed['children'][1]['properties']['html-lang']);
        $this->assertSame('fr', $feed['children'][2]['properties']['html-lang']);
    } # end method testMultiLanguageFeed()

    /**
     * Test with language specified in <meta> http-equiv Content-Language
     */
    public function testMetaContentLanguage()
    {
        $items = $this->parseItems('<html> <meta http-equiv="Content-Language" content="es"/> <div class="h-entry">Esta prueba está en español.</div> </html>');

        $this->assertSame('es', $items[0]['properties']['html-lang']);
    } # end method testMetaContentLanguage()

}

0 comments on commit 9dbb991

Please sign in to comment.