diff --git a/Changes b/Changes index d181d8d43..a4960262f 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,12 @@ +0.8.9 2024-Winter + - This release addresses a large variety of usability, fidelity, robustness, + portability and output-quality issues. + - + Incompatible changes: The API of several low-level internal functions has changed. + In the unlikely event you have used these in your own bindings, they will need to be + updated. + - LaTeXML::Core::Definition::CharDef->new($cs,$mode,$value) + - decodeMathChar($mathcode,$reversion) returns ($glyph,$font,$reversion,%mathproperties) 0.8.8 2024-02-29 - This release addresses a large variety of usability, fidelity, robustness, portability and output-quality issues. diff --git a/lib/LaTeXML/Core/Definition/CharDef.pm b/lib/LaTeXML/Core/Definition/CharDef.pm index 999a6b581..a0b448b91 100644 --- a/lib/LaTeXML/Core/Definition/CharDef.pm +++ b/lib/LaTeXML/Core/Definition/CharDef.pm @@ -23,14 +23,14 @@ use base qw(LaTeXML::Core::Definition::Register); # A CharDef is a specialized register; # You can't assign it; when you invoke the control sequence, it returns # the result of evaluating the character (more like a regular primitive). +# When $mode is 'math', $value is interpreted as a (3-part) mathcode; otherwise it is just an index into the current font. 
# When $mathglyph is provided, it is the unicode corresponding to the \mathchar of $value sub new { - my ($class, $cs, $value, $mathglyph, %traits) = @_; + my ($class, $cs, $mode, $value) = @_; return bless { cs => $cs, parameters => undef, - value => $value, mathglyph => $mathglyph, - registerType => 'Number', readonly => 1, - locator => $STATE->getStomach->getGullet->getMouth->getLocator, - %traits }, $class; } + mode => $mode, value => $value, + registerType => 'Number', readonly => 1, + locator => $STATE->getStomach->getGullet->getMouth->getLocator }, $class; } sub valueOf { my ($self) = @_; @@ -43,22 +43,26 @@ sub setValue { sub invoke { my ($self, $stomach) = @_; - my $value = $$self{value}; - my $mathglyph = $$self{mathglyph}; + my $value = $$self{value}; + my $nvalue = $value->valueOf; # A dilemma: If the \chardef were in a style file, you're prefer to revert to the $cs # but if defined in the document source, better to use \char ###\relax, so it still "works" my $src = $$self{locator} && $$self{locator}->toString; my $local = $src && $src !~ /\.(?:sty|ltxml|ltxmlc)/; # Dumps currently have undefined src! - if (defined $mathglyph) { # Must be a math char - return Box($mathglyph, undef, undef, - ($local ? Tokens(T_CS('\mathchar'), $value->revert, T_CS('\relax')) : $$self{cs}), - role => $$self{role}); } - else { # else text; but note defered font/encoding till digestion! - # Decode the codepoint using current font & encoding - my ($glyph, $adjfont) = LaTeXML::Package::FontDecode($value->valueOf); + if ($$self{mode} eq 'text') { # text; but note defered font/encoding till digestion! + # Decode the codepoint using current font & encoding + my ($glyph, $adjfont) = LaTeXML::Package::FontDecode($nvalue); return Box($glyph, $adjfont, undef, - ($local ? Tokens(T_CS('\char'), $value->revert, T_CS('\relax')) : $$self{cs}), - ); } } + ($local ? Tokens(T_CS('\char'), $value->revert, T_CS('\relax')) : $$self{cs})); } + else { # Else math mode, mathDecode! 
+ my ($glyph, $f, $rev, %props) = LaTeXML::Package::decodeMathChar($nvalue); + if (!defined $props{name}) { # Synthesize name attribute from CS, if needed (Clarify purpose of name!) + my $n = $self->getCSName; + $n =~ s/^\\//; + $props{name} = $n if !$props{meaning} || ($n ne $props{meaning}); } + return Box($glyph, undef, undef, + ($local ? Tokens(T_CS('\mathchar'), $value->revert, T_CS('\relax')) : $$self{cs}), + %props); } } sub equals { my ($self, $other) = @_; diff --git a/lib/LaTeXML/Core/Stomach.pm b/lib/LaTeXML/Core/Stomach.pm index 26c69c9ca..e745fe6b9 100644 --- a/lib/LaTeXML/Core/Stomach.pm +++ b/lib/LaTeXML/Core/Stomach.pm @@ -244,8 +244,8 @@ sub invokeToken_simple { $STATE->clearPrefixes; # prefixes shouldn't apply here. if (my $mathcode = $STATE->lookupValue('IN_MATH') && $STATE->lookupMathcode($meaning->toString)) { - my ($role, $glyph, $f, $reversion) = LaTeXML::Package::decodeMathChar($mathcode, $meaning); - return Box($glyph, $f, undef, $reversion, role => $role); } + my ($glyph, $f, $reversion, %props) = LaTeXML::Package::decodeMathChar($mathcode, $meaning); + return Box($glyph, $f, undef, $reversion, %props); } else { return Box(LaTeXML::Package::FontDecodeString($meaning->toString, undef, 1), undef, undef, $meaning); } } } diff --git a/lib/LaTeXML/Engine/LaTeX.pool.ltxml b/lib/LaTeXML/Engine/LaTeX.pool.ltxml index 1fac2dd67..a97396bbd 100644 --- a/lib/LaTeXML/Engine/LaTeX.pool.ltxml +++ b/lib/LaTeXML/Engine/LaTeX.pool.ltxml @@ -6293,8 +6293,8 @@ Let('\mathalpha', '\relax'); DefPrimitive('\mathhexbox {}{}{}', sub { my ($stomach, $a, $b, $c) = @_; my $n = ToString($a) * 256 + ToString($b) * 16 + ToString($c); - my ($role, $glyph) = decodeMathChar($n); - return Box($glyph, LookupValue('font')->specialize($glyph)); }); + my ($glyph, $f, $rev, %props) = decodeMathChar($n); + return Box($glyph, $f, undef, undef, %props); }); DefMacroI('\nocorrlist', undef, ',.'); Let('\nocorr', '\relax'); diff --git a/lib/LaTeXML/Engine/TeX_Character.pool.ltxml 
b/lib/LaTeXML/Engine/TeX_Character.pool.ltxml index b2d20a76b..034555c01 100644 --- a/lib/LaTeXML/Engine/TeX_Character.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Character.pool.ltxml @@ -105,7 +105,7 @@ DefPrimitive('\chardef Token SkipSpaces SkipMatch:=', sub { my ($stomach, $newcs) = @_; $STATE->assignMeaning($newcs, $STATE->lookupMeaning(T_CS('\relax'))); # Let w/o AfterAssignment my $value = $stomach->getGullet->readNumber(); - $STATE->installDefinition(LaTeXML::Core::Definition::CharDef->new($newcs, $value)); + $STATE->installDefinition(LaTeXML::Core::Definition::CharDef->new($newcs, 'text', $value)); AfterAssignment(); return; }); diff --git a/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml b/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml index c924b3021..a751e21f4 100644 --- a/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml @@ -276,9 +276,9 @@ DeclareFontMap('OMX', "\x{23A7}", "\x{23AB}", "\x{23A9}", "\x{23AD}", "\x{23A8}", "\x{23AC}", "\x{23AA}", "\x{23D0}", # l.bot.paren r.bot.paren l.paren.ext r.paren.ext "\x{239D}", "\x{23A0}", "\x{239C}", "\x{239F}", "\x{27E8}", "\x{27E9}", "\x{2294}", "\x{2294}", - "\x{222E}", "\x{222E}", "\x{2299}", "\x{2299}", "\x{2295}", "\x{2295}", "\x{2297}", "\x{2297}", - "\x{2211}", "\x{220F}", "\x{222B}", "\x{22C3}", "\x{22C2}", "\x{228C}", "\x{2227}", "\x{2228}", - "\x{2211}", "\x{220F}", "\x{222B}", "\x{22C3}", "\x{22C2}", "\x{228C}", "\x{2227}", "\x{2228}", + "\x{222E}", "\x{222E}", "\x{2A00}", "\x{2A00}", "\x{2A01}", "\x{2A01}", "\x{2A02}", "\x{2A02}", + "\x{2211}", "\x{220F}", "\x{222B}", "\x{22C3}", "\x{22C2}", "\x{2A04}", "\x{22C0}", "\x{22C1}", + "\x{2211}", "\x{220F}", "\x{222B}", "\x{22C3}", "\x{22C2}", "\x{2A04}", "\x{22C0}", "\x{22C1}", "\x{2210}", "\x{2210}", UTF(0x5E), UTF(0x5E), UTF(0x5E), UTF(0x7E), UTF(0x7E), UTF(0x7E), "[", "]", "\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}", "{", "}", # [missing rad frags] double arrow ext. 
diff --git a/lib/LaTeXML/Engine/TeX_Math.pool.ltxml b/lib/LaTeXML/Engine/TeX_Math.pool.ltxml index be3e8d277..c8c17ffe7 100644 --- a/lib/LaTeXML/Engine/TeX_Math.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Math.pool.ltxml @@ -569,21 +569,21 @@ DefRegister('\everydisplay', Tokens()); DefPrimitive('\mathchar Number', sub { my ($stomach, $code) = @_; - my ($role, $glyph, $font, $reversion) = decodeMathChar($code, + my ($glyph, $font, $reversion, %props) = decodeMathChar($code, Tokens(T_CS('\mathchar'), $_[1]->revert, T_CS('\relax'))); - return Box($glyph, $font, undef, $reversion, role => $role); }); + return Box($glyph, $font, undef, $reversion, %props); }); DefConstructor('\delimiter Number', - "?#glyph(?#isMath(#glyph)(#glyph))", +"?#glyph(?#isMath(#glyph)(#glyph))", sizer => '#glyph', afterDigest => sub { my ($stomach, $whatsit) = @_; my $n = $whatsit->getArg(1)->valueOf; $n = $n >> 12; # Ignore 3 rightmost digits and treat as \mathchar - my ($role, $glyph) = decodeMathChar($n); - $whatsit->setProperty(glyph => $glyph) if $glyph; - $whatsit->setProperty(role => $role) if defined $role; - $whatsit->setProperty(font => LookupValue('font')->specialize($glyph)) if $glyph; + my ($glyph, $f, $rev, %props) = decodeMathChar($n); + $whatsit->setProperty(glyph => $glyph) if $glyph; + $whatsit->setProperties(%props) if %props; + $whatsit->setProperty(font => $f) if $glyph; return; }); # Almost like a register, but different... 
@@ -591,26 +591,27 @@ DefPrimitive('\mathchardef Token SkipSpaces SkipMatch:=', sub { my ($stomach, $newcs) = @_; $STATE->assignMeaning($newcs, $STATE->lookupMeaning(T_CS('\relax'))); # Let w/o AfterAssignment my $value = $stomach->getGullet->readNumber(); - my ($role, $glyph) = decodeMathChar($value->valueOf); - $STATE->installDefinition(LaTeXML::Core::Definition::CharDef->new($newcs, $value, - $glyph, role => $role)); + $STATE->installDefinition(LaTeXML::Core::Definition::CharDef->new($newcs, 'math', $value)); AfterAssignment(); return; }); DefConstructor('\mathaccent Number Digested', - "#glyph#2", +"#glyph#2", sizer => '#2', # Close enough? afterDigest => sub { my ($stomach, $whatsit) = @_; my $n = $whatsit->getArg(1)->valueOf; - my ($role, $glyph) = decodeMathChar($n); - my $accrole = 'OVERACCENT'; - if (my $entry = unicode_accent($glyph)) { - $glyph = $$entry{unwrapped}; - $accrole = $$entry{role}; } - $whatsit->setProperty(glyph => $glyph) if $glyph; - $whatsit->setProperty(font => LookupValue('font')->specialize($glyph)) if $glyph; - $whatsit->setProperty(accrole => $accrole) if $glyph; + my ($glyph, $f, $rev, %props) = decodeMathChar($n); + my $name; + my $acc_props = unicode_accent($glyph) || {}; + $glyph = $$acc_props{unwrapped} if $$acc_props{unwrapped}; + my $accrole = $$acc_props{role} || 'OVERACCENT'; + $name = $$acc_props{name}; + $whatsit->setProperty(glyph => $glyph) if $glyph; + $whatsit->setProperty(font => LookupValue('font')->specialize($glyph)) if $glyph; + $whatsit->setProperty(accrole => $accrole) if $glyph; + $whatsit->setProperty(name => $name) if $name; + $whatsit->setProperty(stretchy => $$acc_props{stretchy} || 'false'); # stretchy ? return; }); # # Only used for active math characters, so far @@ -649,23 +650,48 @@ DefRegister('\fam' => Number(-1), # \mathpunct c assigns class 6 (punctuation) to following character or subformula. # \mathrel c assigns class 3 (relation) to following character or subformula. 
+# Add an XMWrap, adjusting the math role unless it's already a sub-class of the requested coarse TeX math classes # Is XMWrap the right thing to wrap with (instead of XMArg)? -# We can't really assume that the stuff inside is sensible math. -# NOTE that \mathord and \mathbin aren't really right here. -# We need a finer granularity than TeX does: an ORD could be several things, -# a BIN could be a MULOP or ADDOP. -# AND, rarely, they're empty.... Is it wrong to drop them? -DefConstructor('\mathord Digested', "?#1(#1)()", bounded => 1); -# Parameter Should be Digested, but that throws off doScriptPos's position depth !?!?! -DefConstructor('\mathop {}', "?#1(#1)()", - bounded => 1, properties => { scriptpos => \&doScriptpos }); - -DefConstructor('\mathbin Digested', "?#1(#1)()", bounded => 1); -DefConstructor('\mathrel Digested', "?#1(#1)()", bounded => 1); -DefConstructor('\mathopen Digested', "?#1(#1)()", bounded => 1); -DefConstructor('\mathclose Digested', "?#1(#1)()", bounded => 1); -DefConstructor('\mathpunct Digested', "?#1(#1)()", bounded => 1); -DefConstructor('\mathinner Digested', "?#1(#1)()", bounded => 1); +our %mathclass_subclass = ( + BIGOP => { ARROW => 1, SUMOP => 1, INTOP => 1, DIFFOP => 1 }, + BINOP => { ADDOP => 1, MULOP => 1 }, + RELOP => {}, + OPEN => {}, + CLOSE => {}, + PUNCT => { PERIOD => 1 }, + ID => { NUMBER => 1 }, + ATOM => {}, # really any role +); + +sub adjustMathRole { + my ($role, $document, $node, %props) = @_; + if (!$node) { } # Nothing? do nothing! + else { + my $wrapper = $document->openElement('ltx:XMWrap'); + $document->absorb($node); + $document->closeElement('ltx:XMWrap'); + my @nodes = element_nodes($wrapper); + @nodes = grep { $document->getNodeQName($_) ne 'ltx:XMHint'; } @nodes; + my $applied = 0; + my $gotrole = ''; + if ((scalar(@nodes) == 1) # Got single node + && ($gotrole = $nodes[0]->getAttribute('role')) # with a role + && (($role eq 'ATOM') || $mathclass_subclass{$role}{$gotrole})) { } # and acceptable? 
Do nothing + else { + $applied = 1; + $wrapper->setAttribute(role => $role); } # Else, assign the requested role + $wrapper->setAttribute(scriptpos => $props{scriptpos}) if defined $props{scriptpos}; + $wrapper->setAttribute(mathstyle => $props{mathstyle}) if defined $props{mathstyle}; } + return; } +DefConstructor('\mathord Digested', sub { adjustMathRole('ID', @_); }); +DefConstructor('\mathop Digested', sub { adjustMathRole('BIGOP', @_); }, + properties => { scriptpos => \&doScriptpos }); +DefConstructor('\mathbin Digested', sub { adjustMathRole('BINOP', @_); }); +DefConstructor('\mathrel Digested', sub { adjustMathRole('RELOP', @_); }); +DefConstructor('\mathopen Digested', sub { adjustMathRole('OPEN', @_); }); +DefConstructor('\mathclose Digested', sub { adjustMathRole('CLOSE', @_); }); +DefConstructor('\mathpunct Digested', sub { adjustMathRole('PUNCT', @_); }); +DefConstructor('\mathinner Digested', sub { adjustMathRole('ATOM', @_); }); #====================================================================== # Delimiters diff --git a/lib/LaTeXML/Package.pm b/lib/LaTeXML/Package.pm index 5dca9669a..e46913611 100644 --- a/lib/LaTeXML/Package.pm +++ b/lib/LaTeXML/Package.pm @@ -1789,7 +1789,7 @@ sub defmath_cons { ? $cs : $presentation->unlist); }; } $STATE->installDefinition(LaTeXML::Core::Definition::Constructor->new($defcs, $paramlist, ($nargs == 0 - # If trivial presentation, allow it in Text + # If trivial presentation, allow it in Text ? ($presentation !~ /(?:\(|\)|\\)/ ? "?#isMath(unlist } - map { (blessed $_ ? $_ : TokenizeInternal($_)) } ($defn->getExpansion, @tokens)), + map { (blessed $_ ? 
$_ : TokenizeInternal($_)) } ($defn->getExpansion, @tokens)), nopackParameters => 1, scope => 'global', locked => $$defn{locked}); } return; } @@ -2833,7 +2833,8 @@ sub decodeMathChar { my $curfam = $STATE->lookupValue('fontfamily') // -1; my $initfont = $STATE->lookupValue('initial_math_font') || $curfont; my ($fontdef, $fontinfo); - my ($oclass, $ofam) = ($class, $fam); + my ($oclass, $ofam) = ($class, $fam); + my $downsize = 0; # Special case: class 7 means use the \fam as the family code, if 0<=f<=15; if ($class == 7) { $fam = $curfam if (defined $curfam) && (0 <= $curfam) && ($curfam <= 15); } @@ -2846,24 +2847,48 @@ sub decodeMathChar { $fontdef = T_CS('\font'); # Assume specified by \mathrm or something similar! $fontinfo = $STATE->lookupValue('font')->asFontinfo; } else { - $fontdef = LookupValue('textfont_' . $fam); + my $style = $curfont->getMathstyle; + $style = 'text' unless $style && ($style =~ /^(:?scriptscript|script|text)$/); + my $basefontdef = LookupValue('textfont_0'); + my $basefontdefn = $STATE->lookupDefinition($basefontdef); + my $basefontinfo = $basefontdefn && $basefontdefn->isFontDef; + if ($style eq 'text') { # Lookup the requested font according to script level, but with adjusted fallbacks + $fontdef = LookupValue('textfont_' . $fam); } + elsif ($style eq 'script') { + if ($fontdef = LookupValue('scriptfont_' . $fam)) { } + elsif ($fontdef = LookupValue('textfont_' . $fam)) { $downsize = 1; } } + elsif ($style eq 'scriptscript') { + if ($fontdef = LookupValue('scriptscriptfont_' . $fam)) { } + elsif ($fontdef = LookupValue('scriptfont_' . $fam)) { $downsize = 1; } + elsif ($fontdef = LookupValue('textfont_' . $fam)) { $downsize = 2; } } my $defn = $STATE->lookupDefinition($fontdef); - $fontinfo = $defn && $defn->isFontDef; } - my $font = $curfont->merge(%$fontinfo); + $fontinfo = $defn && $defn->isFontDef; + if ($fontinfo && ($$basefontinfo{size} != $curfont->getSize)) { # If we've gotten an explicit font SIZE change; Adjust! 
+ $fontinfo = {%$fontinfo}; $$fontinfo{size} = $curfont->getSize; } } + my $font = $curfont->merge(%$fontinfo); + if ($downsize > 0) { $font = $curfont->merge(scripted => 1); } + if ($downsize > 1) { $font = $curfont->merge(scripted => 1); } + my $encoding = $fontinfo && $$fontinfo{encoding} || ''; my ($glyph, $f) = ($encoding ? FontDecode($n, $encoding, $font) : ($char, $font)); # If no specific class, Lookup properties from a DefMath? [Eventually: Unicode data!] - my $charinfo = (defined $glyph ? LookupValue('math_token_attributes_' . $glyph) : ()); + my $charinfo = unicode_math_properties($glyph); my $role = ($charinfo && $$charinfo{role}) || $mathclassrole[$class]; - my $size = $curfont->getSize; - $f = $f->merge(size => $size); + my %props = (); + %props = %$charinfo if $charinfo; + $props{role} = $role if $role && !$props{role}; + my $in_display = $curfont->getMathstyle eq 'display'; + if ($props{need_scriptpos}) { + $props{scriptpos} = ($in_display ? 'mid' : 'post'); } + if ($props{need_mathstyle}) { + $props{mathstyle} = ($in_display ? 'display' : 'text'); } my %d = $f->relativeTo($curfont); if ($reversion) { %d = () if LookupValue('LaTeX.pool.ltxml_loaded'); my $rev = ($maybe_rev && %d ? 
Tokens(T_BEGIN, $fontdef, $reversion, T_END) : $reversion); - return ($role, $glyph, $f, $rev); } + return ($glyph, $f, $rev, %props); } else { - return ($role, $glyph, $f); } } + return ($glyph, $f, undef, %props); } } #====================================================================== # Color diff --git a/lib/LaTeXML/Util/Unicode.pm b/lib/LaTeXML/Util/Unicode.pm index 01f8cc14b..854871b52 100644 --- a/lib/LaTeXML/Util/Unicode.pm +++ b/lib/LaTeXML/Util/Unicode.pm @@ -14,7 +14,7 @@ use strict; use warnings; use base qw(Exporter); use charnames ':full'; -our @EXPORT = qw( &UTF &unicode_accent &unicode_mathvariant &unicode_convert); +our @EXPORT = qw( &UTF &unicode_accent &unicode_mathvariant &unicode_convert &unicode_math_properties); #====================================================================== # Unicode manipulation utilities useful for LaTeXML # Mostly, but not exclusively, about Mathematics @@ -156,7 +156,7 @@ my %unicode_map = ( # CONSTANT 'h' => "\x{02B0}", # aspirated!? 'j' => "\x{02B2}", 'r' => "\x{02B3}", - 'W' => "\x{02B7}", + 'w' => "\x{02B7}", 'y' => "\x{02B8}", 's' => "\x{02E2}", 'x' => "\x{02E3}", @@ -202,10 +202,8 @@ my %unicode_map = ( # CONSTANT "\x{03C6}" => "\x{1D60}", # \varphi "\x{03D5}" => "\x{1D60}", # \phi; close enough? "\x{03BE}" => "\x{1D61}", # \xi - 'H' => "\x{1D78}", 'c' => "\x{1D9C}", 'f' => "\x{1DA0}", - 'g' => "\x{1DA2}", "\x{03A6}" => "\x{1DB2}", # \Phi? 
"\x{03C5}" => "\x{1DB7}", # \upsilon 'z' => "\x{1DBB}", @@ -326,6 +324,233 @@ sub unicode_mathvariant { # return $variant if $variant = $mathvariants{$font}; return 'normal'; } +#====================================================================== +our %math_props = ( + #====================================================================== + "0" => { role => 'NUMBER', meaning => 0 }, + "1" => { role => 'NUMBER', meaning => 1 }, + "2" => { role => 'NUMBER', meaning => 2 }, + "3" => { role => 'NUMBER', meaning => 3 }, + "4" => { role => 'NUMBER', meaning => 4 }, + "5" => { role => 'NUMBER', meaning => 5 }, + "6" => { role => 'NUMBER', meaning => 6 }, + "7" => { role => 'NUMBER', meaning => 7 }, + "8" => { role => 'NUMBER', meaning => 8 }, + "9" => { role => 'NUMBER', meaning => 9 }, + #====================================================================== + '=' => { role => 'RELOP', meaning => 'equals' }, + '+' => { role => 'ADDOP', meaning => 'plus' }, + '-' => { role => 'ADDOP', meaning => 'minus' }, + '*' => { role => 'MULOP', meaning => 'times' }, + '/' => { role => 'MULOP', meaning => 'divide' }, + '!' => { role => 'POSTFIX', meaning => 'factorial' }, + ',' => { role => 'PUNCT' }, + '.' => { role => 'PERIOD' }, + ';' => { role => 'PUNCT' }, + ':' => { role => 'METARELOP', name => 'colon' }, # plausible default? 
+ '|' => { role => 'VERTBAR', stretchy => 'false' }, + '<' => { role => 'RELOP', meaning => 'less-than' }, + '>' => { role => 'RELOP', meaning => 'greater-than' }, + '(' => { role => 'OPEN', stretchy => 'false' }, + ')' => { role => 'CLOSE', stretchy => 'false' }, + '[' => { role => 'OPEN', stretchy => 'false' }, + ']' => { role => 'CLOSE', stretchy => 'false' }, + '{' => { role => 'OPEN', stretchy => 'false' }, + '}' => { role => 'CLOSE', stretchy => 'false' }, + +## ':' => { role => 'METARELOP' }, # \colon # Seems like good default role + + #====================================================================== + UTF(0x5C) => { role => 'ADDOP', meaning => 'set-minus' }, # \backslash + UTF(0xAC) => { role => 'BIGOP', meaning => 'not' }, # \neg, \lnot + UTF(0xAC) => { role => 'BIGOP', meaning => 'not' }, # \neg + UTF(0xB1) => { role => 'ADDOP', meaning => 'plus-or-minus' }, # \pm + UTF(0xD7) => { role => 'MULOP', meaning => 'times' }, # \times + UTF(0xF7) => { role => 'MULOP', meaning => 'divide' }, # \div + + #====================================================================== + "\x{2020}" => { role => 'MULOP' }, # \dagger + "\x{2021}" => { role => 'MULOP' }, # \ddagger + "\x{2032}" => { role => 'SUPOP', }, # \prime + "\x{2061}" => { role => 'APPLYOP', name => '', reversion => '' }, + "\x{2062}" => { role => 'MULOP', meaning => 'times', name => '', reversion => '' }, + "\x{2063}" => { role => 'PUNCT', name => '', reversion => '' }, + "\x{2064}" => { role => 'ADDOP', meaning => 'plus', name => '', reversion => '' }, + "\x{210F}" => { role => 'ID', meaning => 'Planck-constant-over-2-pi' }, # \hbar + "\x{2111}" => { role => 'OPFUNCTION', meaning => 'imaginary-part' }, # \Im + "\x{2118}" => { role => 'OPFUNCTION', meaning => 'Weierstrass-p' }, # \wp + "\x{211C}" => { role => 'OPFUNCTION', meaning => 'real-part' }, # \Re + "\x{2190}" => { role => 'ARROW' }, # \leftarrow # LEFTWARDS ARROW + "\x{2191}" => { role => 'ARROW', name => 'uparrow' }, # \uparrow # UPWARDS 
ARROW + "\x{2192}" => { role => 'ARROW' }, # \to, \rightarrow # RIGHTWARDS ARROW + "\x{2193}" => { role => 'ARROW', name => 'downarrow' }, # \downarrow # DOWNWARDS ARROW + "\x{2194}" => { role => 'METARELOP' }, # \leftrightarrow # LEFT RIGHT ARROW + "\x{2195}" => { role => 'ARROW', name => 'updownarrow' }, # \updownarrow # UP DOWN ARROW + "\x{2196}" => { role => 'ARROW' }, # \nwarrow # NORTH WEST ARROW + "\x{2197}" => { role => 'ARROW' }, # \nearrow # NORTH EAST ARROW + "\x{2198}" => { role => 'ARROW' }, # \searrow # SOUTH EAST ARROW + "\x{2199}" => { role => 'ARROW' }, # \swarrow # SOUTH WEST ARROW + "\x{219D}" => { role => 'ARROW', meaning => 'leads-to' }, # \leadsto # + "\x{21A6}" => { role => 'ARROW', meaning => 'maps-to' }, # \mapsto # + "\x{21A9}" => { role => 'ARROW' }, # \hookleftarrow # LEFTWARDS ARROW WITH HOOK + "\x{21AA}" => { role => 'ARROW' }, # \hookrightarrow # RIGHTWARDS ARROW WITH HO}, + "\x{21BC}" => { role => 'ARROW' }, # \leftharpoonup # LEFTWARDS HARPOON WITH BARB UPWARDS + "\x{21BD}" => { role => 'ARROW' }, # \leftharpoondown # LEFTWARDS HARPOON WITH BARB DOWNWARDS, + "\x{21C0}" => { role => 'ARROW' }, # \rightharpoonup # RIGHTWARDS HARPOON WITH BARB UPWARDS + "\x{21C1}" => { role => 'ARROW' }, # \rightharpoondown # RIGHTWARDS HARPOON WITH BARB DOWNWARDS + "\x{21CC}" => { role => 'METARELOP' }, # \rightleftharpoons # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON + "\x{21D0}" => { role => 'ARROW' }, # \Leftarrow # LEFTWARDS DOUBLE ARROW + "\x{21D1}" => { role => 'ARROW', name => 'Uparrow' }, # \Uparrow # UPWARDS DOUBLE ARROW + "\x{21D2}" => { role => 'ARROW' }, # \Rightarrow # RIGHTWARDS DOUBLE ARROW + "\x{21D3}" => { role => 'ARROW', name => 'Downarrow' }, # \Downarrow # DOWNWARDS DOUBLE ARROW + "\x{21D4}" => { role => 'METARELOP', meaning => 'iff' }, # ,\Leftrightarrow, \iff # LEFT RIGHT DOUBLE ARROW + "\x{21D5}" => { role => 'ARROW', name => 'Updownarror' }, # \Updownarrow # UP DOWN DOUBLE ARROW + "\x{2200}" => { role => 'BIGOP', meaning => 
'for-all' }, # \forall + "\x{2202}" => { role => 'DIFFOP', meaning => 'partial-differential' }, # \partial + "\x{2203}" => { role => 'BIGOP', meaning => 'exists' }, # \exists + "\x{2205}" => { role => 'ID', meaning => 'empty-set' }, # \emptyset + "\x{2207}" => { role => 'OPERATOR' }, # \nabla + "\x{2208}" => { role => 'RELOP', meaning => 'element-of' }, # \in + "\x{2209}" => { role => 'RELOP', meaning => 'not-element-of' }, # \notin + "\x{220B}" => { role => 'RELOP', meaning => 'contains' }, # \ni + "\x{220F}" => { role => 'SUMOP', meaning => 'product', need_scriptpos => 1, need_mathstyle => 1 }, # \prod +"\x{2210}" => { role => 'SUMOP', meaning => 'coproduct', need_scriptpos => 1, need_mathstyle => 1 }, # \amalg, \coprod + "\x{2211}" => { role => 'SUMOP', meaning => 'sum', need_scriptpos => 1, need_mathstyle => 1 }, # \sum + "\x{2213}" => { role => 'ADDOP', meaning => 'minus-or-plus' }, # \mp + "\x{2216}" => { role => 'ADDOP', meaning => 'set-minus' }, # \setminus + "\x{2217}" => { role => 'MULOP', meaning => 'times' }, # \ast + "\x{2218}" => { role => 'MULOP', meaning => 'compose' }, # \circ + "\x{2219}" => { role => 'MULOP' }, # \bullet + "\x{221A}" => { role => 'OPERATOR', meaning => 'square-root' }, # \surd + "\x{221D}" => { role => 'RELOP', meaning => 'proportional-to' }, # \propto + "\x{221E}" => { role => 'ID', meaning => 'infinity' }, # \infty + "\x{2223}" => { role => 'VERTBAR' }, # \midDIVIDES (RELOP?) ?? well, sometimes.}, + "\x{2225}" => { role => 'VERTBAR', meaning => 'parallel-to', name => '||' }, # \parallel + "\x{2227}" => { role => 'ADDOP', meaning => 'and' }, # \land, \wedge + "\x{2228}" => { role => 'ADDOP', meaning => 'or' }, # \lor, \vee + "\x{2229}" => { role => 'ADDOP', meaning => 'intersection' }, # \cap + "\x{222A}" => { role => 'ADDOP', meaning => 'union' }, # \cup + "\x{222B}" => { role => 'INTOP', meaning => 'integral', need_mathstyle => 1 }, # \int, (\smallint ?) 
+ "\x{222E}" => { role => 'INTOP', meaning => 'contour-integral', need_mathstyle => 1 }, # \oint + "\x{223C}" => { role => 'RELOP', meaning => 'similar-to' }, # \sim + "\x{2240}" => { role => 'MULOP' }, # \wr + "\x{2243}" => { role => 'RELOP', meaning => 'similar-to-or-equals' }, # \simeq + "\x{2245}" => { role => 'RELOP', meaning => 'approximately-equals' }, # \cong + "\x{2248}" => { role => 'RELOP', meaning => 'approximately-equals' }, # \approx + "\x{224D}" => { role => 'RELOP', meaning => 'asymptotically-equals' }, # \asymp + "\x{2250}" => { role => 'RELOP', meaning => 'approaches-limit' }, # \doteq + "\x{2260}" => { role => 'RELOP', meaning => 'not-equals' }, # \neq + "\x{2261}" => { role => 'RELOP', meaning => 'equivalent-to' }, # \equiv + "\x{2264}" => { role => 'RELOP', meaning => 'less-than-or-equals' }, # \leq + "\x{2265}" => { role => 'RELOP', meaning => 'greater-than-or-equals' }, # \geq + "\x{226A}" => { role => 'RELOP', meaning => 'much-less-than' }, # \ll + "\x{226B}" => { role => 'RELOP', meaning => 'much-greater-than' }, # \gg + "\x{227A}" => { role => 'RELOP', meaning => 'precedes' }, # \prec + "\x{227B}" => { role => 'RELOP', meaning => 'succeeds' }, # \succ + "\x{2282}" => { role => 'RELOP', meaning => 'subset-of' }, # \subset + "\x{2283}" => { role => 'RELOP', meaning => 'superset-of' }, # \supset + "\x{2286}" => { role => 'RELOP', meaning => 'subset-of-or-equals' }, # \subseteq + "\x{2287}" => { role => 'RELOP', meaning => 'superset-of-or-equals' }, # \supseteq + "\x{228E}" => { role => 'ADDOP' }, # \uplus + "\x{228F}" => { role => 'RELOP', meaning => 'square-image-of' }, # \sqsubset + "\x{2290}" => { role => 'RELOP', meaning => 'square-original-of' }, # \sqsupset + "\x{2291}" => { role => 'RELOP', meaning => 'square-image-of-or-equals' }, # \sqsubseteq + "\x{2292}" => { role => 'RELOP', meaning => 'square-original-of-or-equals' }, # \sqsupseteq + "\x{2293}" => { role => 'ADDOP', meaning => 'square-intersection' }, # \sqcap + "\x{2294}" => { 
role => 'ADDOP', meaning => 'square-union' }, # \sqcup + "\x{2295}" => { role => 'ADDOP', meaning => 'direct-sum' }, # \oplus + "\x{2296}" => { role => 'ADDOP', meaning => 'symmetric-difference' }, # \ominus + "\x{2297}" => { role => 'MULOP', meaning => 'tensor-product' }, # \otimes + "\x{2298}" => { role => 'MULOP' }, # \oslash + "\x{2299}" => { role => 'MULOP', meaning => 'direct-product' }, # \odot + "\x{22A2}" => { role => 'METARELOP', meaning => 'proves' }, # \vdash + "\x{22A3}" => { role => 'METARELOP', meaning => 'does-not-prove' }, # \dashv + "\x{22A4}" => { role => 'ADDOP', meaning => 'top' }, # \top + "\x{22A5}" => { role => 'ADDOP', meaning => 'bottom' }, # \bot + "\x{22A7}" => { role => 'RELOP', meaning => 'models' }, # \models + "\x{22B2}" => { role => 'ADDOP', meaning => 'subgroup-of' }, # \lhd + "\x{22B3}" => { role => 'ADDOP', meaning => 'contains-as-subgroup' }, # \rhd + "\x{22B4}" => { role => 'ADDOP', meaning => 'subgroup-of-or-equals' }, # \unlhd + "\x{22B5}" => { role => 'ADDOP', meaning => 'contains-as-subgroup-or-equals' }, # \unrhd + "\x{22C0}" => { role => 'SUMOP', meaning => 'and', need_scriptpos => 1, need_mathstyle => 1 }, # \bigwedge + "\x{22C1}" => { role => 'SUMOP', meaning => 'or', need_scriptpos => 1, need_mathstyle => 1 }, # \bigvee +"\x{22C2}" => { role => 'SUMOP', meaning => 'intersection', need_scriptpos => 1, need_mathstyle => 1 }, # \bigcap + "\x{22C3}" => { role => 'SUMOP', meaning => 'union', need_scriptpos => 1, need_mathstyle => 1 }, # \bigcup + "\x{22C4}" => { role => 'ADDOP' }, # \diamond + "\x{22C5}" => { role => 'MULOP' }, # \cdot + "\x{22C6}" => { role => 'MULOP' }, # \star + "\x{22C8}" => { role => 'RELOP' }, # \bowtieBOWTIE + "\x{22EF}" => { role => 'ID' }, # \cdots # MIDLINE HORIZONTAL ELLIPSIS + "\x{22F1}" => { role => 'ID' }, # \ddots # DOWN RIGHT DIAGONAL ELLIPSIS + "\x{2308}" => { role => 'OPEN', name => 'lceil', stretchy => 'false' }, # \lceil # LEFT CEILING + "\x{2309}" => { role => 'CLOSE', name => 'rceil', 
stretchy => 'false' }, # \rceil # RIGHT CEILING + "\x{230A}" => { role => 'OPEN', name => 'lfloor', stretchy => 'false' }, # \lfloor # LEFT FLOOR + "\x{230B}" => { role => 'CLOSE', name => 'rfloor', stretchy => 'false' }, # \rfloor # RIGHT FLOOR + "\x{2322}" => { role => 'RELOP' }, # \frownFRO}, + "\x{2323}" => { role => 'RELOP' }, # \smileSMI}, + "\x{25B3}" => { role => 'ADDOP' }, # \bigtriangleup + "\x{25B7}" => { role => 'ADDOP' }, # \triangleright + "\x{25BD}" => { role => 'ADDOP' }, # \bigtriangledown + "\x{25C1}" => { role => 'ADDOP' }, # \triangleleft + "\x{25CB}" => { role => 'MULOP' }, # \bigcirc + "\x{27C2}" => { role => 'RELOP', meaning => 'perpendicular-to' }, # \perp + "\x{27E8}" => { role => 'OPEN', name => 'langle', stretchy => 'false' }, # \langle # LEFT-POINTING ANGLE BRACKET + "\x{27E9}" => { role => 'CLOSE', name => 'rangle', stretchy => 'false' }, # \rangle # RIGHT-POINTING ANGLE BRACKET + "\x{27F5}" => { role => 'ARROW' }, # \longleftarrow # LONG LEFTWARDS ARROW + "\x{27F6}" => { role => 'ARROW' }, # \longrightarrow # LONG RIGHTWARDS ARROW + "\x{27F7}" => { role => 'METARELOP' }, # \longleftrightarrow # LONG LEFT RIGHT ARROW + "\x{27F8}" => { role => 'ARROW' }, # \Longleftarrow # LONG LEFTWARDS DOUBLE ARROW + "\x{27F9}" => { role => 'ARROW' }, # \Longrightarrow # LONG RIGHTWARDS DOUBLE ARROW + "\x{27FA}" => { role => 'METARELOP' }, # \Longleftrightarrow # LONG LEFT RIGHT DOUBLE ARROW + "\x{27FC}" => { role => 'ARROW' }, # \longmapsto # LONG RIGHTWARDS ARROW FROM B}, + "\x{2A00}" => { role => 'SUMOP', need_scriptpos => 1, need_mathstyle => 1 }, # \bigodotmeaning=> ? 
+"\x{2A01}" => { role => 'SUMOP', meaning => 'direct-sum', need_scriptpos => 1, need_mathstyle => 1 }, # \bigoplus +"\x{2A02}" => { role => 'SUMOP', meaning => 'tensor-product', need_scriptpos => 1, need_mathstyle => 1 }, # \bigotimes +"\x{2A04}" => { role => 'SUMOP', meaning => 'symmetric-difference', need_scriptpos => 1, need_mathstyle => 1 }, # \biguplus +"\x{2A06}" => { role => 'SUMOP', meaning => 'square-union', need_scriptpos => 1, need_mathstyle => 1 }, # \bigsqcup + "\x{2A1D}" => { role => 'RELOP', meaning => 'join' }, # \Join + "\x{2AAF}" => { role => 'RELOP', meaning => 'precedes-or-equals' }, # \preceq + "\x{2AB0}" => { role => 'RELOP', meaning => 'succeeds-or-equals' }, # \succeq + "\x{FF0F}" => { role => 'OPFUNCTION', meaning => 'not' }, # \not + #====================================================================== + "arccos" => { role => 'OPFUNCTION', meaning => 'inverse-cosine' }, # \arccos # + "arcsin" => { role => 'OPFUNCTION', meaning => 'inverse-sine' }, # \arcsin # + "arctan" => { role => 'OPFUNCTION', meaning => 'inverse-tangent' }, # \arctan # + "arg" => { role => 'OPFUNCTION', meaning => 'argument' }, # \arg # + "cos" => { role => 'TRIGFUNCTION', meaning => 'cosine' }, # \cos # + "cosh" => { role => 'TRIGFUNCTION', meaning => 'hyperbolic-cosine' }, # \cosh # + "cot" => { role => 'TRIGFUNCTION', meaning => 'cotangent' }, # \cot # + "coth" => { role => 'TRIGFUNCTION', meaning => 'hyperbolic-cotangent' }, # \coth # + "csc" => { role => 'TRIGFUNCTION', meaning => 'cosecant' }, # \csc # + "deg" => { role => 'OPFUNCTION', meaning => 'degree' }, # \deg # + "det" => { role => 'LIMITOP', meaning => 'determinant', need_scriptpos => 1 }, # \det # + "dim" => { role => 'LIMITOP', meaning => 'dimension' }, # \dim # + "exp" => { role => 'OPFUNCTION', meaning => 'exponential' }, # \exp # + "gcd" => { role => 'OPFUNCTION', meaning => 'gcd', need_scriptpos => 1 }, # \gcd # + "hom" => { role => 'OPFUNCTION', need_scriptpos => 1 }, # \hom # + "inf" => { role 
=> 'LIMITOP', meaning => 'infimum', need_scriptpos => 1 }, # \inf # + "ker" => { role => 'OPFUNCTION', meaning => 'kernel' }, # \ker # + "lg" => { role => 'OPFUNCTION' }, # \lg # + "lim" => { role => 'LIMITOP', meaning => 'limit', need_scriptpos => 1 }, # \lim # + "lim inf" => { role => 'LIMITOP', meaning => 'limit-infimum', need_scriptpos => 1 }, # \liminf # + "lim sup" => { role => 'LIMITOP', meaning => 'limit-supremum', need_scriptpos => 1 }, # \limsup # + "ln" => { role => 'OPFUNCTION', meaning => 'natural-logarithm' }, # \ln # + "log" => { role => 'OPFUNCTION', meaning => 'logarithm' }, # \log # + "max" => { role => 'OPFUNCTION', meaning => 'maximum', need_scriptpos => 1 }, # \max # + "min" => { role => 'OPFUNCTION', meaning => 'minimum', need_scriptpos => 1 }, # \min # + "Pr" => { role => 'OPFUNCTION', need_scriptpos => 1 }, # \Pr # + "sec" => { role => 'TRIGFUNCTION', meaning => 'secant' }, # \sec # + "sin" => { role => 'TRIGFUNCTION', meaning => 'sine' }, # \sin # + "sinh" => { role => 'TRIGFUNCTION', meaning => 'hyperbolic-sine' }, # \sinh # + "sup" => { role => 'LIMITOP', meaning => 'supremum', need_scriptpos => 1 }, # \sup # + "tan" => { role => 'TRIGFUNCTION', meaning => 'tangent' }, # \tan # + "tanh" => { role => 'TRIGFUNCTION', meaning => 'hyperbolic-tangent' }, # \tanh # +); + +sub unicode_math_properties { + my ($char) = @_; + return (defined $char) && $math_props{$char}; } + #====================================================================== 1; diff --git a/t/digestion/chardefs.xml b/t/digestion/chardefs.xml index f8f09acad..af1d74d16 100644 --- a/t/digestion/chardefs.xml +++ b/t/digestion/chardefs.xml @@ -32,9 +32,9 @@ <tag close=" ">2</tag>mathchardef -

Sum = +

Sum = - +

@@ -68,23 +68,23 @@

OMX: ].

-

OML: +

OML: - + .

-

OMS: +

OMS: - + .

-

OMX: +

OMX: - + .

diff --git a/t/digestion/testctr.xml b/t/digestion/testctr.xml index 84b696087..7f486f0c0 100644 --- a/t/digestion/testctr.xml +++ b/t/digestion/testctr.xml @@ -362,9 +362,9 @@ followed by a usage or assignment (See TeX Program §1224)

@

-

+

- +

diff --git a/t/fonts/mathaccents.xml b/t/fonts/mathaccents.xml index d675dbe00..49a1954ff 100644 --- a/t/fonts/mathaccents.xml +++ b/t/fonts/mathaccents.xml @@ -9,12 +9,12 @@ ? - + - ˚ + ˚ u s @@ -34,10 +34,10 @@ - + - ´ + ´ a @@ -65,10 +65,10 @@ - + - ¯ + ¯ a @@ -96,10 +96,10 @@ - + - ˘ + ˘ a @@ -127,10 +127,10 @@ - + - ˇ + ˇ a @@ -158,10 +158,10 @@ - + - ¨ + ¨ a @@ -189,10 +189,10 @@ - + - ˙ + ˙ a @@ -220,10 +220,10 @@ - + - ` + ` a @@ -251,10 +251,10 @@ - + - ^ + ^ a @@ -282,10 +282,10 @@ - + - ~ + ~ a @@ -316,7 +316,7 @@ - + a @@ -337,10 +337,10 @@
- + - ˚ + ˚ a @@ -364,7 +364,7 @@ - ~ + ~ a @@ -393,7 +393,7 @@ - ^ + ^ a diff --git a/t/fonts/omencodings.xml b/t/fonts/omencodings.xml index bcbea9ec0..d3ccf6f62 100644 --- a/t/fonts/omencodings.xml +++ b/t/fonts/omencodings.xml @@ -511,12 +511,12 @@ ´11 - - - - - - + + + + + +
´12 @@ -525,9 +525,9 @@ - - - + + +
´13 @@ -536,9 +536,9 @@ - - - + + +
´14 diff --git a/t/fonts/plainfonts.xml b/t/fonts/plainfonts.xml index 1feff6752..8f68bb493 100644 --- a/t/fonts/plainfonts.xml +++ b/t/fonts/plainfonts.xml @@ -114,16 +114,28 @@ but compare meanings:

-

“cal: abc123 and +

“cal: abc123 and - - - - - - - + + + + + + + + + + + + + + + + + + +