diff --git a/lib/LaTeXML/Package/LaTeX.pool.ltxml b/lib/LaTeXML/Package/LaTeX.pool.ltxml index 5b21235ab..36d2ce3fe 100644 --- a/lib/LaTeXML/Package/LaTeX.pool.ltxml +++ b/lib/LaTeXML/Package/LaTeX.pool.ltxml @@ -4418,9 +4418,22 @@ our %index_style = (textbf => 'bold', bf => 'bold', textrm => '', rm => '', sub process_index_phrases { my ($gullet, $phrases, $inlist) = @_; my @expansion = (); + my @tokens = $phrases->unlist; + # check we have a well-formed argument + return unless @tokens; + my $group_level = 0; + for my $t (@tokens) { + my $cc = $t->getCatcode; + if ($cc == CC_BEGIN) { $group_level++; } + elsif ($cc == CC_END) { + $group_level--; + # discard if unbalanced close group; + last if ($group_level < 0); } } + if ($group_level != 0) { # a group left open at the end, or a stray close group: ill-formed, discard; + Warn("malformed", "indexentry", $gullet, + 'index entry has unbalanced groups, discarding: "' . ToString($phrases) . '"'); + return; } # Split the text into phrases, separated by "!" - my @tokens = $phrases->unlist; - return unless @tokens; push(@tokens, T_OTHER('!')) unless $tokens[-1]->getString eq '!'; # Add terminal ! my @phrase = (); my @sortas = (); @@ -4458,7 +4471,37 @@ sub process_index_phrases { T_BEGIN, @expansion, T_END); return @expansion; } -DefMacro('\index{}', \&process_index_phrases); +# read verbatim, as per latex.ltx \@sanitize; +# useful for \index (maybe others?) 
+DefParameterType('SanitizedVerbatim', sub { + my ($gullet) = @_; + $gullet->readUntil(T_BEGIN); + # crucial: deactivate the backslash to avoid activating command sequences + # chars switched to CC_OTHER by \@sanitize: ' ', '\\', '$', '&', '#', '^', '_', '%', '~' + # some of those are already in state's "SPECIALS", so only adding the rest: + StartSemiverbatim(' ', '\\', '%'); + my $arg = $gullet->readBalanced(1); + EndSemiverbatim(); + # now that we have the semiverbatim tokens, retokenize with the standard catcode scheme, + # this may seem like wasted work, but it avoids very unfortunate error propagation in cases + # where the \index argument was malformed for one reason or another. + # + # the strangeness comes from the original TeX workflow requiring multiple conversion calls, + # alongside a call to the `makeindex` binary, which we don't do in latexml. This parameter type + # emulates one important aspect implied by those steps. + $arg = Tokenize(ToString($arg)); + return $arg; }, + beforeDigest => sub { + $_[0]->bgroup; + MergeFont(family => 'typewriter'); }, + afterDigest => sub { + $_[0]->egroup; }, + reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); + +# real-world LaTeX \index +DefMacro('\index SanitizedVerbatim', \&process_index_phrases); +# simple \index for internal use in bindings with pre-sanitized arguments +DefMacro('\lx@simple@index Plain', \&process_index_phrases); Tag('ltx:indexphrase', afterClose => \&addIndexPhraseKey); Tag('ltx:glossaryphrase', afterClose => \&addIndexPhraseKey); diff --git a/lib/LaTeXML/Package/listings.sty.ltxml b/lib/LaTeXML/Package/listings.sty.ltxml index 3956c00e4..c5397304f 100644 --- a/lib/LaTeXML/Package/listings.sty.ltxml +++ b/lib/LaTeXML/Package/listings.sty.ltxml @@ -1019,7 +1019,9 @@ DefKeyVal('LST', 'indexstyle', ''); DefMacro('\lst@@indexstyle [Number] Until:\end', sub { lstSetClassStyle(lstClassName('index', $_[1]), $_[2]); }); -DefMacro('\lstindexmacro{}', '\index{{\ttfamily #1}}'); +# use the simple 
plain-argument index, +# otherwise we need to double-check any relevant Semiverbatim cases +DefMacro('\lstindexmacro{}', '\lx@simple@index{{\ttfamily #1}}'); #====================================================================== # 4.13 Column alignment