Skip to content

Commit

Permalink
respect \@sanitize guard for \index via new SanitizedVerbatim paramet…
Browse files Browse the repository at this point in the history
…er type
  • Loading branch information
dginev committed Oct 26, 2023
1 parent 1ad2590 commit c1571cb
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 4 deletions.
49 changes: 46 additions & 3 deletions lib/LaTeXML/Package/LaTeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -4418,9 +4418,22 @@ our %index_style = (textbf => 'bold', bf => 'bold', textrm => '', rm => '',
sub process_index_phrases {
my ($gullet, $phrases, $inlist) = @_;
my @expansion = ();
my @tokens = $phrases->unlist;
# check we have a well-formed argument
return unless @tokens;
my $group_level = 0;
for my $t (@tokens) {
my $cc = $t->getCatcode;
if ($cc == CC_BEGIN) { $group_level++; }
elsif ($cc == CC_END) {
$group_level--;
# discard if unbalanced close group;
last if ($group_level < 0); } }
if ($group_level != 0) { # if a group is still open by the end, ill-formed, discard;
Warn("malformed", "indexentry", $gullet,
'index entry has unbalanced groups, discarding: "' . ToString($phrases) . '"');
return; }
# Split the text into phrases, separated by "!"
my @tokens = $phrases->unlist;
return unless @tokens;
push(@tokens, T_OTHER('!')) unless $tokens[-1]->getString eq '!'; # Add terminal !
my @phrase = ();
my @sortas = ();
Expand Down Expand Up @@ -4458,7 +4471,37 @@ sub process_index_phrases {
T_BEGIN, @expansion, T_END);
return @expansion; }

# \index taking one plain braced argument; the expansion is computed by process_index_phrases.
# NOTE(review): \index is defined a second time further down, with a SanitizedVerbatim
# parameter. Presumably the later definition is the one meant to be in effect -- confirm
# whether this line should still be present.
DefMacro('\index{}', \&process_index_phrases);
# Parameter type 'SanitizedVerbatim': read a braced argument verbatim,
# as per latex.ltx's \@sanitize.
# Useful for \index (and possibly for other macros).
DefParameterType('SanitizedVerbatim', sub {
    my ($gullet) = @_;
    # Move up to the opening brace; the value returned by readUntil is intentionally unused.
    $gullet->readUntil(T_BEGIN);
    # \@sanitize switches these characters to CC_OTHER:
    #   ' ', '\\', '$', '&', '#', '^', '_', '%', '~'
    # Some of them are already in the state's "SPECIALS", so only the rest are added here.
    # Deactivating the backslash is the crucial part: it avoids activating command sequences.
    StartSemiverbatim(' ', '\\', '%');
    my $verbatim = $gullet->readBalanced(1);
    EndSemiverbatim();
    # With the semiverbatim tokens in hand, retokenize them under the standard catcode scheme.
    # This can look like wasted work, but it avoids very unfortunate error propagation when
    # the \index argument was malformed for one reason or another.
    #
    # The oddity stems from the original TeX workflow, which needs several conversion calls
    # plus a run of the external `makeindex` program; latexml does none of that. This
    # parameter type emulates one important aspect implied by those steps.
    my $retokenized = Tokenize(ToString($verbatim));
    return $retokenized; },
  # Digest the argument inside its own group, with the typewriter family merged into the font.
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    MergeFont(family => 'typewriter'); },
  # Close the group opened by beforeDigest.
  afterDigest => sub {
    my ($stomach) = @_;
    $stomach->egroup; },
  # Revert as the argument wrapped in an explicit begin/end group pair.
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });

# Real-world LaTeX \index: the argument is read with the SanitizedVerbatim parameter type
# and the expansion is computed by process_index_phrases.
DefMacro('\index SanitizedVerbatim', \&process_index_phrases);
# Simple \index variant for internal use in bindings whose arguments are already sanitized;
# it reads a Plain argument and shares the same handler, process_index_phrases.
DefMacro('\lx@simple@index Plain', \&process_index_phrases);

Tag('ltx:indexphrase', afterClose => \&addIndexPhraseKey);
Tag('ltx:glossaryphrase', afterClose => \&addIndexPhraseKey);
Expand Down
4 changes: 3 additions & 1 deletion lib/LaTeXML/Package/listings.sty.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -1019,7 +1019,9 @@ DefKeyVal('LST', 'indexstyle', '');
# \lst@@indexstyle [Number] ... \end
# Takes an optional Number and everything up to \end, and records the latter as the
# style of the 'index' class selected by the former.
# NOTE(review): the exact name produced by lstClassName('index', N) is defined elsewhere -- confirm.
DefMacro('\lst@@indexstyle [Number] Until:\end', sub {
    my ($gullet, $classnum, $style) = @_;
    lstSetClassStyle(lstClassName('index', $classnum), $style); });

# \lstindexmacro{entry}: emit an index entry for `entry`, set in \ttfamily.
DefMacro('\lstindexmacro{}', '\index{{\ttfamily #1}}');
# Use the simple, plain-argument index macro (\lx@simple@index) here;
# going through \index instead would require double-checking any relevant Semiverbatim cases.
DefMacro('\lstindexmacro{}', '\lx@simple@index{{\ttfamily #1}}');

#======================================================================
# 4.13 Column alignment
Expand Down

0 comments on commit c1571cb

Please sign in to comment.