Skip to content

Commit

Permalink
fix(localize): render ICU placeholders in extracted translation files (
Browse files Browse the repository at this point in the history
…angular#38484)

Previously placeholders were only rendered for dynamic interpolation
expressions in `$localize` tagged strings. But there are also potentially
dynamic values in ICU expressions too, so we need to render these as
placeholders when extracting i18n messages into translation files.

PR Close angular#38484
  • Loading branch information
petebacondarwin authored and atscott committed Aug 17, 2020
1 parent be96510 commit 81c3e80
Show file tree
Hide file tree
Showing 10 changed files with 437 additions and 32 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/**
* @license
* Copyright Google LLC All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/

/**
 * Break `text` up into alternating "static strings" and ICU "placeholder names".
 *
 * ICU expressions in `$localize` tagged messages may contain "dynamic" pieces (e.g.
 * interpolations or element markers). Those pieces must be rendered as placeholders in extracted
 * translation files, so the ICU text is scanned here and the placeholders are separated out for
 * the translation serializers to render.
 *
 * An example of an ICU with interpolations:
 *
 * ```
 * {VAR_PLURAL, plural, one {{INTERPOLATION}} other {{INTERPOLATION_1} post}}
 * ```
 *
 * Here `INTERPOLATION` and `INTERPOLATION_1` are placeholders that will be replaced with dynamic
 * content at runtime. Such placeholders are recognised as text wrapped in curly braces inside an
 * ICU case expression.
 *
 * ICUs can be nested within each other to any depth. A nested ICU is also wrapped in curly braces
 * inside a case, exactly like a placeholder; it is told apart from a placeholder because it
 * contains a comma `,` (separating the ICU value from the ICU type). Nested ICUs can contain
 * placeholders of their own, which are extracted too.
 *
 * An example of a nested ICU containing its own placeholders:
 *
 * ```
 * {VAR_SELECT_1, select,
 *   invoice {Invoice for {INTERPOLATION}}
 *   payment {{VAR_SELECT, select,
 *     processor {Payment gateway}
 *     other {{INTERPOLATION_1}}
 *   }}
 * }
 * ```
 *
 * @param text Text to be broken.
 * @returns an array of strings, where
 *  - even values are static strings (e.g. 0, 2, 4, etc)
 *  - odd values are placeholder names (e.g. 1, 3, 5, etc)
 */
export function extractIcuPlaceholders(text: string): string[] {
  const parser = new StateStack();
  const result = new IcuPieces();
  const bracePattern = /[{}]/g;

  // Index of the first character of `text` that has not yet been copied into `result`.
  let consumed = 0;
  let hit: RegExpExecArray|null = bracePattern.exec(text);
  while (hit !== null) {
    // The pattern only matches `{` or `}`, so anything that is not `{` closes a block.
    if (hit[0] === '{') {
      parser.enterBlock();
    } else {
      parser.leaveBlock();
    }

    // Position of the character immediately after the brace that was just matched.
    const afterBrace = bracePattern.lastIndex;
    const name =
        parser.getCurrent() === 'placeholder' ? tryParsePlaceholder(text, afterBrace) : undefined;

    if (name === undefined) {
      // Ordinary brace: keep it (and the text leading up to it) as static text.
      result.addText(text.substring(consumed, afterBrace));
    } else if (!name) {
      // Looked like a placeholder but is not one, so it must be a nested ICU;
      // keep the static text, including the opening curly brace.
      result.addText(text.substring(consumed, afterBrace));
      parser.nestedIcu();
    } else {
      // A real placeholder: store the static text without the opening curly brace,
      // record the name, then jump the scanner past the name and its closing brace.
      result.addText(text.substring(consumed, afterBrace - 1));
      result.addPlaceholder(name);
      bracePattern.lastIndex = afterBrace + name.length + 1;
      parser.leaveBlock();
    }

    consumed = bracePattern.lastIndex;
    hit = bracePattern.exec(text);
  }

  // Whatever follows the final brace (or the whole text, if there were no braces).
  result.addText(text.substring(consumed));
  return result.toArray();
}

/**
 * Accumulates the pieces of an ICU: "static text" entries interleaved with "placeholder name"
 * entries.
 *
 * The list always starts and ends with a static text entry (possibly the empty string), so
 * static text sits at even indices and placeholder names at odd indices.
 */
class IcuPieces {
  private pieces: string[] = [''];

  /**
   * Append `text` to the trailing "static text" entry.
   *
   * Consecutive calls keep extending that same entry rather than creating new ones.
   */
  addText(text: string): void {
    const tail = this.pieces.length - 1;
    this.pieces[tail] = this.pieces[tail] + text;
  }

  /**
   * Record the placeholder `name`, followed by a fresh empty "static text" entry ready to
   * receive any text that comes after the placeholder.
   */
  addPlaceholder(name: string): void {
    this.pieces.push(name, '');
  }

  /**
   * Get the stored pieces as an array of strings.
   *
   * Even values are static strings (e.g. 0, 2, 4, etc)
   * Odd values are placeholder names (e.g. 1, 3, 5, etc)
   */
  toArray(): string[] {
    return this.pieces;
  }
}

/**
 * Tracks where the parser is while scanning a string for ICU placeholders.
 *
 * The state changes each time a curly brace block is entered or left. Because ICUs can be nested,
 * the states are kept on a stack.
 */
class StateStack {
  private stack: ParserState[] = [];

  /**
   * Push the state for a newly opened block.
   *
   * The new state is derived from the current one:
   *  - inside an `icu` (or a `placeholder`) a new block is a `case`;
   *  - inside a `case` a new block is provisionally a `placeholder`;
   *  - otherwise (nothing on the stack) a new block starts an `icu`.
   */
  enterBlock(): void {
    const parent = this.getCurrent();
    let next: ParserState;
    if (parent === 'icu' || parent === 'placeholder') {
      next = 'case';
    } else if (parent === 'case') {
      next = 'placeholder';
    } else {
      next = 'icu';
    }
    this.stack.push(next);
  }

  /**
   * Pop the state of the block being left.
   *
   * @returns the state that was removed, or `undefined` if the stack was already empty.
   */
  leaveBlock(): ParserState {
    return this.stack.pop();
  }

  /**
   * Correct the state after discovering that the current block is a nested ICU.
   *
   * The block was provisionally recorded as a "placeholder"; that entry is removed and an "icu"
   * entry is pushed in its place. Throws if the removed entry was not "placeholder".
   */
  nestedIcu(): void {
    const replaced = this.stack.pop();
    assert(
        replaced === 'placeholder', 'A nested ICU must replace a placeholder but got ' + replaced);
    this.stack.push('icu');
  }

  /**
   * Peek at the most recently pushed state (`undefined` when the stack is empty).
   */
  getCurrent() {
    return this.stack[this.stack.length - 1];
  }
}

/** The kinds of block the parser can be inside; `undefined` means "not inside any block". */
type ParserState = 'icu'|'case'|'placeholder'|undefined;

/**
 * Try to read a simple placeholder name out of a curly braced block.
 *
 * The candidate name is everything from `start` up to the next closing curly brace. If that
 * candidate contains a comma `,` then it cannot be a placeholder - it is probably a nested ICU
 * instead.
 *
 * @param text the whole string that is being parsed.
 * @param start the index of the character in the `text` string where this placeholder may start.
 * @returns the placeholder name, or `null` if there is no closing brace after `start` or a comma
 *     appears before it.
 */
function tryParsePlaceholder(text: string, start: number): string|null {
  const closeAt = text.indexOf('}', start);
  if (closeAt === -1) {
    return null;
  }
  const candidate = text.substring(start, closeAt);
  return candidate.includes(',') ? null : candidate;
}

/**
 * Throw an `Error` whose message is `message` prefixed with "Assertion failure: " when `test`
 * is false; do nothing otherwise.
 */
function assert(test: boolean, message: string): void {
  if (test) {
    return;
  }
  throw new Error(`Assertion failure: ${message}`);
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage, ɵSourceLocation} from '@angular/localize';

import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';

Expand Down Expand Up @@ -63,11 +64,22 @@ export class Xliff1TranslationSerializer implements TranslationSerializer {
}

private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
xml.text(message.messageParts[0]);
for (let i = 1; i < message.messageParts.length; i++) {
xml.startTag('x', {id: message.placeholderNames[i - 1]}, {selfClosing: true});
xml.text(message.messageParts[i]);
const length = message.messageParts.length - 1;
for (let i = 0; i < length; i++) {
this.serializeTextPart(xml, message.messageParts[i]);
xml.startTag('x', {id: message.placeholderNames[i]}, {selfClosing: true});
}
this.serializeTextPart(xml, message.messageParts[length]);
}

/**
 * Write one text part of a message to `xml`, rendering each ICU placeholder found by
 * `extractIcuPlaceholders()` as a self-closing `<x>` tag whose `id` is the placeholder name.
 */
private serializeTextPart(xml: XmlFile, text: string): void {
  const pieces = extractIcuPlaceholders(text);
  // Pieces alternate static text (even index) and placeholder name (odd index);
  // the final entry is static text and is written after the loop.
  const lastIndex = pieces.length - 1;
  let index = 0;
  while (index < lastIndex) {
    xml.text(pieces[index]);
    xml.startTag('x', {id: pieces[index + 1]}, {selfClosing: true});
    index += 2;
  }
  xml.text(pieces[lastIndex]);
}

private serializeNote(xml: XmlFile, name: string, value: string): void {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage} from '@angular/localize';

import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';

Expand All @@ -22,6 +23,7 @@ const MAX_LEGACY_XLIFF_2_MESSAGE_LENGTH = 20;
* @see Xliff2TranslationParser
*/
export class Xliff2TranslationSerializer implements TranslationSerializer {
private currentPlaceholderId = 0;
constructor(
private sourceLocale: string, private basePath: AbsoluteFsPath,
private useLegacyIds: boolean) {}
Expand Down Expand Up @@ -74,21 +76,38 @@ export class Xliff2TranslationSerializer implements TranslationSerializer {
}

private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
xml.text(message.messageParts[0]);
for (let i = 1; i < message.messageParts.length; i++) {
const placeholderName = message.placeholderNames[i - 1];
if (placeholderName.startsWith('START_')) {
xml.startTag('pc', {
id: `${i}`,
equivStart: placeholderName,
equivEnd: placeholderName.replace(/^START/, 'CLOSE')
});
} else if (placeholderName.startsWith('CLOSE_')) {
xml.endTag('pc');
} else {
xml.startTag('ph', {id: `${i}`, equiv: placeholderName}, {selfClosing: true});
}
xml.text(message.messageParts[i]);
this.currentPlaceholderId = 0;
const length = message.messageParts.length - 1;
for (let i = 0; i < length; i++) {
this.serializeTextPart(xml, message.messageParts[i]);
this.serializePlaceholder(xml, message.placeholderNames[i]);
}
this.serializeTextPart(xml, message.messageParts[length]);
}

/**
 * Write one text part of a message to `xml`, passing each ICU placeholder found by
 * `extractIcuPlaceholders()` to `serializePlaceholder()`.
 */
private serializeTextPart(xml: XmlFile, text: string): void {
  const pieces = extractIcuPlaceholders(text);
  // Pieces alternate static text (even index) and placeholder name (odd index);
  // the final entry is static text and is written after the loop.
  const lastIndex = pieces.length - 1;
  let index = 0;
  while (index < lastIndex) {
    xml.text(pieces[index]);
    this.serializePlaceholder(xml, pieces[index + 1]);
    index += 2;
  }
  xml.text(pieces[lastIndex]);
}

/**
 * Write a single placeholder to `xml`.
 *
 *  - A name starting with `START_` opens a `<pc>` tag, with `equivEnd` derived by swapping the
 *    leading `START` for `CLOSE`.
 *  - A name starting with `CLOSE_` closes the `<pc>` tag.
 *  - Any other name becomes a self-closing `<ph>` tag.
 *
 * Opening `<pc>` tags and `<ph>` tags take their `id` from `currentPlaceholderId`, which is
 * incremented each time it is used.
 */
private serializePlaceholder(xml: XmlFile, placeholderName: string): void {
  if (placeholderName.startsWith('CLOSE_')) {
    xml.endTag('pc');
    return;
  }

  const id = `${this.currentPlaceholderId++}`;
  if (placeholderName.startsWith('START_')) {
    const equivEnd = placeholderName.replace(/^START/, 'CLOSE');
    xml.startTag('pc', {id, equivStart: placeholderName, equivEnd});
    return;
  }

  xml.startTag('ph', {id, equiv: placeholderName}, {selfClosing: true});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage, ɵSourceLocation} from '@angular/localize';

import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';

Expand Down Expand Up @@ -77,11 +78,22 @@ export class XmbTranslationSerializer implements TranslationSerializer {
}

private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
xml.text(message.messageParts[0]);
for (let i = 1; i < message.messageParts.length; i++) {
xml.startTag('ph', {name: message.placeholderNames[i - 1]}, {selfClosing: true});
xml.text(message.messageParts[i]);
const length = message.messageParts.length - 1;
for (let i = 0; i < length; i++) {
this.serializeTextPart(xml, message.messageParts[i]);
xml.startTag('ph', {name: message.placeholderNames[i]}, {selfClosing: true});
}
this.serializeTextPart(xml, message.messageParts[length]);
}

/**
 * Write one text part of a message to `xml`, rendering each ICU placeholder found by
 * `extractIcuPlaceholders()` as a self-closing `<ph>` tag whose `name` is the placeholder name.
 */
private serializeTextPart(xml: XmlFile, text: string): void {
  const pieces = extractIcuPlaceholders(text);
  // Pieces alternate static text (even index) and placeholder name (odd index);
  // the final entry is static text and is written after the loop.
  const lastIndex = pieces.length - 1;
  let index = 0;
  while (index < lastIndex) {
    xml.text(pieces[index]);
    xml.startTag('ph', {name: pieces[index + 1]}, {selfClosing: true});
    index += 2;
  }
  xml.text(pieces[lastIndex]);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,12 @@ runInEachFileSystem(() => {
` <file>`,
` <unit id="3291030485717846467">`,
` <segment>`,
` <source>Hello, <ph id="1" equiv="PH"/>!</source>`,
` <source>Hello, <ph id="0" equiv="PH"/>!</source>`,
` </segment>`,
` </unit>`,
` <unit id="8669027859022295761">`,
` <segment>`,
` <source>try<ph id="1" equiv="PH"/>me</source>`,
` <source>try<ph id="0" equiv="PH"/>me</source>`,
` </segment>`,
` </unit>`,
` </file>`,
Expand Down
Loading

0 comments on commit 81c3e80

Please sign in to comment.