Skip to content

Commit

Permalink
Improved region merging for Word documents
Browse files (browse the repository at this point in the history)
  • Loading branch information
rmraya committed Mar 17, 2024
1 parent f4e401a commit f19b4e2
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 80 deletions.
Binary file modified lib/openxliff.jar
Binary file not shown.
4 changes: 2 additions & 2 deletions src/com/maxprograms/converters/Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ private Constants() {

// Tool id and tool name strings (presumably written into generated files - where they are
// consumed is not visible in this view).
public static final String TOOLID = "OpenXLIFF";
public static final String TOOLNAME = "OpenXLIFF Filters";
// NOTE(review): VERSION and BUILD each appear twice because this is a diff view whose +/-
// markers were stripped (the file header reports 2 additions & 2 deletions). Presumably the
// first pair (3.19.0 / 20240307_1908) is the removed one and the second pair
// (3.20.0 / 20240316_1220) is the added one - confirm against the repository.
public static final String VERSION = "3.19.0";
public static final String BUILD = "20240307_1908";
public static final String VERSION = "3.20.0";
// BUILD looks like a yyyyMMdd_HHmm timestamp - TODO confirm.
public static final String BUILD = "20240316_1220";

// Result codes, kept as strings ("0" / "1"); their consumers are not visible here.
public static final String SUCCESS = "0";
public static final String ERROR = "1";
Expand Down
119 changes: 41 additions & 78 deletions src/com/maxprograms/converters/msoffice/MSOffice2Xliff.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.Vector;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -465,96 +464,60 @@ private static void cleanPara(Element e) {
}
removeProperties(e, "w:lang");
removeProperties(e, "w:noProof");
mergeRegions(e);
e.clone(mergeRegions(e));
}

// NOTE(review): this span is a diff rendering whose +/- markers were stripped. Lines of the
// REMOVED implementation (signature "void mergeRegions", below) are interleaved with lines of
// the ADDED implementation (signature "Element mergeRegions", further down), so the text is
// not compilable as shown. The comments in this span describe each side separately; which
// side a shared line such as "}" or "continue;" belongs to cannot always be told from here.
//
// Removed implementation, first pass: walks the paragraph's children with index curr; for a
// "w:r" child that has a "w:t" child it looks at the following siblings and, while the merge
// flag stays true, moves the "w:tab" and "w:t" children of the next "w:r" into the current
// one and removes the next "w:r" from the paragraph. The paragraph is modified in place and
// nothing is returned.
private static void mergeRegions(Element paragraph) {
int curr = 0;
while (curr < paragraph.getChildren().size()) {
List<Element> children = paragraph.getChildren();
Element currRegion = children.get(curr);
if (!currRegion.getName().equals("w:r")) {
curr++;
continue;
}
if (currRegion.getChild("w:t") == null) {
curr++;
continue;
}
int next = curr + 1;
boolean merge = true;
while (next < paragraph.getChildren().size() && merge) {
Element nextRegion = paragraph.getChildren().get(next);
if (!nextRegion.getName().equals("w:r")) {
merge = false;
/**
 * Added implementation: merges adjacent "w:r" children of a paragraph and returns the
 * outcome as a new element.
 *
 * A new Element is created with the paragraph's name and attributes. The paragraph's
 * child list is then scanned pairwise: child a (index i) and child b (index i + 1) are
 * merged only when both are named "w:r", both have a "w:t" child, b has no "w:br" child,
 * and buildProps(a) equals buildProps(b). On a merge, a's "w:t" gets xml:space="preserve"
 * and receives b's text (plus a "\t" for each "w:tab" child of b, when b has any); b is
 * then removed from the list and the same index is examined again.
 *
 * @param paragraph the element whose children are scanned
 * @return a new element with the paragraph's name and attributes, and the (possibly
 *         shortened) child list set as its children
 */
private static Element mergeRegions(Element paragraph) {
Element result = new Element(paragraph.getName());
result.setAttributes(paragraph.getAttributes());
// NOTE(review): whether getChildren() returns a live view or a copy is not visible here;
// the list obtained below is edited (remove) and later handed to result.setChildren.
List<Element> children = paragraph.getChildren();
for (int i = 0; i < children.size(); i++) {
// The last child has no successor, so there is nothing to compare it with.
if (i + 1 < children.size()) {
Element a = children.get(i);
if (!"w:r".equals(a.getName())) {
continue;
}
Element b = children.get(i + 1);
if (!"w:r".equals(b.getName())) {
continue;
}
Element aT = a.getChild("w:t");
if (aT == null) {
continue;
}
Element bT = b.getChild("w:t");
if (bT == null) {
continue;
}
// A "w:r" that contains a "w:br" child is never merged into its predecessor.
Element br = b.getChild("w:br");
if (br != null) {
continue;
}
if (nextRegion.getChild("w:t") == null) {
merge = false;
// Added side: both elements must yield equal property maps from buildProps.
Map<String, Element> aProps = buildProps(a);
Map<String, Element> bProps = buildProps(b);
if (!aProps.equals(bProps)) {
continue;
}
// Removed side: the same comparison done by hand (size check, then key-by-key equals).
Map<String, Element> currProps = buildProps(currRegion);
Map<String, Element> nextProps = buildProps(nextRegion);
if (currProps.size() != nextProps.size()) {
merge = false;
// Added side: mark the surviving "w:t" as space-preserving before appending b's text.
aT.setAttribute("xml:space", "preserve");
if (b.getChild("w:tab") == null) {
aT.addContent(bT.getContent());
} else {
Set<String> keys = currProps.keySet();
Iterator<String> it = keys.iterator();
while (it.hasNext()) {
String key = it.next();
if (!nextProps.containsKey(key)) {
merge = false;
break;
}
if (!currProps.get(key).equals(nextProps.get(key))) {
merge = false;
break;
}
}
if (merge) {
currRegion.getChild("w:t").setAttribute("xml:space", "preserve");
List<Element> content = nextRegion.getChildren();
for (int i = 0; i < content.size(); i++) {
Element e = content.get(i);
if ("w:tab".equals(e.getName()) || "w:t".equals(e.getName())) {
currRegion.addContent(e);
}
// Added side: b contains at least one "w:tab"; walk b's children in order, turning each
// "w:tab" into a literal tab character appended to a's text.
List<Element> bContent = b.getChildren();
for (Element e : bContent) {
if ("w:tab".equals(e.getName())) {
aT.addContent("\t");
}
paragraph.removeChild(nextRegion);
}
}
}
curr++;
}

// Removed implementation, second pass: inside every "w:r" of the paragraph, a child element
// whose name equals that of the previously kept element is folded into it (its content is
// appended to the kept element); all other nodes are kept unchanged and in order.
List<Element> regions = paragraph.getChildren("w:r");
for (int i = 0; i < regions.size(); i++) {
Element region = regions.get(i);
List<XMLNode> newContent = new ArrayList<>();
List<XMLNode> oldContent = region.getContent();
Iterator<XMLNode> it = oldContent.iterator();
Element last = null;
while (it.hasNext()) {
XMLNode node = it.next();
if (node.getNodeType() == XMLNode.ELEMENT_NODE) {
Element e = (Element) node;
if (last == null) {
last = e;
newContent.add(node);
} else {
if (last.getName().equals(e.getName())) {
last.addContent(e.getContent());
} else {
newContent.add(e);
last = e;
// NOTE(review): added side - for every "w:t" child e of b this appends bT's content (bT is
// the single element returned by b.getChild("w:t")), not e's own content. If b can hold
// more than one "w:t", the same text would be appended repeatedly - confirm intent.
if ("w:t".equals(e.getName())) {
aT.addContent(bT.getContent());
}
}
} else {
newContent.add(node);
}
// Added side: drop b from the list and step back so that, after the loop's i++, the same
// element a is compared with its new successor.
children.remove(i + 1);
i--;
}
region.setContent(newContent);
}
// Added side: the edited child list becomes the content of the new element.
result.setChildren(children);
return result;
}

private static Map<String, Element> buildProps(Element region) {
Expand Down

0 comments on commit f19b4e2

Please sign in to comment.