-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvertXls.py
159 lines (122 loc) · 4.69 KB
/
convertXls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# -*- coding: utf-8 -*-
#!/usr/bin/env python3
from __future__ import absolute_import, division, print_function
from copy import copy
import os
import re
import sys
# Read and process MS Excel documents, converting text from one script into
# another using Unicode characters.
# TODO: define data ranges (worksheet and cell ranges) on command line,
# then apply changes to only those cells.
# TODO: Create and add named style for the newly included font.
# https://openpyxl.readthedocs.io/en/default/tutorial.html
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.styles import NamedStyle, PatternFill, Border, Side, Alignment, Protection, Font
import convertUtil
import adlamConversion
class cellData:
    """Minimal record holding the text associated with one cell."""

    def __init__(self):
        # Every new instance starts out with empty text.
        self.text = ''
class convertWorkbook():
    """Convert the text of selected cells in an .xlsx workbook.

    The workbook is loaded with openpyxl when an input path is given.
    `process()` rewrites the values of the requested cells on the first
    worksheet using the supplied converter and switches those cells to the
    converter's default output font.  Saving the workbook is left to the
    caller.
    """

    def __init__(self, input_path, output_dir, converter, debug=False, cell_ranges=None):
        """Store the settings and load the workbook if a path is given.

        Args:
            input_path: path of the .xlsx file to load; a false value skips
                loading and leaves `self.workbook` as None.
            output_dir: output location; only reported by `process()`.
            converter: object providing `oldFonts`, `unicodeFont`,
                `defaultOutputFont`, `setScriptIndex()` and `convertText()`.
            debug: debug flag, kept on the instance as `self.debug`.
            cell_ranges: iterable of `[first, last]` cell-reference pairs to
                convert; None means no cells are converted.
        """
        self.oldFonts = []
        self.input_path = input_path
        self.output_dir = output_dir
        self.converter = converter
        self.debug = debug
        self.old_fonts = converter.oldFonts  # List of font names
        self.unicode_font = converter.unicodeFont
        self.cell_ranges = cell_ranges
        self.workbook = None
        if self.input_path:
            self.workbook = load_workbook(filename=self.input_path)
            # List the sheets that were found in the workbook.
            for sheet in self.workbook:
                print(sheet.title)

    def _cells_to_convert(self, ws):
        """Yield each cell of worksheet `ws` covered by `self.cell_ranges`."""
        for cell_range in (self.cell_ranges or ()):
            selection = ws[cell_range[0]:cell_range[1]]
            if not isinstance(selection, (tuple, list)):
                selection = (selection,)
            for row in selection:
                # A rectangular range comes back as a tuple of rows, each a
                # tuple of cells; visit every cell of every row, not just
                # the first column.  Some lookups (e.g. a whole column)
                # return the cells directly, so accept bare cells as well.
                if isinstance(row, (tuple, list)):
                    for cell in row:
                        yield cell
                else:
                    yield row

    def process(self):
        """Convert the requested cells of the first worksheet in place.

        Empty cells are skipped.  Each converted cell receives the new
        'ConvertedFont' named style, then a copy of its previous font with
        only the font name replaced by the converter's default output font.
        """
        print('process path = %s, output_dir = %s\n' % (
            self.input_path, self.output_dir))

        # Only the first sheet is processed.
        sheets = self.workbook.sheetnames
        ws = self.workbook[sheets[0]]

        # Create a named style for the output font and register it.
        output_font = self.converter.defaultOutputFont
        new_style = NamedStyle(name='ConvertedFont', font=Font(name=output_font))
        self.workbook.add_named_style(new_style)

        self.converter.setScriptIndex(adlamConversion.LATIN2ADLAM)

        for the_cell in self._cells_to_convert(ws):
            latin = the_cell.value
            if not latin:
                # Nothing there to process
                continue
            adlam = self.converter.convertText(
                latin, fontIndex=adlamConversion.LATIN2ADLAM)
            the_cell.value = adlam

            # Take the font copy before the style assignment, then apply it
            # afterwards so it overrides the named style's font.
            new_font = copy(the_cell.font)
            new_font.name = output_font
            the_cell.style = new_style
            the_cell.font = new_font

    def processText(self):
        """Placeholder for font-aware conversion; currently does nothing."""
        # Planned steps:
        # Get the converter
        # For each item in the range specified, check the font information
        # Convert contents if needed
        # For each sheet, create copy
        # For each cell, convert the cell(s) as needed
        # Save the new .xlsx file.
        return

    def info(self):
        """Placeholder for reporting workbook details; currently does nothing.

        The loaded workbook is left untouched.
        """
        # TODO: report workbook information, e.g. the list of chartsheets.
        return
def parse_cell_ranges(cell_string):
    """Parse a delimited list of cells and cell ranges.

    Items may be separated by commas, semicolons and/or white space.  White
    space around the ':' of a range is ignored, and empty items (e.g. from a
    trailing comma) are dropped.

    Args:
        cell_string: text such as "c2:c100, d2; e3:g9".  None or an empty
            string yields an empty list.

    Returns:
        A list of lists.  An item containing ':' contributes its
        ':'-separated parts (normally `[first, last]`); a single cell
        contributes `[cell, cell]`.
    """
    if not cell_string:
        return []

    # Glue "c2 : c100" back together so the separator split below does not
    # break one range into three pieces.
    normalized = re.sub(r'\s*:\s*', ':', cell_string)

    ranges = []
    for item in re.split(r'[\s,;]+', normalized):
        if not item:
            # Leading/trailing separators produce empty strings; skip them.
            continue
        if ':' in item:
            ranges.append(item.split(':'))
        else:
            ranges.append([item, item])
    return ranges
# For standalone and testing.
def main(argv):
    """Convert each .xlsx file named on the command line.

    Arguments are obtained from convertUtil.parseArgs(); `argv` itself is
    not consulted.  Each file with a '.xlsx' extension is converted and
    saved under the name supplied by the converter; any other file is
    reported and skipped.
    """
    global debug_output

    args = convertUtil.parseArgs()

    # args.cells holds cells and ranges, e.g. "c2:c100, d2, e3:g9",
    # separated by comma, semicolon, or space.
    cell_ranges = parse_cell_ranges(args.cells)
    debug_output = True

    paths_to_doc = args.filenames
    print('ARGS = %s' % args)

    for path in paths_to_doc:
        _, extension = os.path.splitext(path)
        # A fresh converter is created for every file.
        converter = adlamConversion.AdlamConverter()

        if extension != '.xlsx':
            print('!!! Not processing file %s !' % path)
            continue

        out_file_name = converter.get_outfile_name(path)  # Temporary
        processor = convertWorkbook(
            path, out_file_name, converter, debug_output,
            cell_ranges=cell_ranges)
        processor.process()  # Do the requested conversion
        processor.workbook.save(out_file_name)
if __name__ == "__main__":
main(sys.argv)