-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathlcNrD.py
256 lines (211 loc) · 9.04 KB
/
lcNrD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
import argparse
import os
import random as r
import re
# Build the command-line interface. The automatic -h is disabled
# (add_help=False) so it can be re-registered below with a custom help string.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter, add_help=False)
parser.add_argument('-v', '--version', action='version',
                    version='%(prog)s 1.6', help="Show program's version number and exit.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS,
                    help='** = required')

# Input / output files.
# The input file is mandatory: without it the script would only fail later with
# a TypeError from open(None), so argparse rejects the call with a usage error.
parser.add_argument(
    "-f", "--file", type=str, required=True,
    help='** this is the file you\'re targeting to get the lcNrD treatment')
parser.add_argument("-o", "--out", type=str,
                    help='this is the file you want the lcNrD file to output as. Default = same name + _LcNrD')

# Boolean switches: store_true gives real True/False values, so the rest of the
# script can test `if args.lowercase:` instead of comparing strings.
parser.add_argument('-l', "--lowercase", action='store_true',
                    help='add -l to set lowercase to true')
parser.add_argument('-d', "--duplicates", action='store_true',
                    help='add -d to remove duplicate elements')
parser.add_argument('-s', "--shuffle", action='store_true',
                    help='add -s to shuffle your list')
parser.add_argument('-rf', "--replace_file", action='store_true',
                    help='add -rf to replace the original file after lcnrd operation')
parser.add_argument('-bl', "--blank_lines", action='store_true',
                    help='add -bl to remove all blank lines')

# Line-length limits. The minimum defaults to 0 (no limit); the maximum is
# None unless given on the command line.
parser.add_argument('-cmi', "--character_min", type=int, default=0,
                    help='limits the minimal amount of chacters required')
parser.add_argument('-cma', "--character_max", type=int,
                    help='limits the maximum amount of chacters required')

# Keyword-driven filters and edits. All of them are None unless supplied.
parser.add_argument('-kr', "--keyword_removal", type=str,
                    help='add a -kr to remove lines with the removal keyword, use *.txt or whatever to point to a list of kr')
parser.add_argument('-kk', "--keyword_keep", type=str,
                    help='add a -kk to keep only the lines with the keep keyword, use *.txt or whatever to point to a list of kk')
parser.add_argument('-kdb', "--keyword_delete_before", type=str,
                    help='add a -kdb to delete all text before the delete before keyword')
parser.add_argument('-kda', "--keyword_delete_after", type=str,
                    help='add a -kda to delete all text after the delete after keyword')
parser.add_argument('-kae', "--keyword_add_end", type=str,
                    help='add a -kae to add some text to the end of every line')
# Read the command line, then pull the whole target file into memory.
args = parser.parse_args()
print('Grabbing file contents...')
# The context manager closes the handle as soon as the read has finished.
with open(args.file, "r") as source:
    text = source.read()
# One list entry per line of the file; every later step works on this list.
lines = text.split("\n")
# -bl: drop every empty line.
if args.blank_lines:
    print('Removing blank lines...')
    # An empty string is falsy, so this keeps exactly the lines with content.
    lines = [entry for entry in lines if entry]
# -kdb: on every line, delete the text that precedes the keyword.
# The keyword itself and everything after it are kept, mirroring -kda (which
# keeps the keyword and everything before it). The previous hand-rolled rejoin
# appended the keyword after each remaining piece, so "user:pass" came out as
# "pass:" with the keyword moved to the end of the line.
if args.keyword_delete_before is not None:
    print('removing text before delete keyword...')
    trimmed = []
    for entry in lines:
        # partition() cuts at the first occurrence; `found` is '' when the
        # keyword does not occur in this line.
        _, found, rest = entry.partition(args.keyword_delete_before)
        # Lines without the keyword pass through unchanged.
        trimmed.append(found + rest if found else entry)
    lines = trimmed
# -kda: on every line, delete the text that follows the first keyword
# (the keyword itself stays at the end of the line).
if args.keyword_delete_after is not None:
    print('removing text after delete keyword...')
    truncated = []
    for entry in lines:
        head, found, _ = entry.partition(args.keyword_delete_after)
        # When the keyword is absent, `head` is the whole line and `found` is
        # '', so the line is kept as it was.
        truncated.append(head + found)
    lines = truncated
# -cmi: keep only lines that have at least the requested number of characters.
if args.character_min > 0:
    print('Removing lines under min character requirements...')
    lines = [entry for entry in lines if len(entry) >= args.character_min]
# -cma: keep only lines that have at most the requested number of characters.
if args.character_max is not None:
    print('Removing lines over max character requirements...')
    lines = [entry for entry in lines if len(entry) <= args.character_max]
# -kr: remove lines that match the removal keyword(s).
# A value containing "*." is treated as a keyword-list file: the "*" marker is
# stripped to get the path, and each line of that file is one keyword. Any
# other value is a single keyword matched as a substring.
if args.keyword_removal is not None:
    print('removing lines with removal keyword...')
    if "*." in args.keyword_removal:
        print("file used!")
        print('Reading filter keywords...')
        with open(args.keyword_removal.replace('*', ''), "r") as fp:
            # A set gives O(1) membership tests; empty entries are harmless
            # because str.split() never yields an empty word.
            banned = set(fp.read().split('\n'))
        print('filter keywords from file...')
        # A line is dropped when ANY of its whitespace-separated words equals a
        # keyword. The previous loop kept a line as soon as one word differed
        # from one keyword, which kept practically everything, and it also
        # discarded blank and repeated lines as a side effect.
        lines = [entry for entry in lines if banned.isdisjoint(entry.split())]
    else:
        # Single keyword: drop every line that contains it anywhere.
        lines = [entry for entry in lines if args.keyword_removal not in entry]
# -kk: keep only the lines that match the keep keyword(s).
# A value containing "*." is treated as a keyword-list file: the "*" marker is
# stripped to get the path, and each line of that file is one keyword. Any
# other value is a single keyword matched as a substring.
if args.keyword_keep is not None:
    print('removing lines without keep keyword...')
    if "*." in args.keyword_keep:
        print("file used!")
        print('Reading filter keywords...')
        with open(args.keyword_keep.replace('*', ''), "r") as fp:
            # A set gives O(1) membership tests; empty entries are harmless
            # because str.split() never yields an empty word.
            wanted = set(fp.read().split('\n'))
        print('filter keywords from file...')
        # Keep a line when at least one of its whitespace-separated words is a
        # keyword. Each line is tested once, so repeated lines are all kept
        # (as in the single-keyword branch); removing duplicates is -d's job.
        lines = [entry for entry in lines if not wanted.isdisjoint(entry.split())]
    else:
        print("keyword used!")
        # Single keyword: keep every line that contains it anywhere.
        lines = [entry for entry in lines if args.keyword_keep in entry]
# -l: lowercase every line.
if args.lowercase:
    print('lowercasing contents...')
    lines = list(map(str.lower, lines))
# -d: remove duplicate lines, keeping the first occurrence of each.
# dict.fromkeys() de-duplicates while preserving insertion order, and wrapping
# it in list() keeps `lines` a list. A plain set() would scramble the line
# order and would make the later random shuffle fail, because random.shuffle
# needs an indexable sequence.
if args.duplicates:
    print('removing duplicates...')
    lines = list(dict.fromkeys(lines))
# -kae: append the given text to the end of every line.
# Building the list directly avoids growing one big string piece by piece and
# re-splitting it, and keeps each suffix attached to its own line even if the
# suffix happens to contain a newline.
if args.keyword_add_end is not None:
    print('adding keyword to end...')
    lines = [entry + args.keyword_add_end for entry in lines]
# Decide where the result is written when -o was not given.
if not args.out:
    if args.replace_file:
        # -rf: overwrite the input file.
        print('replacing orginal file...')
        args.out = args.file
    else:
        # Insert "_lcNrD" between the file name and its extension, e.g.
        # 'dir/file.txt' -> 'dir/file_lcNrD.txt'. os.path.splitext only looks
        # at the last path component, so a name without an extension simply
        # gets the tag appended ('dir/file' -> 'dir/file_lcNrD') instead of
        # raising, and dots in directory names are left alone.
        root, ext = os.path.splitext(args.file)
        args.out = root + "_lcNrD" + ext
# -s: shuffle the lines in place.
if args.shuffle:
    print('shuffling content...')
    # random.shuffle needs a mutable, indexable sequence. Earlier steps may
    # have left `lines` as another kind of collection (e.g. a set after
    # duplicate removal), so normalise to a list first.
    lines = list(lines)
    r.shuffle(lines)
# Write the processed lines to the output file.
with open(args.out, "w") as sink:
    # Python strings are immutable, so adding lines together one at a time
    # would build a brand-new string on every step. str.join assembles the
    # whole output in a single pass instead.
    sink.write("\n".join(lines))
print('...& DONE!')