Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup script to search and remove rageshakes from applications based on a time #61

Merged
merged 10 commits into from
Jan 16, 2023
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.idea
/.idea
/bin
/bugs
/pkg
Expand Down
2 changes: 1 addition & 1 deletion changelog.d/61.feature
Original file line number Diff line number Diff line change
@@ -1 +1 @@
zero-dependency python script to cleanup old rageshakes.
Add a zero-dependency Python script to clean up old rageshakes.
140 changes: 86 additions & 54 deletions scripts/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import glob
import gzip
import os
import sys
from datetime import datetime, timedelta
from typing import Dict, Iterable, List
from typing import Dict, Iterable, List, Set


# Cleanup for rageshake server output files
#
Expand All @@ -19,50 +21,80 @@ class Cleanup(object):
def __init__(
michaelkaye marked this conversation as resolved.
Show resolved Hide resolved
self,
limits: Dict[str, int],
days_to_check: List[int],
days_to_check: Iterable[int],
dry_run: bool,
root_path: str,
mxids_to_exclude: List[str],
):
self.dry_run = dry_run
self.days_to_check = days_to_check
self.limits = limits
self.root_path = root_path
self._limits = limits
self._days_to_check = days_to_check
self._dry_run = dry_run
self._root_path = root_path
self._mxids_to_exclude = mxids_to_exclude
# Count of files we deleted or would delete (dry-run)
self.deleted = 0
# Count of files we checked
self.checked = 0
# Sum of bytes in files we deleted or would delete (dry-run)
self.disk_saved = 0
self.mxids_to_exclude = mxids_to_exclude
self.excluded_by_user = {
mxids_to_exclude[i]: 0 for i in range(len(mxids_to_exclude))
}
# Per-mxid count of rageshakes that were kept only because the user is on the exclusion list.
self.excluded_count_by_user = {mxid: 0 for mxid in mxids_to_exclude}

def check_date(self, folder_name: str, applications_to_delete: List[str]) -> None:
def cleanup(self) -> None:
"""
Check for rageshakes to remove according to settings
"""
today = datetime.today()
for days_ago in self._days_to_check:
target = today - timedelta(days=days_ago)
folder_name = target.strftime("%Y-%m-%d")
applications = set()
for name in self._limits.keys():
if self._limits[name] < days_ago:
applications.add(name)
self._check_date(self._root_path + "/" + folder_name, applications)

def _check_date(self, folder_name: str, applications_to_delete: Set[str]) -> None:
"""
Check all rageshakes on a given date (folder)
"""
if len(applications_to_delete) == 0:
print(f"W Not checking {folder_name}, no applications would be removed")
return

# list folder_name for rageshakes:
# foreach:
files = glob.iglob(folder_name + "/[0-9]*")
if not os.path.exists(folder_name):
print(f"W Not checking {folder_name}, not present or not a directory")
return

checked = 0
deleted = 0
for rageshake_name in files:
checked = checked + 1
if self.check_rageshake(rageshake_name, applications_to_delete):
deleted = deleted + 1
with os.scandir(folder_name) as rageshakes:
for rageshake in rageshakes:
rageshake_path = os.path.join(folder_name, rageshake.name)
if rageshake.is_dir():
checked += 1
if self._check_rageshake(rageshake_path, applications_to_delete):
deleted += 1
else:
print(
f"W File in rageshake tree {rageshake_path} is not a directory"
)

print(
f"I Checked {folder_name} for {applications_to_delete}, deleted {deleted}/{checked} rageshakes"
f"I Checked {folder_name} for {applications_to_delete}, "
f"{'would delete' if self._dry_run else 'deleted'} {deleted}/{checked} rageshakes"
)

self.deleted = self.deleted + deleted
self.checked = self.checked + checked
self.deleted += deleted
self.checked += checked
# Optionally delete the folder if we deleted 100% of its rageshakes, but for now it's fine.

def check_rageshake(
self, rageshake_folder_path: str, applications_to_delete: List[str]
def _check_rageshake(
self, rageshake_folder_path: str, applications_to_delete: Set[str]
) -> bool:
"""
Checks a given rageshake folder, returning True if the rageshake was deleted
michaelkaye marked this conversation as resolved.
Show resolved Hide resolved
"""
try:
app_name = None
mxid = None
Expand All @@ -73,14 +105,12 @@ def check_rageshake(
app_name = parts[1].strip()
if parts[0] == "user_id":
mxid = parts[1].strip()
print(f"app_name {app_name} user_id {mxid}")
if app_name in applications_to_delete:
if mxid in self.mxids_to_exclude:
self.excluded_by_user[mxid] = self.excluded_by_user[mxid] + 1
if mxid in self._mxids_to_exclude:
self.excluded_count_by_user[mxid] += 1
else:
self.delete(rageshake_folder_path)
self._delete(rageshake_folder_path)
return True
return False

except FileNotFoundError as e:
print(
Expand All @@ -89,34 +119,25 @@ def check_rageshake(

return False

def delete(self, rageshake_folder_path: str) -> None:
def _delete(self, rageshake_folder_path: str) -> None:
"""
Delete a given rageshake folder
michaelkaye marked this conversation as resolved.
Show resolved Hide resolved
"""
files = glob.glob(rageshake_folder_path + "/*")
for file in files:
self.disk_saved += os.stat(file).st_size
if self.dry_run:
if self._dry_run:
print(f"I would delete {file}")
else:
print(f"I deleted {file}")
print(f"I deleting {file}")
os.unlink(file)

if self.dry_run:
if self._dry_run:
print(f"I would remove directory {rageshake_folder_path}")
else:
print(f"I removing directory {rageshake_folder_path}")
os.rmdir(rageshake_folder_path)

def cleanup(self) -> None:
today = datetime.today()
for days_ago in self.days_to_check:
target = today - timedelta(days=days_ago)
folder_name = target.strftime("%Y-%m-%d")
applications = []
for name in self.limits.keys():
if self.limits[name] < days_ago:
applications.append(name)
self.check_date(self.root_path + "/" + folder_name, applications)
pass


def main():
parser = argparse.ArgumentParser(description="Cleanup rageshake files on disk")
Expand All @@ -141,8 +162,8 @@ def main():
help="Explicitly supply days in the past to check for deletion, eg '1,2,3,5'",
)
parser.add_argument(
"--exclude-mxids",
dest="exclude_mxids",
"--exclude-mxids-file",
dest="exclude_mxids_file",
type=str,
help="Supply a text file containing one mxid per line to exclude from cleanup. Blank lines and lines starting # are ignored.",
)
Expand All @@ -160,7 +181,16 @@ def main():
args = parser.parse_args()
application_limits: Dict[str, int] = {}
for x in args.limits:
application_limits[x.split(":")[0]] = int(x.split(":")[1])
parts = x.rsplit(":", 1)
try:
if len(parts) < 2:
raise ValueError("missing :")
limit = int(parts[1])
except ValueError as e:
print(f"E Malformed --limits argument: {e}", file=sys.stderr)
sys.exit(1)

application_limits[parts[0]] = limit

days_to_check: Iterable[int] = []
if args.max_days:
Expand All @@ -169,8 +199,8 @@ def main():
days_to_check = map(lambda x: int(x), args.days_to_check.split(","))

mxids_to_exclude = []
if args.exclude_mxids:
with open(args.exclude_mxids) as file:
if args.exclude_mxids_file:
with open(args.exclude_mxids_file) as file:
for lineno, data in enumerate(file):
data = data.strip()
if len(data) == 0:
Expand All @@ -183,20 +213,22 @@ def main():
# mxid
mxids_to_exclude.append(data)
else:
raise Exception(
f"Unable to parse --exclude-mxids file on line {lineno + 1}: {data}"
print(
f"E Unable to parse --exclude-mxids-file on line {lineno + 1}: {data}",
file=sys.stderr,
)
sys.exit(1)

cleanup = Cleanup(
application_limits, days_to_check, args.dry_run, args.path, mxids_to_exclude
)

cleanup.cleanup()
print(
f"I Deleted {cleanup.deleted} of {cleanup.checked} rageshakes. "
f"saving {cleanup.disk_saved} bytes. Dry run? {cleanup.dry_run}"
f"I Deleted {cleanup.deleted} of {cleanup.checked} rageshakes, "
f"saving {cleanup.disk_saved} bytes. Dry run? {cleanup._dry_run}"
)
print(f"I excluded count by user {cleanup.excluded_by_user}")
print(f"I excluded count by user {cleanup.excluded_count_by_user}")


if __name__ == "__main__":
Expand Down