diff --git a/app/__init__.py b/app/__init__.py
index c08c4ae0a..5d10966e8 100644
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -252,6 +252,7 @@ def register_blueprint(application):

 def init_app(app):
+    @app.before_request
     def record_request_details():
         g.start = monotonic()
diff --git a/app/aws/s3.py b/app/aws/s3.py
index 9466e6cce..ebdffddd5 100644
--- a/app/aws/s3.py
+++ b/app/aws/s3.py
@@ -19,6 +19,77 @@
 JOBS_CACHE_MISSES = "JOBS_CACHE_MISSES"

+def list_s3_objects():
+    bucket_name = current_app.config["CSV_UPLOAD_BUCKET"]["bucket"]
+    access_key = current_app.config["CSV_UPLOAD_BUCKET"]["access_key_id"]
+    secret_key = current_app.config["CSV_UPLOAD_BUCKET"]["secret_access_key"]
+    region = current_app.config["CSV_UPLOAD_BUCKET"]["region"]
+    session = Session(
+        aws_access_key_id=access_key,
+        aws_secret_access_key=secret_key,
+        region_name=region,
+    )
+    s3 = session.client("s3")
+
+    try:
+        response = s3.list_objects_v2(Bucket=bucket_name)
+        while True:
+            for obj in response.get("Contents", []):
+                yield obj["Key"]
+            if "NextContinuationToken" in response:
+                response = s3.list_objects_v2(
+                    Bucket=bucket_name,
+                    ContinuationToken=response["NextContinuationToken"],
+                )
+            else:
+                break
+    except Exception as e:
+        current_app.logger.error(
+            f"An error occurred while regenerating cache #notify-admin-1200 {e}"
+        )
+
+
+def get_s3_files():
+    current_app.logger.info("Regenerate job cache #notify-admin-1200")
+    bucket_name = current_app.config["CSV_UPLOAD_BUCKET"]["bucket"]
+    access_key = current_app.config["CSV_UPLOAD_BUCKET"]["access_key_id"]
+    secret_key = current_app.config["CSV_UPLOAD_BUCKET"]["secret_access_key"]
+    region = current_app.config["CSV_UPLOAD_BUCKET"]["region"]
+    session = Session(
+        aws_access_key_id=access_key,
+        aws_secret_access_key=secret_key,
+        region_name=region,
+    )
+    objects = list_s3_objects()
+
+    s3res = session.resource("s3", config=AWS_CLIENT_CONFIG)
+    current_app.logger.info(
+        f"JOBS cache length before regen: {len(JOBS)} #notify-admin-1200"
+    )
+    for key in objects:
+        # We put our csv files in the format "service-{service_id}-notify/{job_id}"
+        try:
+            object_arr = key.split("/")
+            job_id = object_arr[1]  # get the job_id
+            job_id = job_id.replace(".csv", "")  # we just want the job_id
+            if JOBS.get(job_id) is None:
+                body = (
+                    s3res.Object(bucket_name, key)
+                    .get()["Body"]
+                    .read()
+                    .decode("utf-8")
+                )
+                if "phone number" in body.lower():
+                    JOBS[job_id] = body
+        except LookupError as le:
+            # Perhaps the key is not formatted as we expected; if so, skip it.
+            current_app.logger.error(f"LookupError {le} #notify-admin-1200")
+
+    current_app.logger.info(
+        f"JOBS cache length after regen: {len(JOBS)} #notify-admin-1200"
+    )
+
+
 def get_s3_file(bucket_name, file_location, access_key, secret_key, region):
     s3_file = get_s3_object(bucket_name, file_location, access_key, secret_key, region)
     return s3_file.get()["Body"].read().decode("utf-8")
diff --git a/app/celery/provider_tasks.py b/app/celery/provider_tasks.py
index 7d32c9326..b79902ced 100644
--- a/app/celery/provider_tasks.py
+++ b/app/celery/provider_tasks.py
@@ -2,6 +2,7 @@
 import os
 from datetime import timedelta

+from botocore.exceptions import ClientError
 from flask import current_app
 from sqlalchemy.orm.exc import NoResultFound

@@ -22,7 +23,7 @@
 # This is the amount of time to wait after sending an sms message before we check the aws logs and look for delivery
 # receipts
-DELIVERY_RECEIPT_DELAY_IN_SECONDS = 120
+DELIVERY_RECEIPT_DELAY_IN_SECONDS = 30

 @notify_celery.task(
@@ -62,6 +63,21 @@ def check_sms_delivery_receipt(self, message_id, notification_id, sent_at):
             provider_response=provider_response,
         )
         raise self.retry(exc=ntfe)
+    except ClientError as err:
+        # Probably a ThrottlingException but could be something else
+        error_code = err.response["Error"]["Code"]
+        provider_response = (
+            f"{error_code} while checking sms receipt -- still looking"
+        )
+        status = "pending"
+        carrier = ""
+        update_notification_status_by_id(
+            notification_id,
+            status,
+            carrier=carrier,
+            provider_response=provider_response,
+        )
+        raise self.retry(exc=err)

     if status == "success":
         status = NotificationStatus.DELIVERED
diff --git a/app/celery/tasks.py b/app/celery/tasks.py
index f0d036549..e6ed717e7 100644
--- a/app/celery/tasks.py
+++ b/app/celery/tasks.py
@@ -441,6 +441,11 @@ def send_inbound_sms_to_service(self, inbound_sms_id, service_id):
     )

+@notify_celery.task(name="regenerate-job-cache")
+def regenerate_job_cache():
+    s3.get_s3_files()
+
+
 @notify_celery.task(name="process-incomplete-jobs")
 def process_incomplete_jobs(job_ids):
     jobs = [dao_get_job_by_id(job_id) for job_id in job_ids]
diff --git a/app/config.py b/app/config.py
index 8d913bdd8..65ef6b2d3 100644
--- a/app/config.py
+++ b/app/config.py
@@ -249,6 +249,11 @@ class Config(object):
             "schedule": crontab(hour=6, minute=0),
             "options": {"queue": QueueNames.PERIODIC},
         },
+        "regenerate-job-cache": {
+            "task": "regenerate-job-cache",
+            "schedule": crontab(minute="*/30"),
+            "options": {"queue": QueueNames.PERIODIC},
+        },
         "cleanup-unfinished-jobs": {
             "task": "cleanup-unfinished-jobs",
             "schedule": crontab(hour=4, minute=5),
diff --git a/app/service/rest.py b/app/service/rest.py
index 7d98cd91c..687cf5a23 100644
--- a/app/service/rest.py
+++ b/app/service/rest.py
@@ -201,6 +201,8 @@ def get_service_by_id(service_id):
     fetched = dao_fetch_service_by_id(service_id)

     data = service_schema.dump(fetched)
+
+    current_app.logger.info(f'>> SERVICE: {data["id"]}; {data}')
     return jsonify(data=data)
diff --git a/deploy-config/production.yml b/deploy-config/production.yml
index fc5be0451..fc813fe0b 100644
--- a/deploy-config/production.yml
+++ b/deploy-config/production.yml
@@ -1,8 +1,8 @@
 env: production
 web_instances: 2
-web_memory: 2G
+web_memory: 4G
 worker_instances: 1
-worker_memory: 2G
+worker_memory: 4G
 scheduler_memory: 256M
 public_api_route: notify-api.app.cloud.gov
 admin_base_url: https://beta.notify.gov
diff --git a/docs/all.md b/docs/all.md
index 0c9aa6af7..5f9be2a30 100644
--- a/docs/all.md
+++ b/docs/all.md
@@ -60,9 +60,13 @@
 - [Data Storage Policies \& Procedures](#data-storage-policies--procedures)
   - [Potential PII Locations](#potential-pii-locations)
   - [Data Retention Policy](#data-retention-policy)
-- [Debug messages not being sent](#debug-messages-not-being-sent)
-  - [Getting the file location and tracing what happens](#getting-the-file-location-and-tracing-what-happens)
-  - [Viewing the csv file](#viewing-the-csv-file)
+- [Troubleshooting](#troubleshooting)
+  - [Debug messages not being sent](#debug-messages-not-being-sent)
+    - [Getting the file location and tracing what happens](#getting-the-file-location-and-tracing-what-happens)
+    - [Viewing the csv file](#viewing-the-csv-file)
+  - [Deployment / app push problems](#deployment--app-push-problems)
+    - [Routes cannot be mapped to destinations in different spaces](#routes-cannot-be-mapped-to-destinations-in-different-spaces)
+    - [API request failed](#api-request-failed)

 # Infrastructure overview
@@ -449,7 +453,10 @@ If this is the first time you have used Terraform in this repository, you will f
    ```
    cf push --vars-file deploy-config/sandbox.yml --var NEW_RELIC_LICENSE_KEY=$NEW_RELIC_LICENSE_KEY
    ```
-
+   The real `push` command has more var arguments than the single one above. Get their values from a Notify team member.
+1. Visit the URL of the app you just deployed
+   * Admin https://notify-sandbox.app.cloud.gov/
+   * API https://notify-api-sandbox.app.cloud.gov/

 # Database management
@@ -1327,11 +1334,12 @@ Seven (7) days by default. Each service can be set with a custom policy via `ServiceDataRetention`
 Data cleanup is controlled by several tasks in the `nightly_tasks.py` file, kicked off by Celery Beat.

+# Troubleshooting

-# Debug messages not being sent
+## Debug messages not being sent

-## Getting the file location and tracing what happens
+### Getting the file location and tracing what happens

 Ask the user to provide the csv file name. Either the csv file they uploaded, or the one that is autogenerated when they do a one-off send and is visible in the UI
@@ -1340,7 +1348,7 @@ Starting with the admin logs, search for this file name. When you find it, the
 In the api logs, search by job_id. Either you will see evidence of the job failing and retrying over and over (in which case search for a stack trace using timestamp), or you will ultimately get to a log line that links the job_id to a message_id. In this case, now search by message_id. You should be able to find the actual result from AWS, either success or failure, with hopefully some helpful info.

-## Viewing the csv file
+### Viewing the csv file

 If you need to view the questionable csv file on production, run the following command:
@@ -1355,7 +1363,7 @@ locally, just do:
 ```
 poetry run flask command download-csv-file-by-name -f
 ```

-## Debug steps
+### Debug steps

 1. Either send a message and capture the csv file name, or get a csv file name from a user
 2. Using the log tool at logs.fr.cloud.gov, use filters to limit what you're searching on (cf.app is 'notify-admin-production' for example) and then search with the csv file name in double quotes over the relevant time period (last 5 minutes if you just sent a message, or else whatever time the user sent at)
@@ -1363,3 +1371,28 @@ poetry run flask command download-csv-file-by-name -f
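The docs hunks above reference `poetry run flask command download-csv-file-by-name -f`; that command's implementation is not part of this diff. Purely as an illustration, a Flask CLI command of that shape could be built on the `get_s3_file` helper shown in the `app/aws/s3.py` hunk. The decorator stack, option names, and registration here are hypothetical:

```python
import click
from flask import current_app
from flask.cli import with_appcontext

from app.aws.s3 import get_s3_file


@click.command("download-csv-file-by-name")
@click.option("-f", "--file-location", required=True, help="S3 key of the csv file")
@with_appcontext
def download_csv_file_by_name(file_location):
    # Pull bucket credentials from the same config block the s3 helpers use.
    bucket = current_app.config["CSV_UPLOAD_BUCKET"]
    contents = get_s3_file(
        bucket["bucket"],
        file_location,
        bucket["access_key_id"],
        bucket["secret_access_key"],
        bucket["region"],
    )
    print(contents)
```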
diff --git a/poetry.lock b/poetry.lock
 crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]

 [[package]]
 name = "botocore"
-version = "1.34.144"
+version = "1.34.148"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "botocore-1.34.144-py3-none-any.whl", hash = "sha256:a2cf26e1bf10d5917a2285e50257bc44e94a1d16574f282f3274f7a5d8d1f08b"},
-    {file = "botocore-1.34.144.tar.gz", hash = "sha256:4215db28d25309d59c99507f1f77df9089e5bebbad35f6e19c7c44ec5383a3e8"},
+    {file = "botocore-1.34.148-py3-none-any.whl", hash = "sha256:9e09428b0bc4d0c1cf5e368dd6ab18eabf6047304060f8b5dd8391677cfe00e6"},
+    {file = "botocore-1.34.148.tar.gz", hash = "sha256:258dd95570b43db9fa21cce5426eabaea5867e3a61224157650448b5019d1bbd"},
 ]

 [package.dependencies]
@@ -485,13 +485,13 @@ redis = ["redis (>=2.10.5)"]

 [[package]]
 name = "cachetools"
-version = "5.3.3"
+version = "5.4.0"
 description = "Extensible memoizing collections and decorators"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"},
-    {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"},
+    {file = "cachetools-5.4.0-py3-none-any.whl", hash = "sha256:3ae3b49a3d5e28a77a0be2b37dbcb89005058959cb2323858c2657c4a8cab474"},
+    {file = "cachetools-5.4.0.tar.gz", hash = "sha256:b8adc2e7c07f105ced7bc56dbb6dfbe7c4a00acce20e2227b3f355be89bc6827"},
 ]

 [[package]]
@@ -1219,13 +1219,13 @@ dev = ["black", "build", "commitizen", "isort", "pip-tools", "pre-commit", "twine"]

 [[package]]
 name = "exceptiongroup"
-version = "1.2.1"
+version = "1.2.2"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"},
-    {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"},
+    {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
+    {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
 ]

 [package.extras]
@@ -3299,20 +3299,20 @@ files = [

 [[package]]
 name = "pytest"
-version = "8.2.2"
+version = "8.3.1"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"},
-    {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"},
+    {file = "pytest-8.3.1-py3-none-any.whl", hash = "sha256:e9600ccf4f563976e2c99fa02c7624ab938296551f280835ee6516df8bc4ae8c"},
+    {file = "pytest-8.3.1.tar.gz", hash = "sha256:7e8e5c5abd6e93cb1cc151f23e57adc31fcf8cfd2a3ff2da63e23f732de35db6"},
 ]

 [package.dependencies]
 colorama = {version = "*", markers = "sys_platform == \"win32\""}
 iniconfig = "*"
 packaging = "*"
-pluggy = ">=1.5,<2.0"
+pluggy = ">=1.5,<2"

 [package.extras]
 dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
@@ -4747,4 +4747,4 @@ multidict = ">=4.0"

 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12.2"
-content-hash = "e7bab493ea80525f7809ac86d3dc93264595dca04e2b51517b41dcf0407146ec"
+content-hash = "d6bdb4ac6902b3afbd1699801fe56e2d7c307c925364239f8613691446f1c46f"
diff --git a/pyproject.toml b/pyproject.toml
index 9bdc87f14..0649c713c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,9 +11,9 @@ python = "^3.12.2"
 alembic = "==1.13.2"
 amqp = "==5.2.0"
 beautifulsoup4 = "==4.12.3"
-boto3 = "^1.34.143"
-botocore = "^1.34.144"
-cachetools = "==5.3.3"
+boto3 = "^1.34.144"
+botocore = "^1.34.148"
+cachetools = "==5.4.0"
 celery = {version = "==5.4.0", extras = ["redis"]}
 certifi = ">=2022.12.7"
 cffi = "==1.16.0"
@@ -85,7 +85,7 @@ awscli = "^1.29.74"
 bandit = "*"
 black = "^24.3.0"
 cloudfoundry-client = "*"
-exceptiongroup = "==1.2.1"
+exceptiongroup = "==1.2.2"
 flake8 = "^7.1.0"
 flake8-bugbear = "^24.1.17"
 freezegun = "^1.5.1"
@@ -95,7 +95,7 @@ jinja2-cli = {version = "==0.8.2", extras = ["yaml"]}
 moto = "==5.0.11"
 pip-audit = "*"
 pre-commit = "^3.7.1"
-pytest = "^8.2.2"
+pytest = "^8.3.1"
 pytest-env = "^1.1.3"
 pytest-mock = "^3.14.0"
 pytest-cov = "^5.0.0"
diff --git a/runtime.txt b/runtime.txt
index 4ddc7cd66..64f28603a 100644
--- a/runtime.txt
+++ b/runtime.txt
@@ -1 +1 @@
-python-3.12.3
+python-3.12.x
diff --git a/terraform/README.md b/terraform/README.md
index f3e619137..bbb63424a 100644
--- a/terraform/README.md
+++ b/terraform/README.md
@@ -134,6 +134,8 @@ These steps assume shared [Terraform state credentials](#terraform-state-credentials)
    This command *will deploy your changes* to the cloud. This is a healthy part of testing your code in the sandbox, or if you are creating a new environment (a new directory). **Do not** apply in environments that people are relying upon.

+   If you need to go on to deploy application code on top of the resources you just instantiated, you will [use `cf push`](https://github.com/GSA/notifications-api/blob/main/docs/all.md#deploying-to-the-sandbox).
+
 1. Remove the space deployer service instance when you are done manually running Terraform.

    ```bash
    # and have the same values as used above.
diff --git a/terraform/development/main.tf b/terraform/development/main.tf
index 1f45b2b6a..0d8920c6f 100644
--- a/terraform/development/main.tf
+++ b/terraform/development/main.tf
@@ -6,12 +6,11 @@ locals {
 }

 module "csv_upload_bucket" {
-  source = "github.com/18f/terraform-cloudgov//s3?ref=v0.7.1"
+  source = "github.com/GSA-TTS/terraform-cloudgov//s3?ref=v1.0.0"

-  cf_org_name      = local.cf_org_name
-  cf_space_name    = local.cf_space_name
-  recursive_delete = local.recursive_delete
-  name             = "${var.username}-csv-upload-bucket"
+  cf_org_name   = local.cf_org_name
+  cf_space_name = local.cf_space_name
+  name          = "${var.username}-csv-upload-bucket"
 }

 resource "cloudfoundry_service_key" "csv_key" {
   name = local.key_name
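Circling back to the scheduling hunks in `app/config.py` and `app/celery/tasks.py`: the pattern is a named task plus a beat entry that fires it every 30 minutes on the periodic queue. A self-contained sketch of that wiring; the `Celery` app instance and queue string here are illustrative stand-ins for the repo's `notify_celery` and `QueueNames.PERIODIC`:

```python
from celery import Celery
from celery.schedules import crontab

celery_app = Celery("notify")


@celery_app.task(name="regenerate-job-cache")
def regenerate_job_cache():
    # The real task body calls s3.get_s3_files() to warm the JOBS cache.
    ...


celery_app.conf.beat_schedule = {
    "regenerate-job-cache": {
        "task": "regenerate-job-cache",  # must match the registered task name
        "schedule": crontab(minute="*/30"),  # every 30 minutes
        "options": {"queue": "periodic-tasks"},
    },
}
```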