Skip to content

Commit

Permalink
Adds Valkyrie-compatible Fixity Check service and a FileSet button to…
Browse files Browse the repository at this point in the history
… kick it off. (#610)

* Adds Valkyrie-compatible Fixity Check service and a FileSet button to kick it off.

* Rubocop appeasement.

* Adds controller spec.

* Adds spec for Job.
  • Loading branch information
bwatson78 authored Nov 5, 2024
1 parent 1e10399 commit c55c3b9
Show file tree
Hide file tree
Showing 8 changed files with 410 additions and 0 deletions.
37 changes: 37 additions & 0 deletions app/controllers/hyrax/fixity_checks_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# frozen_string_literal: true
# [Hyrax-overwrite-v5.0.1]
# Adds redirect on L#23 in create action and points to our custom service
module Hyrax
  # Kicks off an on-demand fixity check for a single FileSet and sends the
  # user back to that FileSet's page.
  class FixityChecksController < ApplicationController
    before_action :authenticate_user!

    # Expects the param :file_set_id. Runs the fixity check through
    # SelfDeposit::FileSetFixityCheckService and then redirects to the FileSet
    # page with a flash notice.
    #
    # The service result is intentionally not rendered: the JSON response this
    # action used to produce (`render json: fixity_check_service.fixity_check`)
    # was replaced by the redirect.
    def create
      requested_id = params[:file_set_id]
      fixity_check_service.fixity_check
      redirect_to main_app.hyrax_file_set_path(requested_id), notice: 'Ran fixity check'
    end

    private

    # Memoized fixity service for the requested FileSet.
    #
    # `async_jobs: false` is passed so that a fixity result is produced during
    # the request even if there are no 'fresh' ones on record. Otherwise some
    # bytestreams would sometimes only have an 'in progress' status, which is
    # a possible future enhancement.
    def fixity_check_service
      return @fixity_check_service if @fixity_check_service

      @fixity_check_service = ::SelfDeposit::FileSetFixityCheckService.new(
        params[:file_set_id],
        async_jobs: false,
        initiating_user: current_user
      )
    end
  end
end
112 changes: 112 additions & 0 deletions app/jobs/fixity_check_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# frozen_string_literal: true
class FixityCheckJob < Hyrax::ApplicationJob
  include PreservationEvents
  # A Job class that runs a fixity check for one FileSet with the service
  # class configured in Hyrax.config.fixity_service, and stores the result in
  # an ActiveRecord ChecksumAuditLog row via ChecksumAuditLog.create_and_prune!.
  #
  # The checked URI is not passed in; it is read from the fixity service's
  # `target` once the service has been built for the FileSet.
  #
  # After the audit row is written, the job publishes a 'file.set.audited'
  # event and records a 'Fixity Check' preservation event on the FileSet.
  #
  # If calling async as a background job, return value is irrelevant, but
  # if calling sync with `perform_now`, returns the ChecksumAuditLog
  # record recording the check.
  #
  # @param file_set_id [#to_s] the id of the FileSet whose original file is checked.
  # @param initiating_user [User] the object for the user that kicked off the job;
  #   it is recorded on the preservation event.
  def perform(file_set_id:, initiating_user:)
    event_start = DateTime.current
    @file_set = Hyrax.query_service.find_by(id: file_set_id)
    run_check.tap do |audit|
      result = audit.failed? ? :failure : :success

      announce_fixity_check_results(audit, result)
      file_set_preservation_event(audit.passed, event_start, initiating_user)
    end
  end

  private

  ##
  # @api private
  # Builds the configured fixity service, runs the check and writes the audit
  # log row. A Hyrax::Fixity::MissingContentError is recorded as a failed check.
  def run_check
    service = Hyrax.config.fixity_service.new(@file_set)
    expected_result = service.expected_message_digest

    report_to_audit_log(check_results: service.check, uri: service.target.to_s, expected_result:)
  rescue Hyrax::Fixity::MissingContentError
    # `service` and `expected_result` are still nil here when the error was
    # raised before they were assigned, so the target is read nil-safely
    # instead of raising NoMethodError from inside the rescue.
    report_to_audit_log(check_results: false, uri: (service&.target).to_s, expected_result:)
  end

  # Publishes the audit outcome and, for failures, runs the legacy callback.
  def announce_fixity_check_results(audit, result)
    Hyrax.publisher.publish('file.set.audited', file_set: @file_set, audit_log: audit, result:)

    # @todo remove this callback call for Hyrax 4.0.0
    process_failure_callback(audit) if should_call_failure_callback(audit)
  end

  def process_failure_callback(audit)
    Hyrax.config.callback.run(:after_fixity_check_failure,
                              @file_set,
                              checksum_audit_log: audit,
                              warn: false)
  end

  # Only failed audits trigger the callback, and only when one is registered.
  def should_call_failure_callback(audit)
    audit.failed? && Hyrax.config.callback.set?(:after_fixity_check_failure)
  end

  # Builds the 'Fixity Check' preservation event and hands it to
  # create_preservation_event (not defined in this class; presumably provided
  # by PreservationEvents -- confirm).
  #
  # @param passed [Boolean, nil] the audit row's `passed` value; anything other
  #   than `true` is reported as a failure.
  # @param event_start [DateTime] when the job started.
  # @param initiating_user [User] stored under the event's 'user' key.
  def file_set_preservation_event(passed, event_start, initiating_user)
    logger = Logger.new(STDOUT)
    pulled_file_name = original_file_name
    pulled_checksum = original_file_checksum
    event = { 'type' => 'Fixity Check', 'start' => event_start, 'software_version' => 'Fedora v6.5.0', 'user' => initiating_user }

    if passed == true
      event['outcome'] = 'Success'
      event['details'] = "Fixity intact for file: #{pulled_file_name}: sha1: #{pulled_checksum}"
      logger.info "Ran fixity check successfully on #{pulled_file_name}"
    else
      event['outcome'] = 'Failure'
      event['details'] = "Fixity check failed for: #{pulled_file_name}: sha1: #{pulled_checksum}"
      logger.error "Fixity check failure: Fixity failed for #{pulled_file_name}"
    end
    create_preservation_event(@file_set, event)
  end

  # First available of: original file title, label, original filename.
  def original_file_name
    pulled_original_file_title || pulled_original_file_label || @file_set&.original_file&.original_filename
  end

  def pulled_original_file_title
    @file_set&.original_file&.title&.first
  end

  def pulled_original_file_label
    @file_set&.original_file&.label&.first
  end

  def original_file_checksum
    @file_set&.original_file&.original_checksum&.first
  end

  # Writes the ChecksumAuditLog row for this check.
  #
  # @param check_results [Boolean] stored as `passed`.
  # @param uri [String] stored as `checked_uri`.
  # @param expected_result [String, nil] the expected message digest, if known.
  def report_to_audit_log(check_results:, uri:, expected_result:)
    ChecksumAuditLog.create_and_prune!(
      passed: check_results,
      file_set_id: @file_set.id.to_s,
      checked_uri: uri,
      file_id: @file_set.original_file.id.to_s,
      expected_result:
    )
  end
end
88 changes: 88 additions & 0 deletions app/services/self_deposit/file_set_fixity_check_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# frozen_string_literal: true
module SelfDeposit
  ##
  # This class runs a fixity check on the original file of a Valkyrie FileSet.
  #
  # The fixity check itself is performed by {FixityCheckJob}. The outcome will
  # be a created {ChecksumAuditLog} (ActiveRecord) object, recording the check
  # and its result.
  #
  # By default this runs the check async using +ActiveJob+, so
  # returns no useful info -- the check is still going. Use
  # {ChecksumAuditLog.latest_for_file_set_id} to retrieve the latest
  # machine-readable checks.
  #
  # But if you initialize with +async_jobs: false+, the check will be done
  # blocking in foreground, and you can get back the {ChecksumAuditLog}
  # record created.
  #
  # It will only run a fixity check if there is no recent
  # {ChecksumAuditLog} on record. "recent" is defined by the
  # +max_days_between_fixity_checks+ arg, which defaults to configured
  # {Hyrax::Configuration#max_days_between_fixity_checks}
  class FileSetFixityCheckService
    attr_reader :id, :async_jobs, :max_days_between_fixity_checks

    # @param file_set_id [#to_s] id of the FileSet; the FileSet itself is
    #   loaded through Hyrax.query_service.
    # @param async_jobs [Boolean] Run actual fixity checks in background. Default true.
    # @param max_days_between_fixity_checks [Integer] if an existing fixity check is
    #   recorded within this window, no new one will be created. Default
    #   {Hyrax::Configuration#max_days_between_fixity_checks}. Set to -1 to force
    #   check. +nil+ is treated as 0.
    # @param initiating_user [User] the user that requested the check; passed
    #   on to {FixityCheckJob}.
    def initialize(file_set_id,
                   async_jobs: true,
                   max_days_between_fixity_checks: Hyrax.config.max_days_between_fixity_checks,
                   initiating_user:)
      @max_days_between_fixity_checks = max_days_between_fixity_checks || 0
      @async_jobs = async_jobs
      @initiating_user = initiating_user
      @id = file_set_id
      @file_set = Hyrax.query_service.find_by(id: @id)
    end

    # Fixity checks the FileSet's original file if it hasn't been checked recently.
    #
    # @return [Object, nil] when async_jobs is false, the most recent audit log
    #   (either the existing recent one or the result of +perform_now+). When
    #   async_jobs is true (default), always nil -- stuff is still going on.
    def fixity_check
      results = fixity_check_file
      results unless async_jobs
    end

    private

    # Retrieve the latest recorded fixity check for the original file, or
    # start a new one when there is none or it is too old.
    def fixity_check_file
      latest_fixity_check = ChecksumAuditLog.logs_for(@id.to_s, checked_uri:).first
      return latest_fixity_check unless needs_fixity_check?(latest_fixity_check)

      job_args = { file_set_id: @id, initiating_user: @initiating_user }
      async_jobs ? FixityCheckJob.perform_later(**job_args) : FixityCheckJob.perform_now(**job_args)
    end

    # URI under which audit log rows for the original file are looked up: the
    # file identifier with its leading +fedora:+ scheme swapped for +http:+.
    # Only the scheme is rewritten; a host or path that happens to contain
    # "fedora" is left untouched.
    def checked_uri
      @file_set.original_file.file_identifier.to_s.sub(/\Afedora:/, 'http:')
    end

    # Check if time since the last fixity check is greater than the maximum days allowed between fixity checks
    # @param [ChecksumAuditLog] latest_fixity_check the most recent fixity check
    def needs_fixity_check?(latest_fixity_check)
      return true unless latest_fixity_check
      unless latest_fixity_check.updated_at
        # This class has no +logger+ of its own, so log through Hyrax.logger.
        Hyrax.logger.warn "***FIXITY*** problem with fixity check log! Latest Fixity check is not nil, but updated_at is not set #{latest_fixity_check}"
        return true
      end
      days_since_last_fixity_check(latest_fixity_check) >= max_days_between_fixity_checks
    end

    # Return the number of days since the latest fixity check
    # @param [ChecksumAuditLog] latest_fixity_check the most recent fixity check
    def days_since_last_fixity_check(latest_fixity_check)
      (DateTime.current - latest_fixity_check.updated_at.to_date).to_i
    end
  end
end
91 changes: 91 additions & 0 deletions app/services/self_deposit/valkyrie_fixity_service.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# frozen_string_literal: true
module SelfDeposit
  # Fixity service for FileSets whose original file lives in Fedora. It asks
  # Fedora for a fixity report (+<target>/fcr:fixity+) through the connection
  # of the Hyrax query service adapter and interprets the returned graph.
  class ValkyrieFixityService
    extend ActiveSupport::Concern

    attr_accessor :target

    # @param object [#original_file] the FileSet whose original file is to be
    #   checked. The target URL is the original file's +file_identifier+ with
    #   its leading +fedora:+ scheme swapped for +http:+ (only the scheme is
    #   rewritten, so a host or path containing "fedora" is left intact).
    # @raise [ArgumentError] when no object is given
    def initialize(object)
      raise ArgumentError, 'You must provide a Fileset object' unless object
      @target = object.original_file.file_identifier.to_s.sub(/\Afedora:/, 'http:')
    end

    # The (memoized) raw response of the fixity request.
    def response
      @response ||= fixity_response_from_fedora
    end

    # For backwards compat, check always insists on doing a new request.
    # you might want verified? instead which uses a cached request.
    # @return true or false
    def check
      @response = nil
      # The parsed graph is memoized separately; without clearing it too, a
      # second check would silently reuse the previous result.
      @fixity_graph = nil
      verified?
    end

    # Executes a fixity check on Fedora
    # @return true or false
    def verified?
      status.include?(success)
    end

    # An array of 1 or more literals reported by Fedora.
    # See 'success' for which one indicates fixity check is good.
    def status
      fixity_graph.query({ predicate: premis_status_predicate }).map(&:object) +
        fixity_graph.query({ predicate: fedora_status_predicate }).map(&:object)
    end

    # the currently calculated checksum, as a string URI, like
    # "urn:sha1:09a848b79f86f3a4f3f301b8baafde455d6f8e0e"
    def expected_message_digest
      'urn:sha1:' + object_sha1_value
    end

    # integer, as reported by fedora. bytes maybe?
    def expected_size
      fixity_graph.query({ predicate: ::RDF::Vocab::PREMIS.hasSize }).first.try(:object).try(:to_s).try(:to_i)
    end

    # Fedora response as an ::RDF::Graph object. Public API, so consumers
    # can do with it what they will, especially if future fedora versions
    # add more things to it.
    def response_graph
      fixity_graph
    end

    private

    def premis_status_predicate
      ::RDF::Vocab::PREMIS.hasEventOutcome
    end

    # Fcrepo4.status was used by Fedora < 4.3, but it was removed
    # from the 2015-07-24 version of the fedora 4 ontology
    # http://fedora.info/definitions/v4/2015/07/24/repository and
    # from rdf-vocab in version 0.8.5
    def fedora_status_predicate
      ::RDF::URI("http://fedora.info/definitions/v4/repository#status")
    end

    # The literal that marks a passed fixity check in the status list.
    def success
      ::RDF::Literal.new("SUCCESS")
    end

    # Requests the fixity report for the target from Fedora.
    def fixity_response_from_fedora
      uri = @target + "/fcr:fixity"
      Hyrax.query_service.adapter.connection.get(uri)
    end

    # The fixity response body parsed as Turtle into an RDF graph (memoized).
    def fixity_graph
      @fixity_graph ||= ::RDF::Graph.new << ::RDF::Reader.for(:ttl).new(response.body)
    end

    # Requests the target with a +Want-Digest: sha+ header and returns the
    # part of the response's +digest+ header that follows +sha=+.
    def object_sha1_value
      # Named differently from the public #response so the method is not shadowed.
      digest_response = Hyrax.query_service.adapter.connection.get(@target) do |req|
        req.headers["Want-Digest"] = 'sha'
      end
      digest_response.response.env.response_headers["digest"].split('sha=').last
    end
  end
end
5 changes: 5 additions & 0 deletions app/views/hyrax/file_sets/_show_details.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,9 @@
<% end %>
</dd>
</div>
<div class="row">
<dd class="col-12">
<%= button_to t('.fixity_check'), file_set_fixity_checks_path(file_set_id: @presenter.id), method: :post, class: 'btn btn-primary' %>
</dd>
</div>
</dl>
2 changes: 2 additions & 0 deletions config/initializers/hyrax.rb
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,8 @@
# Add registrar implementations by uncommenting and adding to the hash below.
# See app/services/hyrax/identifier/registrar.rb for the registrar interface
# config.identifier_registrars = {}

config.fixity_service = SelfDeposit::ValkyrieFixityService
end

Date::DATE_FORMATS[:standard] = "%m/%d/%Y"
Expand Down
38 changes: 38 additions & 0 deletions spec/controllers/hyrax/fixity_checks_controller_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# frozen_string_literal: true
# [Hyrax-overwrite-v5.0.1]
# We have removed json_response tests from here since we are
# no longer rendering json in our create method
require 'rails_helper'

# Controller spec for the on-demand fixity check action. The create action
# redirects to the FileSet page instead of rendering JSON, so only the
# response status and redirect target are asserted.
RSpec.describe Hyrax::FixityChecksController, type: :controller do
  include Devise::Test::ControllerHelpers

  routes { Hyrax::Engine.routes }
  let(:user) { FactoryBot.create(:user) }
  # Created eagerly (let!) so the FileSet exists before each request is made.
  let!(:file_set) do
    FactoryBot.valkyrie_create(:hyrax_file_set, :with_files, title: ['Test File Set'], depositor: user.user_key, read_groups: ['public'], edit_users: [user])
  end

  context "when signed in" do
    describe "POST create" do
      before do
        sign_in user
        post :create, params: { file_set_id: file_set.id }, xhr: true
      end

      it "returns result and redirects to file_set page" do
        expect(response).to be_successful
        expect(response.redirect_url).to include "/concern/file_sets/#{file_set.id}"
      end
    end
  end

  context "when not signed in" do
    describe "POST create" do
      # No JSON is rendered here: the unauthenticated request is rejected.
      it "responds with 401 Unauthorized" do
        post :create, params: { file_set_id: file_set.id }, xhr: true
        expect(response.code).to eq '401'
      end
    end
  end
end
Loading

0 comments on commit c55c3b9

Please sign in to comment.