-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds Valkyrie-compatible Fixity Check service and a FileSet button to…
… kick it off. (#610) * Adds Valkyrie-compatible Fixity Check service and a FileSet button to kick it off. * Rubocop appeasement. * Adds controller spec. * Adds spec for Job.
- Loading branch information
Showing
8 changed files
with
410 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# frozen_string_literal: true | ||
# [Hyrax-overwrite-v5.0.1] | ||
# Adds redirect on L#23 in create action and points to our custom service | ||
module Hyrax
  # Handles user-initiated fixity checks for a single FileSet.
  #
  # This overrides the stock Hyrax controller: instead of rendering the
  # check result as JSON, +create+ runs the check through our custom
  # SelfDeposit service and redirects back to the FileSet page.
  class FixityChecksController < ApplicationController
    before_action :authenticate_user!

    # A request here with param :file_set_id triggers a fixity check (if
    # the service decides one is needed) and then redirects to the FileSet
    # show page with a flash notice.
    #
    # The value returned by the service is intentionally discarded; upstream
    # Hyrax rendered it as JSON (`render json: fixity_check_service.fixity_check`).
    def create
      fixity_check_service.fixity_check
      redirect_to main_app.hyrax_file_set_path(params[:file_set_id]), notice: 'Ran fixity check'
    end

    private

    # Memoized service instance for the FileSet named in the request.
    #
    # We pass `async_jobs: false` so the check runs in the foreground and is
    # finished before we redirect. Running it in the background would mean
    # sometimes reporting an 'in progress' status, which is a possible
    # future enhancement.
    #
    # @return [SelfDeposit::FileSetFixityCheckService]
    def fixity_check_service
      @fixity_check_service ||=
        ::SelfDeposit::FileSetFixityCheckService.new(params[:file_set_id], async_jobs: false, initiating_user: current_user)
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
# frozen_string_literal: true | ||
# A Job class that runs a fixity check on a FileSet (using the service class
# configured in Hyrax.config.fixity_service) and stores the result in an
# ActiveRecord ChecksumAuditLog row via `ChecksumAuditLog.create_and_prune!`.
# It also publishes a 'file.set.audited' event and records a preservation
# event on the FileSet.
#
# If calling async as a background job, the return value is irrelevant, but
# if calling sync with `perform_now`, it returns the ChecksumAuditLog
# record recording the check.
class FixityCheckJob < Hyrax::ApplicationJob
  include PreservationEvents

  # @param file_set_id [String, Valkyrie::ID] the id of the FileSet whose original file is checked.
  # @param initiating_user [User] the object for the user that kicked off the job.
  # @return [ChecksumAuditLog] the audit record created for this check.
  def perform(file_set_id:, initiating_user:)
    event_start = DateTime.current
    @file_set = Hyrax.query_service.find_by(id: file_set_id)
    run_check.tap do |audit|
      result = audit.failed? ? :failure : :success

      announce_fixity_check_results(audit, result)
      file_set_preservation_event(audit.passed, event_start, initiating_user)
    end
  end

  private

  ##
  # @api private
  # Runs the configured fixity service against the FileSet and writes the
  # outcome to the audit log. Missing content is recorded as a failed check.
  def run_check
    service = Hyrax.config.fixity_service.new(@file_set)
    expected_result = service.expected_message_digest

    report_to_audit_log(check_results: service.check, uri: service.target.to_s, expected_result:)
  rescue Hyrax::Fixity::MissingContentError
    # `service` (and `expected_result`) are still nil when the error was
    # raised before they were assigned; don't turn that into a NoMethodError.
    checked_uri = service ? service.target.to_s : ''
    report_to_audit_log(check_results: false, uri: checked_uri, expected_result:)
  end

  # Publishes the audit outcome and, when configured, runs the legacy failure callback.
  def announce_fixity_check_results(audit, result)
    Hyrax.publisher.publish('file.set.audited', file_set: @file_set, audit_log: audit, result:)

    # @todo remove this callback call for Hyrax 4.0.0
    process_failure_callback(audit) if should_call_failure_callback(audit)
  end

  def process_failure_callback(audit)
    Hyrax.config.callback.run(:after_fixity_check_failure,
                              @file_set,
                              checksum_audit_log: audit,
                              warn: false)
  end

  def should_call_failure_callback(audit)
    audit.failed? && Hyrax.config.callback.set?(:after_fixity_check_failure)
  end

  # Logs the outcome to STDOUT and records a 'Fixity Check' preservation
  # event on the FileSet.
  #
  # @param passed [Boolean] whether the audit passed (the audit's `passed` value).
  # @param event_start [DateTime] when the job started.
  # @param initiating_user [User] the user recorded on the event.
  def file_set_preservation_event(passed, event_start, initiating_user)
    logger = Logger.new(STDOUT)
    pulled_file_name = original_file_name
    pulled_checksum = original_file_checksum
    event = { 'type' => 'Fixity Check', 'start' => event_start, 'software_version' => 'Fedora v6.5.0', 'user' => initiating_user }

    if passed
      event['outcome'] = 'Success'
      event['details'] = "Fixity intact for file: #{pulled_file_name}: sha1: #{pulled_checksum}"
      logger.info "Ran fixity check successfully on #{pulled_file_name}"
    else
      event['outcome'] = 'Failure'
      event['details'] = "Fixity check failed for: #{pulled_file_name}: sha1: #{pulled_checksum}"
      logger.error "Fixity check failure: Fixity failed for #{pulled_file_name}"
    end
    create_preservation_event(@file_set, event)
  end

  # Best available display name for the original file: title, then label,
  # then the stored original filename.
  def original_file_name
    pulled_original_file_title || pulled_original_file_label || @file_set&.original_file&.original_filename
  end

  def pulled_original_file_title
    @file_set&.original_file&.title&.first
  end

  def pulled_original_file_label
    @file_set&.original_file&.label&.first
  end

  def original_file_checksum
    @file_set&.original_file&.original_checksum&.first
  end

  # Creates the ChecksumAuditLog row for this check.
  #
  # @param check_results [Boolean] whether the check passed.
  # @param uri [String] the URI that was checked.
  # @param expected_result [String, nil] the expected message digest.
  def report_to_audit_log(check_results:, uri:, expected_result:)
    ChecksumAuditLog.create_and_prune!(
      passed: check_results,
      file_set_id: @file_set.id.to_s,
      checked_uri: uri,
      file_id: @file_set.original_file.id.to_s,
      expected_result:
    )
  end
end
88 changes: 88 additions & 0 deletions
88
app/services/self_deposit/file_set_fixity_check_service.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# frozen_string_literal: true | ||
module SelfDeposit
  ##
  # This class runs a fixity check on the original file of a Valkyrie
  # +FileSet+.
  #
  # The fixity check itself is performed by {FixityCheckJob}. The outcome
  # will be a created {ChecksumAuditLog} (ActiveRecord) object, recording
  # the check and its result.
  #
  # By default this runs the check async using +ActiveJob+, so it
  # returns no useful info -- the check is still going.
  #
  # But if you initialize with +async_jobs: false+, the check will be done
  # blocking in the foreground, and you get back the {ChecksumAuditLog}
  # record.
  #
  # It will only run a fixity check if there is no recent
  # {ChecksumAuditLog} on record. "Recent" is defined by the
  # +max_days_between_fixity_checks+ arg, which defaults to the configured
  # {Hyrax::Configuration#max_days_between_fixity_checks}.
  class FileSetFixityCheckService
    # Scheme prefix of the stored file identifier; only this leading prefix
    # is swapped for +http://+, so a host or path segment that happens to
    # contain "fedora" is left intact.
    FEDORA_PROTOCOL = %r{\Afedora://}
    private_constant :FEDORA_PROTOCOL

    attr_reader :id, :async_jobs, :max_days_between_fixity_checks

    # @param file_set_id [String, Valkyrie::ID] id of the FileSet to check.
    # @param async_jobs [Boolean] Run the actual fixity check in background. Default true.
    # @param max_days_between_fixity_checks [Integer] if an existing fixity check is
    #   recorded within this window, no new one will be created. Default
    #   {Hyrax::Configuration#max_days_between_fixity_checks}. Set to -1 to force
    #   check.
    # @param initiating_user [User] the user that requested the check; handed to the job.
    def initialize(file_set_id,
                   async_jobs: true,
                   max_days_between_fixity_checks: Hyrax.config.max_days_between_fixity_checks,
                   initiating_user:)
      @max_days_between_fixity_checks = max_days_between_fixity_checks || 0
      @async_jobs = async_jobs
      @initiating_user = initiating_user
      @id = file_set_id
      @file_set = Hyrax.query_service.find_by(id: @id)
    end

    # Fixity checks the original file if it hasn't been checked recently.
    #
    # If async_jobs is false, returns the most recent fixity check record
    # (either the existing recent one or the one just created).
    #
    # If async_jobs is true (default), just returns nil, stuff is still going on.
    def fixity_check
      results = fixity_check_file

      return if async_jobs
      results
    end

    private

    # Retrieve the latest fixity check for the original file, or run a new
    # one when the latest is missing or too old.
    def fixity_check_file
      latest_fixity_check = ChecksumAuditLog.logs_for(@id.to_s, checked_uri: checked_uri).first
      return latest_fixity_check unless needs_fixity_check?(latest_fixity_check)

      job_args = { file_set_id: @id, initiating_user: @initiating_user }
      async_jobs ? FixityCheckJob.perform_later(**job_args) : FixityCheckJob.perform_now(**job_args)
    end

    # HTTP form of the original file's identifier, used to look up audit logs.
    def checked_uri
      @file_set.original_file.file_identifier.to_s.sub(FEDORA_PROTOCOL, 'http://')
    end

    # Check if time since the last fixity check is greater than the maximum days allowed between fixity checks
    # @param [ChecksumAuditLog] latest_fixity_check the most recent fixity check
    def needs_fixity_check?(latest_fixity_check)
      return true unless latest_fixity_check
      unless latest_fixity_check.updated_at
        # This is a plain Ruby object with no `logger` method of its own.
        Hyrax.logger.warn "***FIXITY*** problem with fixity check log! Latest Fixity check is not nil, but updated_at is not set #{latest_fixity_check}"
        return true
      end
      days_since_last_fixity_check(latest_fixity_check) >= max_days_between_fixity_checks
    end

    # Return the number of days since the latest fixity check
    # @param [ChecksumAuditLog] latest_fixity_check the most recent fixity check
    def days_since_last_fixity_check(latest_fixity_check)
      (DateTime.current - latest_fixity_check.updated_at.to_date).to_i
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# frozen_string_literal: true | ||
module SelfDeposit
  # Asks Fedora for a fixity verification of a file set's original file and
  # exposes the parsed result.
  class ValkyrieFixityService
    # Scheme prefix of the stored file identifier; only this leading prefix
    # is swapped for +http://+, so a host or path segment that happens to
    # contain "fedora" is left intact.
    FEDORA_PROTOCOL = %r{\Afedora://}
    private_constant :FEDORA_PROTOCOL

    attr_accessor :target

    # @param object [#original_file] a file set whose original file's
    #   +file_identifier+ is used to build the target URL.
    # @raise [ArgumentError] when no object is given.
    def initialize(object)
      raise ArgumentError, 'You must provide a Fileset object' unless object
      @target = object.original_file.file_identifier.to_s.sub(FEDORA_PROTOCOL, 'http://')
    end

    # The (memoized) response to the fixity request.
    def response
      @response ||= fixity_response_from_fedora
    end

    # For backwards compat, check always insists on doing a new request.
    # you might want verified? instead which uses a cached request.
    # @return true or false
    def check
      @response = nil
      # The parsed graph is memoized from the previous response; drop it too,
      # otherwise the fresh request would never be consulted.
      @fixity_graph = nil
      verified?
    end

    # Executes a fixity check on Fedora
    # @return true or false
    def verified?
      status.include?(success)
    end

    # An array of 1 or more literals reported by Fedora.
    # See 'success' for which one indicates fixity check is good.
    def status
      fixity_graph.query({ predicate: premis_status_predicate }).map(&:object) +
        fixity_graph.query({ predicate: fedora_status_predicate }).map(&:object)
    end

    # the currently calculated checksum, as a string URI, like
    # "urn:sha1:09a848b79f86f3a4f3f301b8baafde455d6f8e0e"
    def expected_message_digest
      'urn:sha1:' + object_sha1_value
    end

    # integer, as reported by fedora. bytes maybe?
    def expected_size
      fixity_graph.query({ predicate: ::RDF::Vocab::PREMIS.hasSize }).first.try(:object).try(:to_s).try(:to_i)
    end

    # Fedora response as an ::RDF::Graph object. Public API, so consumers
    # can do with it what they will, especially if future fedora versions
    # add more things to it.
    def response_graph
      fixity_graph
    end

    private

    def premis_status_predicate
      ::RDF::Vocab::PREMIS.hasEventOutcome
    end

    # Fcrepo4.status was used by Fedora < 4.3, but it was removed
    # from the 2015-07-24 version of the fedora 4 ontology
    # http://fedora.info/definitions/v4/2015/07/24/repository and
    # from rdf-vocab in version 0.8.5
    def fedora_status_predicate
      ::RDF::URI("http://fedora.info/definitions/v4/repository#status")
    end

    def success
      ::RDF::Literal.new("SUCCESS")
    end

    # Requests the +fcr:fixity+ endpoint of the target.
    def fixity_response_from_fedora
      uri = @target + "/fcr:fixity"
      Hyrax.query_service.adapter.connection.get(uri)
    end

    # The fixity response body parsed as Turtle into an RDF graph (memoized).
    def fixity_graph
      @fixity_graph ||= ::RDF::Graph.new << ::RDF::Reader.for(:ttl).new(response.body)
    end

    # Fetches the target with a Want-Digest header and returns the part of
    # the "digest" response header that follows 'sha='.
    def object_sha1_value
      digest_response = Hyrax.query_service.adapter.connection.get(@target) do |req|
        req.headers["Want-Digest"] = 'sha'
      end
      digest_response.response.env.response_headers["digest"].split('sha=').last
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# frozen_string_literal: true | ||
# [Hyrax-overwrite-v5.0.1] | ||
# We have removed json_response tests from here since we are | ||
# no longer rendering json in our create method | ||
require 'rails_helper' | ||
|
||
# Exercises the overridden create action: a signed-in user gets the fixity
# check run and is sent to the file set page; an anonymous request is
# rejected with 401.
RSpec.describe Hyrax::FixityChecksController, type: :controller do
  include Devise::Test::ControllerHelpers

  routes { Hyrax::Engine.routes }
  let(:user) { FactoryBot.create(:user) }
  let!(:file_set) do
    FactoryBot.valkyrie_create(:hyrax_file_set, :with_files, title: ['Test File Set'], depositor: user.user_key, read_groups: ['public'], edit_users: [user])
  end

  context "when signed in" do
    describe "POST create" do
      before do
        sign_in user
        post :create, params: { file_set_id: file_set.id }, xhr: true
      end

      it "runs the check and redirects to the file_set page" do
        expect(response).to be_successful
        expect(response.redirect_url).to include "/concern/file_sets/#{file_set.id}"
      end
    end
  end

  context "when not signed in" do
    describe "POST create" do
      it "responds with 401 Unauthorized" do
        post :create, params: { file_set_id: file_set.id }, xhr: true
        expect(response.code).to eq '401'
      end
    end
  end
end
Oops, something went wrong.