Skip to content

Commit

Permalink
Add better scheduling (#194)
Browse files Browse the repository at this point in the history
* add back caching, see how it works when distributed

* better scheduling for upcoming and in-progress matches

* speed up puma

* better scheduling and edge case scraping

* also add sync task in case not written

* make sure match order correct

* add idx to match date

* use scopes

* do not try to scrape unknown matches if empty

* fix spec, do not return early

* fix nil error

* try increase pool size
  • Loading branch information
estiens authored Nov 16, 2022
1 parent 6aa46d6 commit 9f94698
Show file tree
Hide file tree
Showing 16 changed files with 128 additions and 39 deletions.
11 changes: 6 additions & 5 deletions app/controllers/matches_controller.rb
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
class MatchesController < BaseApiController
before_action :detail_level
before_action :detail_level, except: %i[index future]
before_action :load_matches, except: %i[show]

# Index action. Detail mode is switched on only when the `details` query
# parameter equals "true" (case-insensitive); any other value, or no value,
# switches it off. Ordering is then applied via order_by_params.
def index
  wants_details = params[:details]&.downcase == 'true'
  @details = wants_details
  order_by_params
end

# Lists the matches that are currently being played, rendered with the
# index template.
#
# Filtering goes through the `in_progress` scope only. Stacking a raw
# `where(status: 'in progress')` (note the space) on top of the scope
# combines two status conditions that cannot both be the canonical one.
def current
  @matches = @matches.in_progress
  order_by_params
  render :index
end

# Lists completed matches, rendered with the index template.
#
# Filtering goes through the `completed` scope only, so the status
# condition is defined in one place rather than repeated as a raw `where`.
def complete
  @matches = @matches.completed
  order_by_params
  render :index
end

# Lists upcoming matches, rendered with the index template.
#
# Filtering goes through the `future` scope only. A raw
# `where(status: 'future')` stacked on the scope adds a second status
# condition on a literal value that the scheduler code in this change never
# uses (it works with 'future_scheduled' / 'future_unscheduled').
def future
  @matches = @matches.future
  order_by_params
  render :index
end
Expand Down Expand Up @@ -57,6 +57,7 @@ def show
# Builds the base match collection for the list actions: all matches,
# with statistics and events eager-loaded when detail mode is on, ordered
# by kickoff datetime ascending.
def load_matches
  scope = Match.all
  scope = scope.includes(:match_statistics, :events) if @details
  @matches = scope.order(datetime: :asc)
end

def order_by_params
Expand Down
32 changes: 30 additions & 2 deletions app/jobs/in_progress_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,38 @@ class InProgressJob < ApplicationJob
queue_as :scheduler

# Runs one scheduling pass: in-progress matches first, then matches starting
# soon, then the remaining upcoming matches, followed by a second
# in-progress pass 30 seconds later.
def perform
  scrape_in_progress
  scrape_soon_upcoming
  scrape_later_today

  # In-progress matches should be scraped every 30 seconds, but cron only has
  # one-minute resolution, so a second pass runs half-way through the minute.
  sleep(30)
  scrape_in_progress
end

private

# Enqueues one FetchDataForScheduledMatch job per match.
#
# @param matches [#count, #each] collection of matches (each responds to #id)
# @return [nil] when the collection is empty; otherwise the result of #each
def scrape_match_details(matches)
  # Count once and reuse the value in the log line.
  total = matches.count
  return unless total.positive?

  # The block form is only evaluated when debug logging is enabled, so every
  # name inside it must exist. `matches_to_scrape` is not defined in this
  # method and would raise NameError once the block is evaluated.
  Rails.logger.debug { "**SCHEDULER** Single Match Update for #{total} scheduled matches" }
  matches.each { |match| FetchDataForScheduledMatch.perform_later(match.id) }
end

# Queues a detail scrape for every match returned by the `in_progress` scope.
def scrape_in_progress
  live_matches = Match.in_progress
  scrape_match_details(live_matches)
end

# Queues a detail scrape for 'future_scheduled' matches whose kickoff is less
# than 20 minutes away (or already past) and that were last checked more than
# a minute ago.
# NOTE(review): rows with a NULL last_checked_at do not satisfy the SQL
# comparison and are therefore not selected — confirm that is intended.
def scrape_soon_upcoming
  starting_soon = Match.where('datetime < ?', 20.minutes.from_now)
  due = starting_soon.where(status: 'future_scheduled')
                     .where('last_checked_at < ?', 1.minute.ago)
  scrape_match_details(due)
end

# Queues a detail scrape for 'future_scheduled' matches whose kickoff is less
# than one day away (or already past) and that were last checked more than
# five minutes ago.
def scrape_later_today
  within_a_day = Match.where('datetime < ?', 1.day.from_now)
  due = within_a_day.where(status: 'future_scheduled')
                    .where('last_checked_at < ?', 5.minutes.ago)
  scrape_match_details(due)
end
end
45 changes: 33 additions & 12 deletions app/jobs/scheduler_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,51 @@ class SchedulerJob < ApplicationJob
SCRAPE_TODAY_EVERY = 1.minute
SCRAPE_GENERAL_EVERY = 90.minutes

# Runs one scheduler pass. The steps execute sequentially, in the order
# listed, and the method returns whatever the last step returns.
def perform
scrape_scheduled_matches
scrape_unscheduled_matches
# Safety net for matches that have not been checked for a long time.
scrape_unknown_matches
update_anything_not_synced
# Currently a placeholder: scrape_completed? has no body in this section.
scrape_completed?
end

# Safety-net scrape for any match, whatever its status, whose last_checked_at
# is older than SCRAPE_UNKNOWN_EVERY. Ideally this finds nothing.
#
# The ids are enqueued exactly once, through fetch_general_data_for_matches,
# and nothing is logged or enqueued when no match qualifies.
def scrape_unknown_matches
  ids = Match.all.where('last_checked_at < ?', SCRAPE_UNKNOWN_EVERY.ago).pluck(:id)
  return unless ids.count.positive?

  Rails.logger.info("**SCHEDULER** unknown scrape for #{ids.count} matches")
  fetch_general_data_for_matches(ids)
end

# Placeholder with no body yet, so calling it does nothing and returns nil.
def scrape_completed?
# Events may still arrive after full time; whether completed matches need
# another scrape is still to be determined.
end

# Re-syncs matches updated within the last two hours: a match that already
# has stored JSON is re-written from it via MatchUpdateSelfJob; a match
# without JSON gets a fresh detail scrape.
def update_anything_not_synced
  Match.where('updated_at > ?', 2.hours.ago).each do |match|
    if match.latest_json.present?
      MatchUpdateSelfJob.perform_later(match.id)
    else
      # scrape_match_details takes a collection (its parameter is `matches`),
      # so the single record is wrapped rather than passed bare.
      # NOTE(review): presumably any object responding to #count and #each
      # is accepted — confirm against scrape_match_details.
      scrape_match_details([match])
    end
  end
end

# Enqueues a single FetchGeneralDataForAllMatches job for the given ids.
# Does nothing (and returns nil) when the list is empty.
#
# @param match_ids [#count] ids of the matches to refresh
def fetch_general_data_for_matches(match_ids)
  FetchGeneralDataForAllMatches.perform_later(match_ids) if match_ids.count.positive?
end

# Refreshes general info for matches that have no fixed schedule yet and
# whose last_checked_at is older than SCRAPE_GENERAL_EVERY. Two passes:
#   1. matches with status 'future_unscheduled'
#   2. matches with no datetime at all
#
# Both passes always run: an empty first pass must not return early and skip
# the second. Each id is enqueued at most once, so ids already sent in the
# first pass are removed from the second. Empty lists are handled by
# fetch_general_data_for_matches.
def scrape_unscheduled_matches
  unscheduled_ids = Match.all.where(status: 'future_unscheduled')
                         .where('last_checked_at < ?', SCRAPE_GENERAL_EVERY.ago).pluck(:id)
  Rails.logger.info("**SCHEDULER** (checking info for unscheduled matches) for #{unscheduled_ids.count} matches")
  fetch_general_data_for_matches(unscheduled_ids)

  undated_ids = Match.all.where(datetime: nil)
                     .where('last_checked_at < ?', SCRAPE_GENERAL_EVERY.ago).pluck(:id)
  fetch_general_data_for_matches(undated_ids - unscheduled_ids)
end

def scrape_match_details(matches)
Expand All @@ -33,15 +62,7 @@ def scrape_match_details(matches)
end

# Queues detail scrapes in two waves, five seconds apart:
#   1. matches from the `today` scope last checked before SCRAPE_TODAY_EVERY ago
#   2. 'future_scheduled' matches last checked before SCRAPE_SCHEDULED_EVERY ago
def scrape_scheduled_matches
  stale_today = Match.today.where('last_checked_at < ?', SCRAPE_TODAY_EVERY.ago)
  scrape_match_details(stale_today)

  sleep(5)

  stale_scheduled = Match.where('last_checked_at < ?', SCRAPE_SCHEDULED_EVERY.ago)
  scrape_match_details(stale_scheduled.where(status: 'future_scheduled'))
end

# Runs the scheduled and unscheduled scrape passes, in that order.
# NOTE(review): `perform` is also defined earlier in this job with three more
# steps (scrape_unknown_matches, update_anything_not_synced,
# scrape_completed?). If both definitions sit in the same class body, Ruby
# keeps this later one and those steps never run — confirm which definition
# is intended.
def perform
scrape_scheduled_matches
scrape_unscheduled_matches
end
end
11 changes: 11 additions & 0 deletions app/models/json_match.rb
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,13 @@ def officials
@officials ||= info['Officials']&.map { |official| map_official(official) }
end

# double check logic on these
# True once the current-time value from the feed, coerced with #to_i, is
# greater than zero; false otherwise.
def in_progress?
  elapsed = current_time_info[:current_time].to_i
  elapsed.positive?
end

def completed?
return true if match_winner&.to_i&.positive?
return false unless match_status.zero?
Expand Down Expand Up @@ -193,13 +200,17 @@ def find_away_team_player(player_id)
# Formats the starting players (those with 'Status' == 1) of one team.
# The home team is used by default; `away: true` or `home: false` selects the
# away team.
#
# @return [Object, nil] result of PlayersFormatter.players_from_array, or nil
#   when the chosen team has no player list
def starters(home: true, away: false)
  squad = (home && !away ? home_team : away_team)['Players']
  return nil if squad.blank?

  PlayersFormatter.players_from_array(squad.select { |player| player['Status'] == 1 })
end

# Formats the substitutes (players with 'Status' == 2) of one team.
# The home team is used by default; `away: true` or `home: false` selects the
# away team.
#
# @return [Object, nil] result of PlayersFormatter.players_from_array, or nil
#   when the chosen team has no player list
def substitutes(home: true, away: false)
  squad = (home && !away ? home_team : away_team)['Players']
  return nil if squad.blank?

  PlayersFormatter.players_from_array(squad.select { |player| player['Status'] == 2 })
end
Expand Down
2 changes: 1 addition & 1 deletion app/models/match.rb
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def update_self_from_latest_data
private

# Validation: a match needs its FIFA competition, season and stage ids.
# The group id is not required. A separate guard that also demanded the
# group id would be subsumed by this one, so a single guard is kept.
def validate_basic_info
  return if fifa_competition_id && fifa_season_id && fifa_stage_id

  errors.add(:base, 'Match is missing basic information')
end
Expand Down
3 changes: 1 addition & 2 deletions app/services/match_fetcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@ def self.scrape_for_general_info(matches)
match = match.is_a?(Match) ? match : Match.find(match)
match_specific_info = match_info.find { |m| m['IdMatch'] == match.fifa_id }
if match_specific_info
match.update(latest_json: match_specific_info.to_json)
match.update_column(:latest_json, match_specific_info.to_json)
match.id
else
Rails.logger.info("**SCRAPER** Match #{match.fifa_id} not found in general info")
match.touch
nil
end
end.compact
Expand Down
32 changes: 26 additions & 6 deletions app/services/match_writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def initialize(match: nil, json_string: nil)
@match = match
json_string ||= match.latest_json
@json_match = JsonMatch.new(json_string)
@updated = nil
@changed = []
end

Expand All @@ -32,14 +33,21 @@ def write_match
return false unless @match&.persisted?
return false unless update_match_from_json

Rails.logger.info("MatchWriter: #{match.fifa_id} updated - Changed: #{@changed.inspect}")
Rails.logger.debug { "MatchWriter: #{match.fifa_id} updated - Changed: #{@changed.inspect}" }
match.last_changed_at = Time.zone.now
match.last_changed = @changed
match.save
end

private

# Flips a scheduled match to in-progress once the scraped JSON reports that
# play has started.
#
# The status is compared via #to_s so the check works whether the attribute
# reader returns a String (as Rails enum and plain string columns do) or a
# Symbol. A bare `== :future_scheduled` never matches a String value.
def start_match!
  return unless match.status.to_s == 'future_scheduled'
  return unless @json_match.in_progress?

  match.update_column(:status, :in_progress)
end

def complete_match!
return unless match.status == :in_progress
return unless @json_match.completed?
Expand Down Expand Up @@ -70,12 +78,14 @@ def update_match_in_progress
end

# Applies the parsed JSON to the match and reports whether anything changed.
#
# Stamps last_checked_at first, then runs each update step exactly once, in
# order. Every step runs even if an earlier one already reported a change,
# because the steps have side effects of their own.
#
# @return [true, nil] true when at least one step reported a change
def update_match_from_json
  updated = nil
  match.update_column(:last_checked_at, Time.now)

  updated = true if update_match_in_progress
  updated = true if check_for_upcoming_changes
  updated = true if try_update_if_blank?(match_identifiers.merge(team_ids).merge(general_info_attributes))
  updated = true if try_update_anything?(general_info_attributes)
  updated
end

def try_update_if_blank?(attrs)
Expand All @@ -102,6 +112,16 @@ def try_update_anything?(attrs)
updated
end

# For matches whose kickoff is later than 24 hours ago (including future
# kickoffs): tries to start the match and writes both teams' stats.
#
# @return [Boolean] false when the match has no datetime or kicked off more
#   than 24 hours ago; true otherwise, regardless of what the writes did
def check_for_upcoming_changes
  kickoff = match.datetime
  return false unless kickoff && kickoff > Time.zone.now - 24.hours

  start_match!
  write_home_stats
  write_away_stats
  true
end

def match_identifiers
{ fifa_id: @json_match.identifiers[:fifa_id],
fifa_competition_id: @json_match.identifiers[:competition_id],
Expand Down
4 changes: 1 addition & 3 deletions app/views/matches/index.json.jbuilder
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
# Renders every match through the matches/match partial in a single pass,
# using Jbuilder's collection rendering with per-item fragment caching.
# Rendering the same collection in a block first would build every item twice.
json.array! @matches, partial: 'matches/match', as: :match, cached: true
4 changes: 1 addition & 3 deletions app/views/teams/index.json.jbuilder
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# One entry per group: its letter plus its teams. The teams are rendered once
# through the teams/result partial, using collection rendering with per-item
# fragment caching.
json.groups @groups do |group|
  json.letter group.letter
  json.teams group.teams, partial: 'teams/result', as: :team, cached: true
end
5 changes: 5 additions & 0 deletions app/views/teams/show.json.jbuilder
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@ if @last_match
end
# Emits a `next_match` object only when @next_match is set.
if @next_match
json.next_match do
json.id @next_match.id
# Use the team's country when a team record is present; otherwise fall back
# to the match's *_team_tbd value.
json.home_team @next_match.home_team&.country || @next_match.home_team_tbd
json.away_team @next_match.away_team&.country || @next_match.away_team_tbd
# Serialized as a UTC ISO 8601 string; nil when the match has no datetime.
json.datetime @next_match.datetime&.utc&.iso8601
json.venue @next_match.venue
json.location @next_match.location
json.stage_name @next_match.stage_name
end
end
2 changes: 1 addition & 1 deletion config/database.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ production:
adapter: postgresql
encoding: unicode
url: <%= ENV['DATABASE_URL'] %>
pool: <%= ENV['DB_POOL'] || 15 %>
pool: <%= ENV['DB_POOL'] || 25 %>
5 changes: 3 additions & 2 deletions config/puma.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
if Rails.env.production?
workers Integer(ENV['WEB_CONCURRENCY'] || 3)
min_threads = Integer(ENV['MIN_THREADS'] || ENV['RAILS_MAX_THREADS'] || 4)
max_threads = Integer(ENV['RAILS_MAX_THREADS'] || 8)
min_threads = Integer(ENV['RAILS_MIN_THREADS'] || ENV['RAILS_MAX_THREADS'] || 5)
max_threads = Integer(ENV['RAILS_MAX_THREADS'] || 5)

threads min_threads, max_threads
else
Expand All @@ -10,6 +10,7 @@
threads (ENV['RAILS_MIN_THREADS'] || 1), (ENV['RAILS_MAX_THREADS'] || 1)
end

wait_for_less_busy_worker
preload_app!

rackup DefaultRackup
Expand Down
5 changes: 5 additions & 0 deletions db/migrate/20221116000921_add_index_to_match_date.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds an index on matches.datetime (recorded as index_matches_on_datetime in
# db/schema.rb). The column is used for ordering and range filtering of
# matches elsewhere in this change.
class AddIndexToMatchDate < ActiveRecord::Migration[7.0]
# Written as `change`, so the migration relies on add_index being reversible.
def change
add_index :matches, :datetime
end
end
3 changes: 2 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[7.0].define(version: 2022_11_14_232107) do
ActiveRecord::Schema[7.0].define(version: 2022_11_16_000921) do
# These are extensions that must be enabled in order to support this database
enable_extension "pgcrypto"
enable_extension "plpgsql"
Expand Down Expand Up @@ -142,6 +142,7 @@
t.json "detailed_time"
t.json "last_changed", default: []
t.index ["away_team_id"], name: "index_matches_on_away_team_id"
t.index ["datetime"], name: "index_matches_on_datetime"
t.index ["fifa_id"], name: "index_matches_on_fifa_id"
t.index ["home_team_id"], name: "index_matches_on_home_team_id"
t.index ["winner_id"], name: "index_matches_on_winner_id"
Expand Down
1 change: 1 addition & 0 deletions fly.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ kill_timeout = 60

[deploy]
release_command = "bin/rails db:migrate"
strategy = "bluegreen"

[env]
PORT = "8080"
Expand Down
2 changes: 1 addition & 1 deletion spec/requests/match_request_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
match.save
end

it 'should return a full payload for index endpoint' do
it 'should return a full payload for index endpoint (no details)' do
res = get matches_path
expect(res).to eq 200

Expand Down

0 comments on commit 9f94698

Please sign in to comment.