Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve TeSS scraping #4479

Merged
merged 10 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions FUNDERS.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ CINECA-Project:
funder: true
funding_id: "825775"
funding_system: cordis
funder_name: Horizon 2020
url: https://www.cineca-project.eu

epsrc-training-grant:
Expand Down
1 change: 0 additions & 1 deletion _layouts/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
<link rel="manifest" href="{{ site.baseurl }}/manifest.json">
<meta name="theme-color" content="#2c3143"/>


{{ page | generate_dublin_core: site }}

{% assign topic = site.data[page.topic_name] %}
Expand Down
12 changes: 6 additions & 6 deletions _layouts/faq.html
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
---
layout: base
---

<div itemscope itemtype="https://schema.org/FAQPage">
{% assign contributors = site.data['contributors'] %}
<section class="tutorial">
<section class="tutorial" itemscope itemprop="mainEntity" itemtype="https://schema.org/Question">

<nav aria-label="breadcrumb">
<ol class="breadcrumb">
Expand Down Expand Up @@ -37,8 +37,7 @@

</ol>
</nav>

<h1>{{page.title}}</h1>
<h1 itemprop="name">{{page.title}}</h1>

{% if page.contributors %}
<div class="contributors-line" style="font-size:1rem;">{{ locale['authors'] | default: "Authors" }}: {% include
Expand All @@ -48,8 +47,8 @@ <h1>{{page.title}}</h1>
{% endif %}

<div class="container">
<div class="row">
<div class="col-md-8">
<div class="row" itemscope itemprop="acceptedAnswer" itemtype="https://schema.org/Answer">
<div class="col-md-8" itemprop="text">
{{ page.description }}


Expand Down Expand Up @@ -85,3 +84,4 @@ <h2 id="bibliography">{{locale['references']| default: "References" }}</h2>
{% endif %}

</section>
</div>
18 changes: 18 additions & 0 deletions _layouts/learning-pathway.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@
{% assign gitter = site.gitter_url %}
{% endif %}

{% comment %}Schema.org Course metadata for this pathway. Title and description go through `jsonify` so that quotes, backslashes and newlines in them are escaped and the JSON-LD stays valid; `jsonify` emits the surrounding quotes itself.{% endcomment %}
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "Course",
"name": {{ pathway.title | jsonify }},
"description": {{ pathway.description | jsonify }},
"provider": {
"@type": "Organization",
"email": "[email protected]",
"name": "Galaxy Training Network",
"url": "https://training.galaxyproject.org",
"logo": "https://training.galaxyproject.org/training-material/assets/images/GTNLogo1000.png",
"sameAs": "https://training.galaxyproject.org"
}
}
</script>



<section class="tutorials-list {{ page.topic_name }} topic-type-{{ topic.type }}">
<hgroup>
Expand Down
2 changes: 1 addition & 1 deletion _plugins/gtn/boxify.rb
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def self.generate_collapsible_title(box_type, title, lang = 'en', key, contents:
[box_id, %(
<div class="box-title #{box_type}-title" id="#{box_id}">
<button class="gtn-boxify-button #{box_type}" type="button" aria-controls="#{box_id}#{refers_to_contents}" aria-expanded="true">
#{get_icon(box_type)} #{box_title}
#{get_icon(box_type)} <span>#{box_title}</span>
<span class="fold-unfold fa fa-minus-square"></span>
</button>
</div>
Expand Down
19 changes: 19 additions & 0 deletions _plugins/gtn/contributors.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,25 @@ def self.get_authors(data)
end
end

##
# List everyone credited on a material in a role other than authorship
# or funding.
# Params:
# +material+:: +Hash+ of the YAML frontmatter from a material
# Returns:
# +Array+ of unique contributor IDs, in first-seen order; empty when the
# material has a 'contributors' key or has no 'contributions' key
def self.get_non_authors(material)
  # A 'contributors' key wins: such a material yields no non-authors,
  # even if a 'contributions' key is present as well.
  return [] if material.key?('contributors')
  return [] unless material.key?('contributions')

  excluded_roles = %w[funding authorship]
  remaining = material['contributions'].reject { |role, _ids| excluded_roles.include?(role) }
  remaining.values.flatten.uniq
end

# Convenience method to allow us to handle nil sites, and load directly
# from disk ourselves.
def self._load_file(site, category)
Expand Down
84 changes: 59 additions & 25 deletions _plugins/jekyll-jsonld.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ module JsonldFilter
'@type': 'Organization',
email: '[email protected]',
name: 'Galaxy Training Network',
url: 'https://galaxyproject.org/teach/gtn/'
url: 'https://training.galaxyproject.org',
logo: 'https://training.galaxyproject.org/training-material/assets/images/GTNLogo1000.png',
}.freeze

A11Y = {
Expand All @@ -25,6 +26,14 @@ module JsonldFilter
'for non-visual users.',
}.freeze

# Lookup table from a topic's +type+ value to the text used as the
# schema.org +educationalRole+ of the material's audience. Topic types
# missing from this table look up as +nil+. Frozen so the shared constant
# cannot be mutated at runtime.
EDU_ROLES = {
  'use' => 'Students',
  'admin-dev' => 'Galaxy Administrators',
  'basics' => 'Students',
  'data-science' => 'Data-Science Students',
  'instructors' => 'Instructors',
}.freeze

##
# Generate the Dublin Core metadata for a material.
# Parameters:
Expand Down Expand Up @@ -131,21 +140,36 @@ def generate_org_jsonld(id, contributor, site)
end

def generate_funder_jsonld(id, contributor, site)
organization = {
'@context': 'https://schema.org',
'@type': 'Grant',
identifier: contributor['funding_id'],
url: contributor['url'] || Gtn::Contributors.fetch_funding_url(contributor),
funder: {
organization = [
{
'@context': 'https://schema.org',
'@type': 'Organization',
'http://purl.org/dc/terms/conformsTo': {
'@id': 'https://bioschemas.org/profiles/Organization/0.3-DRAFT',
'@type': 'CreativeWork'
},
name: Gtn::Contributors.fetch_name(site, id),
description: contributor.fetch('funding_statement',
'An organization supporting the Galaxy Training Network'),
url: Gtn::Contributors.fetch_funding_url(contributor),
description: contributor.fetch('funding_statement', 'An organization supporting the Galaxy Training Network'),
url: contributor.fetch('url', "https://training.galaxyproject.org/training-material/hall-of-fame/#{id}/"),
logo: contributor.fetch('avatar', "https://github.com/#{id}.png"),
},
{
'@context': 'https://schema.org',
'@type': 'Grant',
identifier: contributor['funding_id'],
url: contributor['url'] || Gtn::Contributors.fetch_funding_url(contributor),
funder: {
'@type': 'Organization',
name: contributor['funder_name'],
description: contributor.fetch('funding_statement',
'An organization supporting the Galaxy Training Network'),
url: Gtn::Contributors.fetch_funding_url(contributor),
}
}
}
organization['startDate'] = contributor['start_date'] if contributor.key?('start_date')
organization['endDate'] = contributor['end_date'] if contributor.key?('end_date')
]

organization[1]['startDate'] = contributor['start_date'] if contributor.key?('start_date')
organization[1]['endDate'] = contributor['end_date'] if contributor.key?('end_date')

organization
end
Expand Down Expand Up @@ -260,7 +284,7 @@ def to_jsonld(material, topic, site)
# "associatedMedia":,
audience: {
'@type': 'EducationalAudience',
educationalRole: 'Students'
educationalRole: EDU_ROLES[topic['type']]
},
# "audio":,
# "award":,
Expand Down Expand Up @@ -344,7 +368,6 @@ def to_jsonld(material, topic, site)
# "alternateName":,
# "description" described below
# "disambiguatingDescription":,
identifier: site['github_repository'],
# "image":,
# "mainEntityOfPage":,
# "name" described below
Expand All @@ -353,8 +376,11 @@ def to_jsonld(material, topic, site)
# "subjectOf":,
# "url" described below
workTranslation: [],
creativeWorkStatus: material['draft'] ? 'Under development' : 'Active',
creativeWorkStatus: material['draft'] ? 'Draft' : 'Active',
}

data['identifier'] = "https://gxy.io/GTN:#{material['short_id']}" if material.key?('short_id')

data.update(A11Y)

# info depending if tutorial, hands-on or slide level
Expand All @@ -367,15 +393,17 @@ def to_jsonld(material, topic, site)
data['isPartOf'] = topic_desc

if (material['name'] == 'tutorial.md') || (material['name'] == 'slides.html')
if material['name'] == 'tutorial.md'
data['learningResourceType'] = 'hands-on tutorial'
data['name'] = "Hands-on for '#{material['title']}' tutorial"
else
data['learningResourceType'] = 'slides'
data['name'] = "Slides for '#{material['title']}' tutorial"
end
data['learningResourceType'] = if material['name'] == 'tutorial.md'
'hands-on tutorial'
else
'slides'
end
data['name'] = material['title']
data['url'] = "#{site['url']}#{site['baseurl']}#{material['url']}"

# Requires https://github.com/galaxyproject/training-material/pull/4271
data['version'] = Gtn::ModificationTimes.obtain_modification_count(material['path'])

# Time required
if material.key?('time_estimation') && !material['time_estimation'].nil?
data['timeRequired'] = "PT#{material['time_estimation'].upcase}"
Expand All @@ -397,8 +425,7 @@ def to_jsonld(material, topic, site)
end

# Keywords
data['keywords'] = [topic['name']] + (material['tags'] || [])
data['keywords'] = data['keywords'].join(', ')
data['keywords'] = [topic['title']] + (material['tags'] || [])
# Zenodo links
if material.key?('zenodo_link')
mentions.push({
Expand Down Expand Up @@ -517,6 +544,13 @@ def to_jsonld(material, topic, site)
data['author'] = authors
end

# Add non-author contributors
if material.key?('contributions')
data['contributor'] = Gtn::Contributors.get_non_authors(material).map do |x|
generate_person_jsonld(x, site['data']['contributors'][x], site)
end
end

about = []
about.push(topic_desc)
edam_terms = topic.fetch('edam_ontology', []) | material.fetch('edam_ontology', [])
Expand Down
10 changes: 7 additions & 3 deletions bin/workflows-fetch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@ def fetch_workflows(server)
http.request(request)
end

JSON.parse(response.body).map do |w|
w['server'] = server
w
begin
JSON.parse(response.body).map do |w|
w['server'] = server
w
end
rescue StandardError
[]
end
end

Expand Down
Loading