Skip to content

Commit

Permalink
#13: introduced main-language selection; #10: improving the Intermediate Templates mechanism
Browse files Browse the repository at this point in the history
#13: users can now select a main language when entering a primary-key label.
#10: work in progress — adapting the entire Intermediate Templates mechanism to the newly introduced features.
  • Loading branch information
Sebastiano-G committed Mar 28, 2024
1 parent 009b3c5 commit 22c7d71
Show file tree
Hide file tree
Showing 9 changed files with 417 additions and 224 deletions.
3 changes: 2 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,9 +988,10 @@ def GET(self, name):
with open(res_template) as tpl_form:
fields = json.load(tpl_form)
try:
title = [data[k][0] for k,v in data.items() \
title_field = [v for k,v in data.items() \
for field in fields if (field['disambiguate'] == "True" \
and k == field['id'])][0]
title = [lang_value for lang_value in title_field if len(lang_value) == 3][0]
except Exception as e:
title = "No title"
properties = {field["label"]:[field["property"], field["type"], field["view_class"]] for field in fields if 'property' in field}
Expand Down
1 change: 1 addition & 0 deletions forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def get_form(json_form, from_dict=False, subtemplate=False):
if "disambiguate" in field and field["disambiguate"] == 'True':
#vpass = form.regexp(r".{0,200}$", 'must be between 1 and 200 characters') # TODO: check the regex (either set it to {0, 200} or remove it in case of Subtemplates' primary keys)
params = params + (form.Textbox(myid, #vpass,
type='text',
description = description,
id=myid,
placeholder=placeholder,
Expand Down
100 changes: 56 additions & 44 deletions mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def getValuesFromFields(fieldPrefix, recordData, fields=None, field_type=None):
result_dict = {'type':'URI'}
results = set()
for key, value in recordData.items():
if key.startswith(fieldPrefix+'-'): # multiple values from text box (wikidata) + URL
if key.startswith(fieldPrefix+'_'): # multiple values from text box (wikidata) + URL
values = value.split(',', 1)
results.add(( values[0].strip(), urllib.parse.unquote(values[1]) )) # (id, label)
elif key == fieldPrefix and field_type != 'Textarea': # uri from dropdown (single value from controlled vocabulary) + URL
Expand All @@ -66,7 +66,10 @@ def getValuesFromTextualFields(fieldPrefix, recordData):
for key, value in recordData.items():
if key.startswith(fieldPrefix+'_'):
lang = key.rsplit('_')[1]
results.add((value,lang))
if lang == 'mainLang':
result_dict['mainLang'] = value
else:
results.add((value,lang))
result_dict['results'] = results
return result_dict

Expand All @@ -76,7 +79,7 @@ def getRightURIbase(value):

def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=None,tpl_form=None):
""" transform input data into RDF, upload data to the triplestore, dump data locally """

print("RECORD:", recordData)
# MAPPING FORM / PROPERTIES
if tpl_form:
with open(tpl_form) as config_form:
Expand Down Expand Up @@ -114,6 +117,7 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non

# retrieve hidden triples (to be saved) and re-introduce them in the modified named graph
to_be_saved = queries.saveHiddenTriples(graphToClear, tpl_form)
print(type(to_be_saved))
if to_be_saved['results']['bindings'] != [{}]:
for binding in to_be_saved['results']['bindings']:
subject = URIRef(binding['subject']['value'])
Expand Down Expand Up @@ -150,12 +154,13 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non
else getValuesFromTextualFields(field['id'], recordData) if 'value' in field and field['value'] == 'Literal' else recordData[field['id']]
# TODO disambiguate as URI, value
if field["disambiguate"] == 'True': # use the key 'disambiguate' as title of the graph
main_value = [label for label in value['results'] if label[1] == conf.mainLang][0]
main_label = main_value[0]
main_lang = main_value[1]
main_lang = value['mainLang']
main_value = [label for label in value['results'] if label[1] == main_lang]
main_value = list(value['results']) if len(main_value) == 0 else main_value
main_label = main_value[0][0]
wd.add(( URIRef(base+graph_name+'/'), URIRef(field['property']), Literal(main_label, lang=main_lang) ))
wd.add(( URIRef(base+graph_name), RDFS.label, Literal(main_label) ))
wd.add(( URIRef(base+graph_name+'/'), RDFS.label, Literal(main_label) ))
wd.add(( URIRef(base+graph_name), RDFS.label, Literal(main_label, lang=main_lang) ))
wd.add(( URIRef(base+graph_name+'/'), RDFS.label, Literal(main_label, lang=main_lang) ))

# the main entity has the same URI of the graph but the final /

Expand Down Expand Up @@ -188,15 +193,14 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non
entityURI = getRightURIbase(entity[0]) # Wikidata or new entity
wd.add(( URIRef(base+graph_name), URIRef(field['property']), URIRef(entityURI) ))
wd.add(( URIRef( entityURI ), RDFS.label, Literal(entity[1].lstrip().rstrip(), datatype="http://www.w3.org/2001/XMLSchema#string") ))
elif value['type'] == 'Literal':
elif value['type'] == 'Literal': #multi-language Literals
for literal in value['results']:
val, lang = literal
val = val.replace('\n','').replace('\r','')
wd.add(( URIRef(base+graph_name), URIRef(field['property']), Literal(val, lang=lang)))
# now get also the entities associated to textareas (record creation)
if field['type'] == 'Textarea':
value = getValuesFromFields(field['id'], recordData, fields, 'Textarea')
print('HERE', value)
for entity in value['results']:
entityURI = getRightURIbase(entity[0])+entity[0]
wd.add(( URIRef(base+graph_name), SCHEMA.keywords, URIRef(entityURI) ))
Expand All @@ -221,25 +225,25 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non
wd_extraction.serialize(destination='records/'+recordID+"-extraction-"+str(graph['internalID'])+'.ttl', format='ttl', encoding='utf-8')
server.update('load <file:///'+dir_path+'/records/'+recordID+"-extraction-"+str(graph['internalID'])+'.ttl> into graph <'+base+extraction_graph_name+'/>')
# SUBTEMPLATE
elif field['type']=="Subtemplate":
elif field['type']=="Subtemplate" and field['id'] in recordData:
print(recordData[field['id']])
if type(recordData[field['id']]) != type([]) and field['id']+"-subrecords" in recordData:
# get the list of subrecords associated to a 'Subtemplate' field
subrecords = recordData[field['id']+"-subrecords"].split(",") if recordData[field['id']+"-subrecords"] != "" else []
for subrecord in subrecords:
if ";" in subrecord:
subrecord_id, retrieved_label = subrecord.split(";",1)
else:
# process a new subrecord, send its data to the triplestore, and link it to the main record
subrecord_id = subrecord
subrecord_template = field['import_subtemplate']
label_id = find_label(subrecord_template)
label_value = [recordData[label] for label in recordData[subrecord].split(",") if label == label_id+"__"+subrecord or (label == label_id and recordData[label] != '')]
retrieved_label = label_value[0] if len(label_value) > 0 else field['label']+"-"+subrecord
process_new_subrecord(recordData,userID,stage,knowledge_extraction,subrecord_template,subrecord)
processed_subrecord = process_new_subrecord(recordData,userID,stage,knowledge_extraction,subrecord_template,subrecord)
subrecord_id, retrieved_label = processed_subrecord
wd.add(( URIRef(base+graph_name), URIRef(field['property']), URIRef(base+subrecord_id) ))
wd.add(( URIRef(base+subrecord_id), RDFS.label, Literal(retrieved_label, datatype="http://www.w3.org/2001/XMLSchema#string")))
else:
for entity in recordData[field['id']]:
entity_URI, entity_label = entity.split(",",1)
entity_URI, entity_label = entity
wd.add(( URIRef(base+graph_name), URIRef(field['property']), URIRef(base+entity_URI) ))
wd.add(( URIRef(base+entity_URI), RDFS.label, Literal(entity_label, datatype="http://www.w3.org/2001/XMLSchema#string")))

Expand All @@ -262,45 +266,53 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non
return 'records/'+recordID+'.ttl'

def process_new_subrecord(data, userID, stage, knowledge_extraction, sub_tpl, subrecord_id):
subrecord_fields = data[subrecord_id].split(",")
# prepare a new dict to store data of subrecord-x
new_record_data = {'recordID': subrecord_id,}

with open(sub_tpl) as fields:
subtemplate = json.load(fields)
# process the input data related to subrecord-x
for subrecord_field in subrecord_fields:
subrecord_field__base_id = subrecord_field.split("__")[0]
for subtemplate_field in subtemplate:
subfield_id = subtemplate_field['id']

# check inner subrecords (subrecord-y associated with subrecord-x's 'Subtemplate' field
if subrecord_field+"-subrecords" in data:
new_record_data[subrecord_field__base_id] = [[]]
inner_subtemplate = [key['import_subtemplate'] for key in subtemplate if key['id'] == subrecord_field__base_id][0]
for inner_subrecord in data[subrecord_field + "-subrecords"].split(","):
if ";" in inner_subrecord:
processed_subrecord = inner_subrecord.replace(';', ",", 1)
else:
processed_subrecord = process_new_subrecord(data,userID,stage,knowledge_extraction,inner_subtemplate,inner_subrecord)
new_record_data[subrecord_field__base_id][0].append(processed_subrecord)
# check single value fields (e.g. Date/Literal)
elif data[subrecord_field] != "":
key = subrecord_field.split("__")[0]
new_record_data[key] = data[subrecord_field]
# check multiple values fields (e.g. Entities, SKOS Vocabs)
else:
multiple_values = [key for key in data if key.startswith(subrecord_field+"-")]
if multiple_values != []:
for value in multiple_values:
new_key = value.split("__")[0] + "-" + value.split("-")[-1]
new_record_data[new_key] = data[value]
else:
new_record_data[subrecord_field.split("__")[0]] = ""
# Subtemplate
if subtemplate_field['type'] == 'Subtemplate':
key = subfield_id+"_"+subrecord_id
# Process inner-subrecords and retrieve their ids,labels in order to provide a link to them in the upper-level subrecord
if key+"-subrecords" in data:
new_record_data[subfield_id] = [[]]
inner_subtemplate = subtemplate_field['import_subtemplate']
for inner_subrecord in data[key+"-subrecords"].split(","):
if ";" in inner_subrecord:
processed_subrecord = inner_subrecord.replace(';', ",", 1)
else:
processed_subrecord = process_new_subrecord(data,userID,stage,knowledge_extraction,inner_subtemplate,inner_subrecord)
new_record_data[subfield_id][0].append(processed_subrecord) # store the id,label pair inside the subrecord dict

# Date
elif subtemplate_field['type'] == 'Date':
key = subtemplate_field['id']+"_"+subrecord_id
new_record_data[subtemplate_field['id']] = data[key]

# Multiple values fields: Literals or URI
elif subtemplate_field['value'] == 'Literal' or subtemplate_field['value'] in ['URI','URL']:
keys = [input_id for input_id in data.keys() if input_id.startswith(subtemplate_field['id']+"_") and input_id.endswith("_"+subrecord_id)]
for key in keys:
shortened_key = key.rsplit("_",1)[0]
new_record_data[shortened_key] = data[key]

# Label: disambiguate field
if subtemplate_field['disambiguate'] == "True":
main_lang_input_field = subfield_id+'_mainLang_'+subrecord_id
main_lang = data[main_lang_input_field] if main_lang_input_field in data else "No main lang"
label_input_field = subfield_id+"_"+main_lang+"_"+subrecord_id
label = data[label_input_field] if label_input_field in data else "No label"

label = new_record_data[find_label(sub_tpl)]
print("\nDATA:\n",new_record_data)
store_data = storify(new_record_data)
grapht_to_clear = None if stage == 'not modified' else base+subrecord_id+"/"
inputToRDF(store_data,userID,stage,knowledge_extraction,graphToClear=grapht_to_clear,tpl_form=sub_tpl)
result = subrecord_id+","+label
result = [subrecord_id,label]
return result

def find_label(tpl):
Expand Down
12 changes: 7 additions & 5 deletions queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,16 @@ def compare_sublists(l, lol):
return False

data = defaultdict(list)
print("#DATA:\n", results)
for result in results["results"]["bindings"]:
result.pop('subject',None)
result.pop('graph_title',None)
label = result.pop('graph_title',None)
for k,v in result.items():
if '_label' not in k and v['type'] == 'literal': # string values
if v['value'] not in data[k]: # unique values
data[k].append(v['value'])
value = v['value']
if 'xml:lang' in v:
value = (v['value'],v['xml:lang'],'mainLang') if v['value']==label['value'] and v['xml:lang']==label['xml:lang'] else (v['value'],v['xml:lang'])
if value not in data[k]:
data[k].append(value)
elif v['type'] == 'uri': # uri values

if k+'_label' in result:
Expand Down Expand Up @@ -362,7 +364,7 @@ def saveHiddenTriples(graph, tpl):
with open(tpl) as template:
fields = json.load(template)

results = []
results = {'results': {'bindings': [{}]}}
hidden_fields = [field for field in fields if field['hidden'] == 'True']
patterns = [ 'OPTIONAL {?subject <'+hidden_field['property']+'> ?'+hidden_field['id']+'. ?subject ?'+hidden_field['id']+'_property ?'+hidden_field['id']+'}. ' if hidden_field['value'] in ['Literal','Date','gYearMonth','gYear','URL'] else 'OPTIONAL {?subject <'+hidden_field['property']+'> ?'+hidden_field['id']+'. ?'+hidden_field['id']+' rdfs:label ?'+hidden_field['id']+'_label . ?subject ?'+hidden_field['id']+'_property ?'+hidden_field['id']+'}.' for hidden_field in hidden_fields if 'value' in hidden_field and hidden_field['hidden'] == 'True']
if patterns != []:
Expand Down
16 changes: 10 additions & 6 deletions static/css/main.css
Original file line number Diff line number Diff line change
Expand Up @@ -1417,7 +1417,7 @@ footer .articleSubtitle {
}

option[selected='selected'] {
background-color: rgba(100, 23, 180, 1);
background-color: #ced4da;
}

/* Style the arrow inside the select element: */
Expand Down Expand Up @@ -2126,7 +2126,6 @@ input[subtemplate]+.fa-plus-circle {
line-height: 1.5;
padding: .375rem 1.75rem .375rem .75rem;
}

.lang-item:first-of-type {
padding-left: 0.8em !important;
}
Expand All @@ -2136,7 +2135,6 @@ input[subtemplate]+.fa-plus-circle {
text-decoration: none !important;
cursor: pointer;
}

.lang-item.selected-lang, .lang-item:hover {
text-decoration: underline !important;
color: rgb(100, 23, 180) !important;
Expand All @@ -2151,7 +2149,6 @@ input[subtemplate]+.fa-plus-circle {
border: solid 1px black;
border-radius: 5px;
}

#lang-form:after {
content: " ";
position: absolute;
Expand All @@ -2162,17 +2159,24 @@ input[subtemplate]+.fa-plus-circle {
border-left: 10px solid transparent;
border-bottom: 10px solid rgba(100, 23, 180, 1);
}

#lang-form section:first-child div {
z-index: 1001;
position: relative;
}

#lang-form section:nth-child(2) div {
z-index: 1000;
position: relative;
}
#lang-form .current-lang {
background-color: #ced4da;
}

.info-language .lang-item {
font-size: 0.9em !important;
}
.info-language .lang-item:first-of-type {
padding-left: 0px !important;
}

/* extra */
.fa-globe, .fa-plus-circle, .fa-eye, .fa-trash{color: rgba(100,23,180,1); cursor: pointer;}
Expand Down
Loading

0 comments on commit 22c7d71

Please sign in to comment.