Skip to content

Commit

Permalink
#10 Introduced duplicate subrecords; #7 solved
Browse files Browse the repository at this point in the history
#10: The recursive function that manages the produced subrecords (mapping.py) has been optimized to make contributions faster and to reduce the number of steps required. On the Record creation page, it is now possible to select subrecords already created for the same OWL class, including subrecords created on the same page and not yet saved (main.js)
#7 Fixed problems with homonyms and creation of previously deleted templates. The feature will need to be updated when multiple classes for templates are introduced.
  • Loading branch information
Sebastiano-G committed Feb 22, 2024
1 parent 4785e2f commit 0acf30f
Show file tree
Hide file tree
Showing 5 changed files with 222 additions and 74 deletions.
10 changes: 6 additions & 4 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ def GET(self, res_name):
with open(TEMPLATE_LIST,'r') as tpl_file:
tpl_list = json.load(tpl_file)

print(res_name)
res_type = [i['type'] for i in tpl_list if i["short_name"] == res_name][0]
res_full_name = [i['name'] for i in tpl_list if i["short_name"] == res_name][0]

Expand Down Expand Up @@ -513,6 +514,7 @@ def POST(self, page):
# create a new template
elif actions.action.startswith('createTemplate'):
print('create template')
print(actions)
is_git_auth = github_sync.is_git_auth()
res_type = actions.class_uri.strip() if "class_uri" in actions else conf.main_entity
res_name = actions.class_name.replace(' ','_').lower() if "class_name" in actions else "not provided"
Expand All @@ -524,9 +526,9 @@ def POST(self, page):
types = [t['type'] for t in templates]
now_time = str(time.time()).replace('.','-')
# check for duplicates
res_n = actions.class_name if (res_type not in types and res_name not in names) else actions.class_name+'_'+now_time
res_n, adress = (actions.class_name, res_name) if (res_type not in types and res_name not in names) else (actions.class_name+'_'+now_time, res_name+'_'+now_time)
u.updateTemplateList(res_n,res_type)
raise web.seeother(prefixLocal+'template-'+res_name)
raise web.seeother(prefixLocal+'template-'+adress)

# login or create a new record
else:
Expand Down Expand Up @@ -827,7 +829,7 @@ def POST(self, name):

# save the new record for future publication
if actions.action.startswith('save'):
if not f.validates() or not u.check_mandatory_fields(web.input()):
if not f.validates():
graphToRebuild = conf.base+name+'/'
recordID = name
data = queries.getData(graphToRebuild,templateID)
Expand Down Expand Up @@ -866,7 +868,7 @@ def POST(self, name):

# publish the record
elif actions.action.startswith('publish'):
if not f.validates() or not u.check_mandatory_fields(web.input()):
if not f.validates():
graphToRebuild = conf.base+name+'/'
recordID = name
data = queries.getData(graphToRebuild,templateID)
Expand Down
6 changes: 3 additions & 3 deletions forms.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
import web , datetime , os, time, re, cgi , json
import web , datetime , os, time, re, cgi , json, html
from web import form
import conf

Expand Down Expand Up @@ -60,7 +60,7 @@ def get_form(json_form, from_dict=False, subtemplate=False):
description = field['label'] if 'label' in field and len(field['label']) > 0 else 'input'
pre_a = '<span class="tip" data-toggle="tooltip" data-placement="bottom" title="'
pre_b = '"><i class="fas fa-info-circle"></i></span>'
prepend = pre_a+field['prepend']+pre_b if 'prepend' in field and len(field['prepend']) > 0 else ''
prepend = pre_a+html.escape(field['prepend'])+pre_b if 'prepend' in field and len(field['prepend']) > 0 else ''
disabled = 'disabled' if 'disabled' in field and field['disabled'] == "True" else ''
classes = field['class'] if 'class' in field and len(field['class']) > 0 else ''
if 'vocab' in field:
Expand All @@ -87,7 +87,7 @@ def get_form(json_form, from_dict=False, subtemplate=False):
# Text box
if field['type'] in ['Textbox','Vocab', 'WebsitePreview']:
if "disambiguate" in field and field["disambiguate"] == 'True':
vpass = form.regexp(r".{1,200}$", 'must be between 1 and 200 characters')
vpass = form.regexp(r".{0,200}$", 'must be between 1 and 200 characters') # TODO: check the regex (either set it to {0, 200} or remove it in case of Subtemplates' primary keys)
params = params + (form.Textbox(myid, vpass,
description = description,
id=myid,
Expand Down
115 changes: 74 additions & 41 deletions mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def getValuesFromFields(fieldPrefix, recordData, fields=None, field_type=None):


def getRightURIbase(value):
    """Return the full URI for an entity identifier.

    The identifier is stripped of surrounding whitespace and then matched,
    in order, against the following rules:

    * starts with ``Q``     -> ``WD`` + identifier
    * only decimal digits   -> ``GEO`` + identifier
    * starts with ``viaf``  -> ``VIAF`` + identifier without its first four characters
    * starts with ``http``  -> the identifier itself
    * anything else         -> ``base`` + identifier

    ``WD``, ``GEO``, ``VIAF`` and ``base`` are module-level prefixes defined
    elsewhere in this file.
    """
    # Strip once, up front: surrounding whitespace would otherwise defeat the
    # prefix checks below and end up inside the generated URI.
    value = value.strip()
    if value.startswith('Q'):
        return WD + value
    if value.isdecimal():
        return GEO + value
    if value.startswith("viaf"):
        # drop the literal "viaf" prefix, keep the rest of the identifier
        return VIAF + value[4:]
    if value.startswith("http"):
        # already a full URI
        return value
    return base + value


def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=None,tpl_form=None, subrecords_dict=None):
Expand Down Expand Up @@ -115,6 +115,7 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non
print(subject, predicate, obj, label)
if label:
wd.add((obj, RDFS.label, Literal(label, datatype="http://www.w3.org/2001/XMLSchema#string")))

queries.clearGraph(graphToClear)
wd.add(( URIRef(base+graph_name+'/'), PROV.generatedAtTime, Literal(datetime.datetime.now(),datatype=XSD.dateTime) ))
wd.add(( URIRef(base+graph_name+'/'), URIRef('http://dbpedia.org/ontology/currentStatus'), Literal(stage, datatype="http://www.w3.org/2001/XMLSchema#string") ))
Expand Down Expand Up @@ -198,11 +199,25 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non
server.update('load <file:///'+dir_path+'/records/'+recordID+"-extraction-"+str(graph['internalID'])+'.ttl> into graph <'+base+extraction_graph_name+'/>')
# SUBTEMPLATE
elif field['type']=="Subtemplate":
# check potential duplications:
#doubled_values = check_double_subrecords(recordData) if not doubled_values else doubled_values

# handle imported entities from catalogue (not newly created ones)
imported_entities = [field_id for field_id in recordData if field_id.startswith(field['id']+"-") and "," in recordData[field_id]]
for imported_entity in imported_entities:
imported_entity_id, imported_entity_label = recordData[imported_entity].split(',')
imported_entity_label = urllib.parse.unquote(imported_entity_label)
entityURI = getRightURIbase(imported_entity_id)
print(entityURI)
wd.add(( URIRef(base+graph_name), URIRef(field['property']), URIRef(entityURI) ))
wd.add(( URIRef( entityURI ), RDFS.label, Literal(imported_entity_label.lstrip().rstrip(), datatype="http://www.w3.org/2001/XMLSchema#string") ))
subrecords = process_subrecords(recordData, field['id']) if not subrecords_dict else subrecords_dict
print("SUBRECORDS!!!!!!!!!!!!!!!\n", subrecords)
print("#### surbrecords:", subrecords)
if field['id'] in subrecords:
for subrecord_idx, subrecord in subrecords[field['id']].items():
ID = str(int(time.time() * 1000))
ct = datetime.datetime.now()
ts = ct.timestamp()
ID = str(ts).replace('.', '-')
subrecord['recordID'] = ID
label = find_label(field['import_subtemplate'], subrecord, field['label'])
inputToRDF(storify(subrecord),userID,stage,knowledge_extraction,tpl_form=field['import_subtemplate'],subrecords_dict=subrecord)
Expand All @@ -228,45 +243,63 @@ def inputToRDF(recordData, userID, stage, knowledge_extraction, graphToClear=Non

return 'records/'+recordID+'.ttl'

def check_double_subrecords(data):
    """Collect the fields whose value points at another subrecord.

    A field is a pointer when its value starts with ``"target-"``. Both the
    field key and the pointed-at value (without the marker) are shortened to
    ``"<first token>__<last token>"``, where tokens are separated by ``"__"``.

    Args:
        data: mapping of field keys to submitted values.

    Returns:
        A dict with two sub-dicts:
        ``'targets'`` maps each shortened target to the shortened key pointing
        at it, and ``'pointers'`` maps each shortened key to its shortened
        target.
    """
    marker = "target-"
    results_dict = {
        'targets': {},
        'pointers': {},
    }
    for key, value in data.items():
        # Values that are not strings cannot carry the marker; skip them
        # instead of failing on ``startswith``.
        if not isinstance(value, str) or not value.startswith(marker):
            continue
        split_key = key.split("__")
        new_key = split_key[0] + "__" + split_key[-1]
        # Remove only the leading marker: ``str.replace`` would also delete
        # any later occurrence of "target-" inside the identifier.
        split_value = value[len(marker):].split("__")
        new_value = split_value[0] + "__" + split_value[-1]
        results_dict['targets'][new_value] = new_key
        results_dict['pointers'][new_key] = new_value
    return results_dict




# convert the dict of inputs into a series of nested dictionaries to be parsed as single records
# Build a nested dictionary of subrecord values for the field `id` out of the flat `data` mapping.
# Returns `results`: either {prefix: {num: {field values}}} when subrecord keys exist,
# or {field prefix: value} when only `data[id]` is filled in.
# NOTE(review): a later definition of process_subrecords in this listing takes an extra
# `created_subrecords` parameter; presumably this one is the superseded version - confirm.
def process_subrecords(data, id):
results = {}
# keys starting with "<id>__" are treated as subrecords created for this field
created_subrecords = [key for key in data if key.startswith(id+"__")]
if created_subrecords != []:
for subrecord in created_subrecords:
add_results = {}
subrecord_split = subrecord.split('__')
# first "__"-separated token is the prefix, last token is the key of this subrecord in the result
prefix, num = subrecord_split[0], subrecord_split[-1]
# the value stored under a subrecord key is a comma-separated list of the field keys it contains
subrecord_fields = data[subrecord].split(',')
# NOTE(review): `inner_subrecords` is assigned again inside the loop below; TODO confirm this initial value is ever read
inner_subrecords = [key for item in subrecord_fields for key in data.keys() if key.startswith(item + "__")]
for key in subrecord_fields:
# a non-empty value is copied under the part of the key before the first "__"
if data[key] != "":
add_results[key.split('__')[0]] = data[key]
else:
# empty value: look for nested subrecords, i.e. keys starting with "<key>__"
inner_subrecords = [inner_subrecord for inner_subrecord in data.keys() if inner_subrecord.startswith(key + "__")]
if inner_subrecords != []:
for inner_subrecord in inner_subrecords:
inner_subrecord_split = inner_subrecord.split('__')
inner_prefix, inner_num = inner_subrecord_split[0], inner_subrecord_split[-1]
# recurse with the nested subrecord key as the new `id`
if inner_prefix in add_results:
add_results[inner_prefix][inner_num] = process_subrecords(data, inner_subrecord)
else:
add_results[inner_prefix] = {
inner_num: process_subrecords(data, inner_subrecord)
}
else:
# no nested subrecords: collect the keys starting with "<key>-"
imported_values = [import_key for import_key in data.keys() if import_key.startswith(key + "-")]
for imported_value in imported_values:
# new key = part before the first "__" + "-" + part after the last "-"
new_key = imported_value.split('__')[0] + "-" + imported_value.split('-')[-1]
add_results[new_key] = data[imported_value]
# store the collected values under results[prefix][num]
if prefix in results:
results[prefix][num] = add_results
else:
results[prefix] = { num: add_results }
# otherwise `data[id]` itself is read as a comma-separated list of field keys
elif data[id] != "":
for el in data[id].split(','):
results[el.split('__')[0]] = data[el]
return results
# Build a nested dictionary of subrecord values for the field `id` out of the flat `data` mapping.
#   data: mapping of field keys to string values
#   id: key of the field (or subrecord) being processed
#   created_subrecords: optional list of subrecord keys to process; when None the keys are looked up in `data`
# Returns `results`: {prefix: {num: {field values}}} for each processed subrecord, or,
# when there are no subrecords and `data[id]` is not empty, the values of the fields listed in `data[id]`.
def process_subrecords(data, id, created_subrecords=None):
results = {}
# default lookup: keys starting with "<id>__" whose value does not start with "target-"
subrecords = [key for key in data if key.startswith(id+"__") and not data[key].startswith("target-")] if created_subrecords == None else created_subrecords

for subrecord in subrecords:
subrecord_split = subrecord.split('__')
# first "__"-separated token is the prefix, last token is the key of this subrecord in the result
prefix, num = subrecord_split[0], subrecord_split[-1]
# make sure results[prefix][num] exists before it is filled
if prefix not in results:
results[prefix] = { num: {} }
else:
results[prefix][num] = {}
add_results = {}
# the value stored under a subrecord key is a comma-separated list of the field keys it contains
subrecord_fields = data[subrecord].split(',')
for key in subrecord_fields:
# value starting with "target-": recurse using that value, minus the marker, as the new `id`
if data[key].startswith("target-"):
add_results[key.replace("target-", "").split('__')[0]] = {key.split('__')[-1] : process_subrecords(data, data[key].replace("target-", "")) }
# any other non-empty value is copied under the part of the key before the first "__"
elif data[key] != "":
add_results[key.split('__')[0]] = data[key]
else:
# empty value: collect the keys starting with "<key>-"
multiple_values_fields = [import_key for import_key in data.keys() if import_key.startswith(key + "-")]
for imported_value in multiple_values_fields:
# new key = part before the first "__" + "-" + part after the last "-"
new_key = imported_value.split('__')[0] + "-" + imported_value.split('-')[-1]
add_results[new_key] = data[imported_value]
# nested subrecords: keys starting with "<key>__" whose value does not start with "target-"
inner_subrecords = [item for item in data.keys() if item.startswith(key + "__") and not data[item].startswith("target-") ]
if inner_subrecords:
# recurse with the nested keys passed explicitly and keep only the entry for this field's prefix
add_results[key.split('__')[0]] = process_subrecords(data, key, inner_subrecords)[key.split('__')[0]]
results[prefix][num] = add_results

# no subrecords at all: `data[id]` is read as a comma-separated list of field keys
if not subrecords and data[id] != "":
for el in data[id].split(','):
# keys starting with "<el>-" are copied under "<part before first '__'>-<part after last '-'>"
imported_resources = [field_id for field_id in data if field_id.startswith(el+"-")]
for imported_res in imported_resources:
results[imported_res.split('__')[0]+"-"+imported_res.split("-")[-1]] = data[imported_res]
results[el.split('__')[0]] = data[el]

return results


def find_label(tpl, subrecord, alternative_label):
print(tpl)
Expand Down
Loading

0 comments on commit 0acf30f

Please sign in to comment.