This repository has been archived by the owner on Jan 13, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy patherudite_server.py
187 lines (144 loc) · 5.72 KB
/
erudite_server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import subprocess
import os
import nltk.data
import threading
from flask import Flask
from flask import Response
from flask import request
from flask import jsonify
from flask_cors import CORS
from werkzeug import secure_filename
from Helpers import file_extraction
from Helpers import deployment_utils as deploy
from IR import infoRX
from Models import abcnn_model
from Models import AnsSelect
from Models import DT_RNN
class EdXServer():
    """Hold the uploaded context and run the question-answering pipeline.

    ``get_file`` loads the context from an uploaded document and
    ``get_query`` answers a question against it. Progress messages are
    published through the class-level ``status`` dict via ``update``.
    """

    # Latest progress payload; replaced wholesale by ``update``.
    status = {}

    def __init__(self):
        self.file = ''      # path of the most recently loaded upload
        self.context = []   # extracted contents that queries run against
        self.query = ''     # most recent query string

    @classmethod
    def update(cls, value):
        """Replace the shared progress payload with *value*."""
        cls.status = value

    @staticmethod
    def _error(message):
        """Build the error payload: one 'ERROR' entry carrying *message*."""
        return {'answers': [{'word': 'ERROR', 'score': message}]}

    def get_file(self, filename):
        """Load the context from *filename* inside the upload folder.

        Returns True when extraction produced at least one item (the first
        item becomes the context) and False otherwise.
        """
        self.file = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        self.context = file_extraction.extract_file_contents(self.file)
        print(self.context)
        if len(self.context) > 0:
            self.context = self.context[0]
            return True
        return False

    def get_query(self, query):
        """Answer *query* against the loaded context.

        Stages: paragraph ranking (``infoRX``), sentence tokenization,
        answer-sentence scoring (``abcnn_model``) and answer extraction
        (``deploy.extract_answer_from_sentences``). Each stage publishes a
        progress message through ``update``.

        Returns a dict with an ``answers`` list of ``{'word', 'score'}``
        entries; on success it also carries a ``val`` caption and holds at
        most five candidates. Failures are reported as a single entry whose
        word is ``'ERROR'`` and whose score is the error message.
        """
        self.query = query
        print(self.query)

        # A query that arrives before any successful upload has nothing to
        # search; report that instead of failing inside the pipeline.
        if not self.context:
            return self._error(
                'No context available. Upload a file before querying.')

        # Rank/filter paragraphs with the information-retrieval module.
        self.update({'val': 'Ranking Paragraphs using Information Retrieval.'})
        para_select = infoRX.retrieve_info(self.context, self.query)
        para_sents = []
        tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
        # Debug output only; guarded so an empty IR result cannot raise
        # IndexError outside the error handling below.
        if len(para_select) > 0:
            print(type(para_select[0]), para_select[0])

        self.update({'val': 'Tokenizing top ranked paragraphs'})
        for para in para_select:
            # The first element of each IR result is the text to tokenize.
            para_sents.extend(tokenizer.tokenize(para[0]))
        print('Sentences selected by IR Module:')
        print(para_sents)
        self.update({
            'val': 'Sentences selected by IR Module',
            'answers': [
                {'word': sent, 'score': '\b\b'} for sent in para_sents
            ],
        })

        try:
            # Score candidate answer sentences with the ABCNN model.
            self.update({'val': 'Ranking Candidate Answer Sentences.'})
            abcnn = abcnn_model()
            ans_sents = abcnn.ans_select(query, para_sents)
            self.update({
                'val': 'Sentences scored by Sentence Selection Module',
                'answers': [
                    {'word': sentence, 'score': score[0]}
                    for sentence, score in ans_sents
                ],
            })
            print('\nSystem: Sentences scored by Sentence Selection Module')
            for sentence, score in ans_sents:
                print('{0:50}\t{1}'.format(sentence, score[0]))
            print('')

            self.update({'val': 'Generating VDT and extracting Answer.'})
            # Only the full candidate list is used below.
            _best_ans, _best_score, answers = (
                deploy.extract_answer_from_sentences(
                    ans_sents,
                    query,
                    verbose=True,
                )
            )
        except Exception as e:
            # Model failures are returned to the caller as an error payload
            # rather than propagating out of the request handler.
            return self._error(str(e))

        ans_list = []
        print('\nSystem: Candidate answers scored by Answer Extraction Module')
        for candidate in answers[:5]:
            word = candidate[0]
            word_score = float(candidate[1][0])
            print('{0:10}\t{1}'.format(word, word_score))
            ans_list.append({'word': word, 'score': word_score})
        return {
            'val': 'Candidate answers scored by Answer Extraction Module',
            'answers': ans_list,
        }
# Two Flask applications: `app` serves the upload and status routes, `app2`
# serves the query route. The entry point at the bottom of this file runs
# them on separate ports.
app = Flask(__name__)
app2 = Flask(__name__)

# Single pipeline object shared by every request handler.
server = EdXServer()

# Each application accepts cross-origin requests from its own localhost
# origin, with identical header and credential settings.
for _cors_app, _cors_origin in (
        (app, "http://localhost:5000"),
        (app2, "http://localhost:5001"),
):
    CORS(
        _cors_app,
        origins=_cors_origin,
        allow_headers=[
            "Content-Type", "Authorization", "Access-Control-Allow-Origin"],
        supports_credentials=True,
        intercept_exceptions=False,
    )

# Directory that uploaded documents are written to and read back from.
app.config['UPLOAD_FOLDER'] = './data/uploads'
@app.route('/filed', methods=['POST'])
def filer():
    """Store the uploaded file and load it as the query context.

    Expects a multipart POST with the upload under the ``file`` field. The
    file is written to the configured upload folder under its sanitized name
    and then handed to ``server.get_file``. The response body is a plain-text
    message saying whether the context is ready.
    """
    print('here')
    upload_dir = app.config['UPLOAD_FOLDER']
    if not os.path.exists(upload_dir):
        os.makedirs(upload_dir)
    f = request.files['file']
    # Sanitize once and use the same name for saving and for loading, so a
    # name that sanitization changes is still found afterwards.
    filename = secure_filename(f.filename or '')
    if filename:
        f.save(os.path.join(upload_dir, filename))
    print(f)
    # An empty sanitized name cannot be saved; treat it as a failed upload.
    if filename and server.get_file(filename):
        resp = Response('File uploaded. Context Ready.')
    else:
        resp = Response('Error in file upload.')
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp
@app2.route('/query', methods=['POST'])
def queried():
    """Run the posted query through the pipeline and return JSON.

    The request body is parsed as JSON regardless of its content type and
    must contain a ``query`` entry.
    """
    payload = request.get_json(force=True)
    result = server.get_query(payload['query'])
    response = jsonify(result)
    response.headers['Access-Control-Allow-Origin'] = '*'
    return response
@app.route('/status', methods=['POST'])
def status():
    """Return the pipeline's latest progress payload as JSON."""
    print(EdXServer.status)
    response = jsonify(EdXServer.status)
    response.headers['Access-Control-Allow-Origin'] = '*'
    return response
def start1(port):
    """Run the upload/status application on *port*."""
    app.run(port=port)
def start2(port):
    """Run the query application on *port*."""
    app2.run(port=port)
if __name__ == '__main__':
    # Run each Flask application in its own thread, one port per app.
    workers = [
        threading.Thread(target=start1, args=(5000,)),
        threading.Thread(target=start2, args=(5001,)),
    ]
    for worker in workers:
        worker.start()