app.py
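"""OpenGPT 4o demo.

A Gradio app with tabs for multimodal chat, voice chat (speech-to-text ->
LLM -> text-to-speech via Groq and edge-tts), image generation, and video
generation.
"""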
import os
import asyncio

import gradio as gr
import spaces
from chatbot import model_inference, EXAMPLES, chatbot
from voice_chat import respond
from MyChatInterface import MyChatInterface
from groq import Groq

# The Groq API key is read from the environment; set GROQ_API_KEY before launching.
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
# Define the Gradio theme
theme = gr.themes.Soft(
    primary_hue="sky",
    secondary_hue="violet",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Orbitron")],
)
# Create Gradio Blocks for the different tabs

# Chat interface block
with gr.Blocks(
    css=""".gradio-container .avatar-container {height: 40px; width: 40px !important;} #duplicate-button {margin: auto; color: white; background: #f1a139; border-radius: 100vh; margin-top: 2px; margin-bottom: 2px;}""",
) as chat:
    gr.Markdown("### Image Chat, Image Generation, Image Classification, and Normal Chat")
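    # MyChatInterface is a project-local variant of gr.ChatInterface
    # (presumably customized in MyChatInterface.py).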
    MyChatInterface(
        fn=model_inference,
        chatbot=chatbot,
        examples=EXAMPLES,
        multimodal=True,
        cache_examples=False,
        autofocus=False,
        concurrency_limit=10,
    )
# Voice chat block: transcribe speech with Groq Whisper, generate a reply with an
# LLM, then synthesize the reply with edge-tts.
def transcribe_and_stream(inputs, model_name="groq_whisper", show_info="show_info", language="english"):
    """Transcribe the recorded audio file to text with Groq's Whisper endpoint."""

    def groq_whisper_stt(filename):
        groq_client = Groq(api_key=GROQ_API_KEY)
        with open(filename, "rb") as file:
            transcription = groq_client.audio.transcriptions.create(
                file=(filename, file.read()),
                model="whisper-large-v3-turbo",
                response_format="json",
                temperature=0.0,
            )
        print("transcribed text:", transcription.text)
        return transcription.text

    if inputs:
        if show_info == "show_info":
            gr.Info("Processing Audio", duration=1)
        return groq_whisper_stt(inputs)
    return ""
def aya_speech_text_response(text):
    """Stream Aya's text reply to the transcribed user input."""
    if text:
        groq_client = Groq(api_key=GROQ_API_KEY)
        stream = groq_client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[  # TODO: switch to self.messages to keep conversation history
                {
                    "role": "system",
                    "content": "You are Aya, a helpful voice assistant. Answer the user clearly and concisely.",
                },
                {
                    "role": "user",
                    "content": text,
                },
            ],
            temperature=0.3,
            max_tokens=8000,
            top_p=1,
            stream=True,
            stop=None,
        )
        # Accumulate streamed delta chunks and yield the running reply so the
        # response textbox updates as tokens arrive.
        output = ""
        for chunk in stream:
            delta = chunk.choices[0].delta.content
            if delta:
                output += delta
                yield output
    else:
        yield ""
def convert_text_to_speech(text, language="english"):
    """Convert Aya's reply to speech with edge-tts and return the audio file path."""
    import edge_tts
    import tempfile

    async def text_to_speech(text, voice, rate, pitch):
        if not text.strip():
            return None, gr.Warning("Please enter text to convert.")
        if not voice:
            return None, gr.Warning("Please select a voice.")
        voice_short_name = voice.split(" - ")[0]
        rate_str = f"{rate:+d}%"
        pitch_str = f"{pitch:+d}Hz"
        communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name
        await communicate.save(tmp_path)
        return tmp_path, None

    if text:
        # Default English voice; language detection could be used here to pick a
        # voice matching the reply's language.
        audio_path, _ = asyncio.run(text_to_speech(text, "en-US-JennyNeural", 0, 0))
        return audio_path
    return None
with gr.Blocks() as voice:
    with gr.Row():
        with gr.Column():
            e2e_audio_file = gr.Audio(sources="microphone", type="filepath", min_length=None)
            e2_audio_submit_button = gr.Button(value="Get Aya's Response", variant="primary")
            clear_button_microphone = gr.ClearButton()
        with gr.Column():
            e2e_audio_file_trans = gr.Textbox(lines=3, label="Your Input", autoscroll=False, show_copy_button=True, interactive=False)
            e2e_audio_file_aya_response = gr.Textbox(lines=3, label="Aya's Response", show_copy_button=True, container=True, interactive=False)
            e2e_aya_audio_response = gr.Audio(type="filepath", label="Aya's Audio Response")
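    # The submit button chains three steps with .then(): transcribe the recording,
    # stream the LLM reply into the response textbox, then synthesize that reply
    # as audio.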
    e2_audio_submit_button.click(
        transcribe_and_stream,
        inputs=[e2e_audio_file],
        outputs=[e2e_audio_file_trans],
        show_progress="full",
    ).then(
        aya_speech_text_response,
        inputs=[e2e_audio_file_trans],
        outputs=[e2e_audio_file_aya_response],
        show_progress="full",
    ).then(
        convert_text_to_speech,
        inputs=[e2e_audio_file_aya_response],
        outputs=[e2e_aya_audio_response],
        show_progress="full",
    )
    # Clear all four components at once.
    clear_button_microphone.click(
        lambda: (None, None, None, None),
        None,
        [e2e_audio_file, e2e_audio_file_trans, e2e_audio_file_aya_response, e2e_aya_audio_response],
    )
# Image generation block (embeds an external Hugging Face Space)
with gr.Blocks() as image:
    gr.HTML("<iframe src='https://kingnish-image-gen-pro.hf.space' width='100%' height='2000px' style='border-radius: 8px;'></iframe>")

# Video generation block (embeds an external Hugging Face Space)
with gr.Blocks() as instant2:
    gr.HTML("<iframe src='https://kingnish-instant-video.hf.space' width='100%' height='3000px' style='border-radius: 8px;'></iframe>")

with gr.Blocks() as video:
    gr.Markdown("""More models are coming.""")
    gr.TabbedInterface([instant2], ['Instant🎥'])
# Main application block
with gr.Blocks(theme=theme, title="OpenGPT 4o DEMO") as demo:
    gr.Markdown("# OpenGPT 4o")
    gr.TabbedInterface([chat, voice, image, video], ['💬 SuperChat', '🗣️ Voice Chat', '🖼️ Image Engine', '🎥 Video Engine'])

demo.queue(max_size=300)
# demo.launch(share=True)  # enable to get a public share link
demo.launch()
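# A minimal way to run this app locally (assumes the Python dependencies are
# installed and that the Groq key is available in the environment):
#
#   export GROQ_API_KEY=...
#   python app.py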