Bhaskar2611 committed
Commit 96be83b · verified · 1 Parent(s): 8c459b7

Update app.py

Files changed (1):
  1. app.py +126 -100
app.py CHANGED
@@ -1,73 +1,12 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-
-if __name__ == "__main__":
-    demo.launch()
-
 # import gradio as gr
 # from huggingface_hub import InferenceClient
 
+# """
+# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+# """
 # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
-# # Function to respond to user input while maintaining chat history
+
 # def respond(
 #     message,
 #     history: list[tuple[str, str]],
@@ -76,43 +15,37 @@ if __name__ == "__main__":
 #     temperature,
 #     top_p,
 # ):
-#     try:
-#         # Append system message at the start
-#         messages = [{"role": "system", "content": system_message}]
-
-#         # Append the chat history
-#         for user_msg, bot_reply in history:
-#             if user_msg:
-#                 messages.append({"role": "user", "content": user_msg})
-#             if bot_reply:
-#                 messages.append({"role": "assistant", "content": bot_reply})
-
-#         # Add the latest user message
-#         messages.append({"role": "user", "content": message})
-
-#         response = ""
-
-#         # Stream the response token by token to avoid loading delays
-#         for message in client.chat_completion(
-#             messages,
-#             max_tokens=max_tokens,
-#             stream=True,
-#             temperature=temperature,
-#             top_p=top_p,
-#         ):
-#             token = message.choices[0].delta.content
-#             response += token
-#             yield response
-
-#     except Exception:
-#         # Handle any error silently (without showing the error icon or message)
-#         yield "An internal error occurred. But let's continue."  # Custom message or silent
-
-#     # Gradio interface for customizable chatbot behavior
+#     messages = [{"role": "system", "content": system_message}]
+
+#     for val in history:
+#         if val[0]:
+#             messages.append({"role": "user", "content": val[0]})
+#         if val[1]:
+#             messages.append({"role": "assistant", "content": val[1]})
+
+#     messages.append({"role": "user", "content": message})
+
+#     response = ""
+
+#     for message in client.chat_completion(
+#         messages,
+#         max_tokens=max_tokens,
+#         stream=True,
+#         temperature=temperature,
+#         top_p=top_p,
+#     ):
+#         token = message.choices[0].delta.content
+
+#         response += token
+#         yield response
+
+# """
+# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+# """
 # demo = gr.ChatInterface(
-#     fn=respond,
+#     respond,
 #     additional_inputs=[
-#         gr.Textbox(value="You are an AI dermatologist Chatbot.", label="System message"),
+#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
 #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
 #         gr.Slider(
@@ -125,7 +58,100 @@
 #     ],
 # )
 
+
 # if __name__ == "__main__":
 #     demo.launch()
 
 
+import gradio as gr
+from huggingface_hub import InferenceClient
+
+# Initialize the client with your desired model
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+# Format the conversation prompt with history
+def format_prompt(message, history):
+    prompt = "<s>"  # Beginning of sequence for formatting
+    for user_prompt, bot_response in history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"  # Format current user message
+    return prompt
+
+# Function to generate responses while keeping conversation context
+def generate(
+    prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0
+):
+    temperature = float(temperature)
+    if temperature < 1e-2:
+        temperature = 1e-2
+    top_p = float(top_p)
+
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=max_new_tokens,
+        top_p=top_p,
+        repetition_penalty=repetition_penalty,
+        do_sample=True,
+        seed=42,  # Seed for reproducibility
+    )
+
+    # Format the prompt with the history and current message
+    formatted_prompt = format_prompt(prompt, history)
+
+    # Stream the generated response
+    stream = client.text_generation(
+        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
+    )
+    output = ""
+    for response in stream:
+        output += response.token.text
+        yield output  # Yield the streamed output as it's generated
+
+# Customizable input controls for the chatbot interface
+additional_inputs = [
+    gr.Slider(
+        label="Temperature",
+        value=0.9,
+        minimum=0.0,
+        maximum=1.0,
+        step=0.05,
+        interactive=True,
+        info="Higher values produce more diverse outputs",
+    ),
+    gr.Slider(
+        label="Max new tokens",
+        value=256,
+        minimum=0,
+        maximum=1048,
+        step=64,
+        interactive=True,
+        info="The maximum numbers of new tokens",
+    ),
+    gr.Slider(
+        label="Top-p (nucleus sampling)",
+        value=0.90,
+        minimum=0.0,
+        maximum=1,
+        step=0.05,
+        interactive=True,
+        info="Higher values sample more low-probability tokens",
+    ),
+    gr.Slider(
+        label="Repetition penalty",
+        value=1.2,
+        minimum=1.0,
+        maximum=2.0,
+        step=0.05,
+        interactive=True,
+        info="Penalize repeated tokens",
+    )
+]
+
+# Define the chatbot interface with interactive sliders and chatbot panel
+gr.ChatInterface(
+    fn=generate,
+    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
+    additional_inputs=additional_inputs,
+    title="""AI Dermatologist Chatbot"""
+).launch(show_api=False)
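For reference, a minimal offline sketch (not part of the commit) of the prompt string the new format_prompt builds for a short conversation; the example history below is hypothetical. The [INST] ... [/INST] and </s> layout follows the Mistral-Instruct prompt convention, whereas zephyr-7b-beta's native chat template uses <|system|>/<|user|>/<|assistant|> markers, so this formatting is a choice made by the Space rather than the model's own template.

# Standalone sketch of the committed format_prompt logic; runs without
# any InferenceClient call or network access.
def format_prompt(message, history):
    prompt = "<s>"  # beginning-of-sequence marker
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"  # past user turn
        prompt += f" {bot_response}</s> "          # past bot turn, closed with </s>
    prompt += f"[INST] {message} [/INST]"          # current user turn, left open
    return prompt

# Hypothetical one-turn history in the (user, bot) tuple format
# that gr.ChatInterface passes to its fn:
history = [("What causes acne?", "Usually clogged pores, excess oil, and bacteria.")]
print(format_prompt("How should I treat it?", history))
# <s>[INST] What causes acne? [/INST] Usually clogged pores, excess oil, and bacteria.</s> [INST] How should I treat it? [/INST]

When gr.ChatInterface invokes fn=generate, it passes the new message and the history first, then the current values of additional_inputs in order, so the four sliders bind positionally to temperature, max_new_tokens, top_p, and repetition_penalty.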