1. Introduction to ChatGLM2-6B
ChatGLM2-6B is an open bilingual (Chinese-English) chat model released by Zhipu AI and Tsinghua University's KEG lab.
2. Download ChatGLM2-6B
https://github.com/THUDM/ChatGLM2-6B
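For example, clone it with git:
git clone https://github.com/THUDM/ChatGLM2-6B.git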
3. Create the environment
3.1 Create a conda environment (the -n flag is required; Python 3.10 is a reasonable choice, not mandated by the repo)
conda create -n ChatGLM2B python=3.10
3.2 Enter the ChatGLM2-6B directory
cd D:\workspace\opensource\openai\ChatGLM2-6B
3.3 Install the dependencies (note the -r flag; "pip install requirements.txt" would try to install a package literally named requirements.txt)
pip install -r requirements.txt
3.4 Download the model
Project page on Hugging Face: https://huggingface.co/THUDM/chatglm2-6b
The model weights have to be downloaded from there separately; this walkthrough uses the quantized chatglm2-6b-int4 variant.
Alternatively, the weights can be fetched by script, as sketched below.
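A minimal sketch using the huggingface_hub package (an assumption; it is not listed in the repo's requirements):

from huggingface_hub import snapshot_download

# download every file of the int4 repo into the local path the demos below point at
snapshot_download(repo_id="THUDM/chatglm2-6b-int4",
                  local_dir="D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4")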
3.5 Install tdm64-gcc-5.1.0-2
TDM-GCC provides the gcc that Windows needs to compile the int4 quantization kernels; be sure to tick the openmp component during installation (section 5.3 shows the compile error you get otherwise).
3.6 Install PyTorch
...
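The original leaves this step elided; for the CPU-only setup used in the demos below, one option is PyTorch's official CPU wheel index:
pip install torch --index-url https://download.pytorch.org/whl/cpu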
4. Modify some of the code
cli_demo.py
import os
import platform
import signal
from transformers import AutoTokenizer, AutoModel
try:
    import readline  # GNU readline is Unix-only; skip it on Windows (see section 5.2)
except ImportError:
    pass

tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
# model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).cuda()
# .float() runs the model on CPU in float32 instead of on a CUDA device:
model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
# Multi-GPU support: use the following two lines instead of the line above, and set num_gpus to your actual number of GPUs
# from utils import load_model_on_gpus
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
model = model.eval()
os_name = platform.system()
clear_command = 'cls' if os_name == 'Windows' else 'clear'
stop_stream = False
def build_prompt(history):
    prompt = "欢迎使用 ChatGLM2-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序"
    for query, response in history:
        prompt += f"\n\n用户:{query}"
        prompt += f"\n\nChatGLM2-6B:{response}"
    return prompt


def signal_handler(signal, frame):
    global stop_stream
    stop_stream = True
def main():
    past_key_values, history = None, []
    global stop_stream
    # register the Ctrl+C handler so a running generation can be interrupted
    # (as pasted, the handler above was defined but never wired up)
    signal.signal(signal.SIGINT, signal_handler)
    print("欢迎使用 ChatGLM2-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
    while True:
        query = input("\n用户:")
        if query.strip() == "stop":
            break
        if query.strip() == "clear":
            past_key_values, history = None, []
            os.system(clear_command)
            print("欢迎使用 ChatGLM2-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
            continue
        print("\nChatGLM:", end="")
        current_length = 0
        for response, history, past_key_values in model.stream_chat(tokenizer, query, history=history,
                                                                     past_key_values=past_key_values,
                                                                     return_past_key_values=True):
            if stop_stream:
                stop_stream = False
                break
            else:
                print(response[current_length:], end="", flush=True)
                current_length = len(response)
        print("")


if __name__ == "__main__":
    main()
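Before trying the demos, a quick smoke test of the local weights can save time. This is a sketch: quick_test.py is a hypothetical file name, the path is the one used above, and model.chat is the non-streaming counterpart of stream_chat on ChatGLM models.

# quick_test.py -- minimal smoke test for the local int4 weights
from transformers import AutoTokenizer, AutoModel

MODEL_DIR = "D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4"
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, trust_remote_code=True)
model = AutoModel.from_pretrained(MODEL_DIR, trust_remote_code=True).float().eval()

# one-shot, non-streaming call; returns the reply plus the updated history
response, history = model.chat(tokenizer, "你好", history=[])
print(response)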
web_demo.py
from transformers import AutoModel, AutoTokenizer
import gradio as gr
import mdtex2html
from utils import load_model_on_gpus
tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
#tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b", trust_remote_code=True)
#model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b", trust_remote_code=True).float()
# from utils import load_model_on_gpus
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
model = model.eval()
"""Override Chatbot.postprocess"""
def postprocess(self, y):
    if y is None:
        return []
    for i, (message, response) in enumerate(y):
        y[i] = (
            None if message is None else mdtex2html.convert(message),
            None if response is None else mdtex2html.convert(response),
        )
    return y


gr.Chatbot.postprocess = postprocess
def parse_text(text):
    """copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
    lines = text.split("\n")
    lines = [line for line in lines if line != ""]
    count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            count += 1
            items = line.split('`')
            if count % 2 == 1:
                lines[i] = f"<pre><code class='language-{items[-1]}'>"
            else:
                lines[i] = f"<br></code></pre>"
        else:
            if i > 0:
                if count % 2 == 1:
                    line = line.replace("`", "\`")
                    line = line.replace("<", "&lt;")
                    line = line.replace(">", "&gt;")
                    line = line.replace(" ", "&nbsp;")
                    line = line.replace("*", "&ast;")
                    line = line.replace("_", "&lowbar;")
                    line = line.replace("-", "&#45;")
                    line = line.replace(".", "&#46;")
                    line = line.replace("!", "&#33;")
                    line = line.replace("(", "&#40;")
                    line = line.replace(")", "&#41;")
                    line = line.replace("$", "&#36;")
                lines[i] = "<br>"+line
    text = "".join(lines)
    return text
def predict(input, chatbot, max_length, top_p, temperature, history, past_key_values):
    chatbot.append((parse_text(input), ""))
    # for response, history, past_key_values in model.stream_chat(tokenizer, input, history, past_key_values=past_key_values,
    #                                                             return_past_key_values=True,
    #                                                             max_length=max_length, top_p=top_p,
    #                                                             temperature=temperature):
    for response, history in model.stream_chat(tokenizer, input, history, past_key_values=past_key_values,
                                               return_past_key_values=False,
                                               max_length=max_length, top_p=top_p,
                                               temperature=temperature):
        chatbot[-1] = (parse_text(input), parse_text(response))
        yield chatbot, history, past_key_values


def reset_user_input():
    return gr.update(value='')


def reset_state():
    return [], [], None
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">ChatGLM2-6B</h1>""")

    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=4):
            with gr.Column(scale=12):
                # user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(container=False)
                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10)
            with gr.Column(min_width=32, scale=1):
                submitBtn = gr.Button("Submit", variant="primary")
        with gr.Column(scale=1):
            emptyBtn = gr.Button("Clear History")
            max_length = gr.Slider(0, 32768, value=8192, step=1.0, label="Maximum length", interactive=True)
            top_p = gr.Slider(0, 1, value=0.8, step=0.01, label="Top P", interactive=True)
            temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)

    history = gr.State([])
    past_key_values = gr.State(None)

    submitBtn.click(predict, [user_input, chatbot, max_length, top_p, temperature, history, past_key_values],
                    [chatbot, history, past_key_values], show_progress=True)
    submitBtn.click(reset_user_input, [], [user_input])

    emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)

# demo.queue().launch(share=False, inbrowser=True)
demo.queue().launch(share=False, inbrowser=True, server_name='0.0.0.0')
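Note: server_name='0.0.0.0' binds the Gradio server on all interfaces, so the demo is reachable from other machines on the LAN; drop the argument to keep it on localhost only.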
web_demo2.py
from transformers import AutoModel, AutoTokenizer
import streamlit as st

st.set_page_config(
    page_title="ChatGLM2-6b",
    page_icon=":robot:",
    layout='wide'
)
@st.cache_resource
def get_model():
    tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
    model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
    # from utils import load_model_on_gpus
    # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
    model = model.eval()
    return tokenizer, model


tokenizer, model = get_model()
st.title("ChatGLM2-6B")

max_length = st.sidebar.slider(
    'max_length', 0, 32768, 8192, step=1
)
top_p = st.sidebar.slider(
    'top_p', 0.0, 1.0, 0.8, step=0.01
)
temperature = st.sidebar.slider(
    'temperature', 0.0, 1.0, 0.8, step=0.01
)

if 'history' not in st.session_state:
    st.session_state.history = []

if 'past_key_values' not in st.session_state:
    st.session_state.past_key_values = None
for i, (query, response) in enumerate(st.session_state.history):
    with st.chat_message(name="user", avatar="user"):
        st.markdown(query)
    with st.chat_message(name="assistant", avatar="assistant"):
        st.markdown(response)

with st.chat_message(name="user", avatar="user"):
    input_placeholder = st.empty()
with st.chat_message(name="assistant", avatar="assistant"):
    message_placeholder = st.empty()

prompt_text = st.text_area(label="user input",
                           height=100,
                           placeholder="please")

button = st.button("send", key="predict")

if button:
    input_placeholder.markdown(prompt_text)
    history, past_key_values = st.session_state.history, st.session_state.past_key_values
    for response, history, past_key_values in model.stream_chat(tokenizer, prompt_text, history,
                                                                past_key_values=past_key_values,
                                                                max_length=max_length, top_p=top_p,
                                                                temperature=temperature,
                                                                return_past_key_values=True):
        message_placeholder.markdown(response)

    st.session_state.history = history
    st.session_state.past_key_values = past_key_values
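Note: @st.cache_resource makes Streamlit load the model only once across reruns, and history/past_key_values live in st.session_state, which is why each click of "send" continues the same conversation instead of starting over.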
5. Run the code
5.1 Activate the environment
conda activate ChatGLM2B
5.2 python openai_api.py
(ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>python openai_api.py
Traceback (most recent call last):
File "openai_api.py", line 16, in <module>
from sse_starlette.sse import ServerSentEvent, EventSourceResponse
ModuleNotFoundError: No module named 'sse_starlette'
Fix: install the missing module:
pip install sse_starlette -i https://pypi.douban.com/simple/
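With sse_starlette installed, openai_api.py should start an OpenAI-compatible server. A minimal client call, as a sketch (the port 8000 and model name "chatglm2-6b" are the script's defaults as far as I know; treat them as assumptions):

import requests

# assumes openai_api.py is serving on its default port 8000
resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "chatglm2-6b",
        "messages": [{"role": "user", "content": "你好"}],
    },
)
print(resp.json()["choices"][0]["message"]["content"])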
readline cannot be installed on Windows:
pip install readline -i https://pypi.douban.com/simple/
(ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>pip install readline -i https://pypi.douban.com/simple/
Looking in indexes: https://pypi.douban.com/simple/
Collecting readline
Downloading https://mirrors.cloud.tencent.com/pypi/packages/f4/01/2cf081af8d880b44939a5f1b446551a7f8d59eae414277fd0c303757ff1b/readline-6.2.4.1.tar.gz (2.3 MB)
---------------------------------------- 2.3/2.3 MB 3.1 MB/s eta 0:00:00
Preparing metadata (setup.py) ... error
error: subprocess-exited-with-error
× python setup.py egg_info did not run successfully.
│ exit code: 1
╰─> [1 lines of output]
error: this module is not meant to work on Windows
[end of output]
note: This error originates from a subprocess, and is likely not a problem with pip.
error: metadata-generation-failed
× Encountered error while generating package metadata.
╰─> See above for output.
note: This is an issue with the package mentioned above, not pip.
hint: See above for details.
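A workaround not shown in the original log: the readline package is Unix-only. On Windows, either keep the guarded import shown in cli_demo.py above, or install the drop-in substitute pyreadline3:
pip install pyreadline3 -i https://pypi.douban.com/simple/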
5.3 python web_demo.py
(ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>python web_demo.py
Failed to load cpm_kernels:No module named 'cpm_kernels'
C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.c:1:0: warning: -fPIC ignored for target (all code is position independent)
#include <omp.h>
^
gcc: error: libgomp.spec: No such file or directory
Compile parallel cpu kernel gcc -O3 -fPIC -pthread -fopenmp -std=c99 C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.c -shared -o C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.so failed.
C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels.c:1:0: warning: -fPIC ignored for target (all code is position independent)
void compress_int4_weight(void *weight, void *out, int n, int m)
^
Running on local URL: http://0.0.0.0:7860
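The parallel kernel failed to compile, but the plain kernel (quantization_kernels.c) only produced a warning, so the demo still starts. The libgomp.spec error usually means TDM-GCC was installed without its openmp component (see 3.5); re-running the installer with openmp ticked lets the parallel kernel compile as well.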
5.4 streamlit run web_demo2.py
(ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>streamlit run web_demo2.py
You can now view your Streamlit app in your browser.
Local URL: http://localhost:8501
Network URL: http://192.168.1.103:8501
With that, the large language model is up and running locally.