Keyformer Environment Setup and Analysis

References:
https://github.com/d-matrix-ai/keyformer-llm
https://arxiv.org/pdf/2403.09054

1 Software environment preparation

# install Anaconda
wget https://repo.anaconda.com/archive/Anaconda3-2024.06-1-Linux-x86_64.sh
sudo chmod a+x ./Anaconda3-2024.06-1-Linux-x86_64.sh
./Anaconda3-2024.06-1-Linux-x86_64.sh
# install CUDA 12.5 toolkit and driver (Ubuntu 24.04)
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-ubuntu2404.pin
sudo mv cuda-ubuntu2404.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/cuda-repo-ubuntu2404-12-5-local_12.5.1-555.42.06-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu2404-12-5-local_12.5.1-555.42.06-1_amd64.deb
sudo cp /var/cuda-repo-ubuntu2404-12-5-local/cuda-*-keyring.gpg /usr/share/keyrings/
sudo apt-get update
sudo apt-get -y install cuda-toolkit-12-5
sudo apt-get install -y nvidia-driver-555-open
sudo apt-get install -y cuda-drivers-555
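After the driver and toolkit are in place (and once PyTorch is installed in step 2), a quick sanity check can confirm that the GPU is visible from Python; a minimal sketch:

# minimal check that PyTorch sees the GPU (run inside the keyformer-env environment)
import torch

print(torch.__version__)           # PyTorch build
print(torch.version.cuda)          # CUDA version PyTorch was built against
print(torch.cuda.is_available())   # should print True after a successful install
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))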

2 Repository initialization

git clone https://github.com/d-matrix-ai/keyformer-llm.git
conda env create --file=conda-env.yml
conda activate keyformer-env
pip install torch flash_attn accelerate==0.32.1
pip install --upgrade transformers
# clone the model repos (LFS weights skipped via GIT_LFS_SKIP_SMUDGE)
cd models
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/mosaicml/mpt-7b mpt-7b-keyformer
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/cerebras/Cerebras-GPT-6.7B cerebras-6.7b-keyformer
cd ..
cd models/model_download
# this step may need a proxy to reach huggingface.co and requires CUDA (> 12.0)
# if you hit errors, try changing the download_model function to pass model_name, trust_remote_code=True, torchscript=True
python3 download_model.py --model_name mosaicml/mpt-7b
python3 download_model.py --model_name cerebras/Cerebras-GPT-6.7B

cd ../../
# note: if both downloads land in the same model/ directory, move each model's files before downloading the next
mv models/model_download/model/* models/mpt-7b-keyformer/
mv models/model_download/model/* models/cerebras-6.7b-keyformer/
cp -r models/mpt-keyformer-lib/* models/mpt-7b-keyformer
cp -r models/cerebras-keyformer-lib/* models/cerebras-6.7b-keyformer
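If download_model.py errors out, the adjustment suggested in the comment above amounts to loading with remote code trusted and torchscript enabled; a hedged sketch of that kind of call (argument names mirror the comment, not the script's actual code):

# illustrative download with the suggested settings; paths and names are examples
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "mosaicml/mpt-7b"   # or "cerebras/Cerebras-GPT-6.7B"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,      # MPT ships custom modeling code on the Hub
    torchscript=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.save_pretrained("./model")
tokenizer.save_pretrained("./model")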

3 Run summarization

cd summarization/dataset_download
pip install -U datasets
# if this step hangs during the Mapping phase, try removing the cache_dir setting on line 23
# the HF_ENDPOINT setting only takes effect when http_proxy and https_proxy are unset
HF_ENDPOINT=https://hf-mirror.com python download_cnndm.py
cd ../
cp ../models/mpt-7b-keyformer/fc.py /home/nmhn/.cache/huggingface/modules/transformers_modules/mpt-7b-keyformer/  # adjust the cache path to your own user
vim cerebras-6.7b-keyformer
# change line 18 to: from typing import Dict, Any, List, Mapping, Optional
vim modeling_gpt2.py

# line 629:
self.req_tokens = attn_outputs[7]
self.itr_count = attn_outputs[6]
# line 919:
self.keyformer = config.keyformer_config["keyformer"]
self.kv_cache = config.keyformer_config["kv_cache"]
self.recent = config.keyformer_config["recent"]
self.tau_init = config.keyformer_config["tau_init"]
self.tau_delta = config.keyformer_config["tau_delta"]

chmod a+x run_summarization_task.sh
./run_summarization_task.sh
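The line-919 edit reads its settings from config.keyformer_config. Where the repo builds that dict is not shown here, but a minimal sketch of attaching one to a Hugging Face config (key names taken from the edit above, values copied from the run-script flags; the tau_delta value is only illustrative):

# illustrative only: the shape of the keyformer_config dict read at line 919
from transformers import AutoConfig

config = AutoConfig.from_pretrained("../models/cerebras-6.7b-keyformer")
config.keyformer_config = {
    "keyformer": True,    # --keyformer
    "kv_cache": 60,       # --kv_cache 60
    "recent": 30,         # --recent 30
    "tau_init": 1.0,      # --tau_init 1
    "tau_delta": 0.01,    # assumed: per-step temperature step toward --tau_end
}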

4 Run conversation

cd conversation/dataset_download
HF_ENDPOINT=https://hf-mirror.com python download_soda.py
chmod a+x ./run_conversation_task.sh
vim ./run_conversation_task.sh # add HF_ENDPOINT=https://hf-mirror.com before the python command
# modify line 413 to:
                    if dialogue.dim() > 1:
                        dialogue = dialogue[:, input_ids.shape[-1]:].cpu()
                    else:
                        dialogue = dialogue.cpu()
./run_conversation_task.sh

--model_name mosaicml/mpt-7b \
--dataset_path ./data/soda_eval.json \
--save_path ./out_model.conversation \
--score_path ./out_model.score \
--model_path ../models/mpt-7b-keyformer \
--attentions_path ./out_model.attention \
--device cuda \
--task summarization \
--bs 1 \
--dtype float16 \
--causal_lm \
--early_stopping \
--output_summaries_only \
--output_sequence_scores \
--save_attentions \
--save_prompt_attentions \
--padding_side left \
--beam 4 \
--model_parallelize \
--keyformer \
--kv_cache 60 \
--recent 30 \
--tau_init 1 \
--tau_end 2 \
--no_repeat_ngram_size 0 \
--repetition_penalty 1 \
--max_tokenizer_length 1920 \
--max_new_tokens 128 \
--min_gen_length 30 \
--num_return_sequences 1 \
--seed 12345 \
--n_obs 1000
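For reference, many of these flags correspond to standard Hugging Face generation arguments; a hedged sketch of the equivalent generate() call (the script may wire them differently, and the mappings marked as assumed are guesses):

# rough mapping of the run-script flags onto transformers generation arguments
import torch
from transformers import PreTrainedModel

def generate_like_run_script(model: PreTrainedModel, input_ids: torch.Tensor):
    return model.generate(
        input_ids,
        num_beams=4,                 # --beam 4
        max_new_tokens=128,          # --max_new_tokens 128
        min_new_tokens=30,           # --min_gen_length 30 (assumed mapping)
        early_stopping=True,         # --early_stopping
        no_repeat_ngram_size=0,      # --no_repeat_ngram_size 0
        repetition_penalty=1.0,      # --repetition_penalty 1
        num_return_sequences=1,      # --num_return_sequences 1
        output_scores=True,          # --output_sequence_scores (assumed mapping)
        return_dict_in_generate=True,
    )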

Important functions

# models/[model_name]/attention_llm_eval_harness.py
keyformer_mask
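keyformer_mask is where the KV-cache eviction decision is made. The repository's implementation is not reproduced here, but the core idea from the paper (add Gumbel noise to the attention score, take a temperature softmax, then keep the recent window plus the highest-scoring older tokens) can be sketched roughly as follows; function and argument names are mine, not the repo's:

# paraphrased sketch of Keyformer-style KV-cache token selection (not the repo's code)
import torch
import torch.nn.functional as F

def keyformer_style_keep_mask(attn_logits: torch.Tensor, kv_budget: int, recent: int, tau: float) -> torch.Tensor:
    """Return a boolean mask of tokens to keep in the KV cache.

    attn_logits: (seq_len,) unnormalized attention scores for one head
    kv_budget:   total number of tokens to keep
    recent:      size of the always-kept recent window
    tau:         Gumbel-softmax temperature
    """
    seq_len = attn_logits.shape[-1]
    # Gumbel noise regularizes the score so eviction is not purely greedy
    gumbel = -torch.log(-torch.log(torch.rand(seq_len) + 1e-9) + 1e-9)
    score = F.softmax((attn_logits + gumbel) / tau, dim=-1)

    keep = torch.zeros(seq_len, dtype=torch.bool)
    keep[seq_len - recent:] = True                    # always keep the recent window
    older = score.masked_fill(keep, float("-inf"))    # exclude the window from top-k
    k = min(max(kv_budget - recent, 0), seq_len - recent)
    keep[torch.topk(older, k).indices] = True         # keep top-scoring older tokens
    return keep

In the paper the score is accumulated across decoding iterations and tau is annealed from tau_init toward tau_end, which is presumably what the tau_delta field above tracks.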

TODO: analysis
