ref:
- https://github.com/d-matrix-ai/keyformer-llm
- https://arxiv.org/pdf/2403.09054
## 1 Software environment prep

```bash
wget https://repo.anaconda.com/archive/Anaconda3-2024.06-1-Linux-x86_64.sh
sudo chmod a+x ./Anaconda3-2024.06-1-Linux-x86_64.sh
./Anaconda3-2024.06-1-Linux-x86_64.sh

wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-ubuntu2404.pin
sudo mv cuda-ubuntu2404.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/cuda-repo-ubuntu2404-12-5-local_12.5.1-555.42.06-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu2404-12-5-local_12.5.1-555.42.06-1_amd64.deb
sudo cp /var/cuda-repo-ubuntu2404-12-5-local/cuda-*-keyring.gpg /usr/share/keyrings/
sudo apt-get update
sudo apt-get -y install cuda-toolkit-12-5
sudo apt-get install -y nvidia-driver-555-open
sudo apt-get install -y cuda-drivers-555
```
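After a reboot, a quick check from Python confirms the GPU stack is usable. A minimal sketch, assuming torch is already installed in the active environment (it gets installed in the next section):

```python
import torch

# Verify the CUDA version torch was built against and that a GPU is visible.
print("torch:", torch.__version__)
print("built with CUDA:", torch.version.cuda)
print("GPU available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))
```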
## 2 Repo init

```bash
git clone https://github.com/d-matrix-ai/keyformer-llm.git
conda env create --file=conda-env.yml
conda activate keyformer-env
pip install torch flash_attn accelerate==0.32.1
pip install --upgrade transformers

# Clone the model repos without LFS weights; we only need the config/code files.
cd models
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/mosaicml/mpt-7b mpt-7b-keyformer
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/cerebras/Cerebras-GPT-6.7B cerebras-6.7b-keyformer
cd ..

# Download weights and move them in one model at a time: download_model.py writes
# into models/model_download/model/, so running both downloads before the first mv
# would leave nothing for the second model.
cd models/model_download
python3 download_model.py --model_name mosaicml/mpt-7b
cd ../..
mv models/model_download/model/* models/mpt-7b-keyformer/

cd models/model_download
python3 download_model.py --model_name cerebras/Cerebras-GPT-6.7B
cd ../..
mv models/model_download/model/* models/cerebras-6.7b-keyformer/

# Overlay the Keyformer-patched modeling code onto each checkpoint.
cp -r models/mpt-keyformer-lib/* models/mpt-7b-keyformer
cp -r models/cerebras-keyformer-lib/* models/cerebras-6.7b-keyformer
```
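The patched directories load like any local HF checkpoint, with the Keyformer knobs attached to the config. A minimal sketch: the `keyformer_config` keys come from the `modeling_gpt2.py` edits shown in the next section, the values mirror the run-script flags, and `tau_delta` here is an assumed placeholder; the repo's run scripts set all of this from CLI flags.

```python
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

model_path = "models/mpt-7b-keyformer"  # local, patched checkpoint

config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
# Keys mirror the config.keyformer_config lookups in the patched modeling code.
config.keyformer_config = {
    "keyformer": True,
    "kv_cache": 60,     # KV cache budget, as in the run scripts
    "recent": 30,       # recent-window portion of the budget
    "tau_init": 1.0,    # initial Gumbel softmax temperature
    "tau_delta": 0.01,  # per-step temperature increment (assumed value)
}

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, config=config, trust_remote_code=True
)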
## Run summarization

```bash
cd summarization/dataset_download
pip install -U datasets
HF_ENDPOINT=https://hf-mirror.com python download_cnndm.py
cd ../
# Adjust the home directory to your own user: transformers caches remote-code
# modules here and must pick up the patched file.
cp ../models/mpt-7b-keyformer/fc.py /home/nmhn/.cache/huggingface/modules/transformers_modules/mpt-7b-keyformer/
```

Patch the Cerebras modeling code (`vim ../models/cerebras-6.7b-keyformer/modeling_gpt2.py`):

```python
# around line 629: the patched attention returns extra bookkeeping outputs
self.req_tokens = attn_outputs[7]
self.itr_count = attn_outputs[6]

# around line 919: read the Keyformer hyperparameters off the config
self.keyformer = config.keyformer_config["keyformer"]
self.kv_cache = config.keyformer_config["kv_cache"]
self.recent = config.keyformer_config["recent"]
self.tau_init = config.keyformer_config["tau_init"]
self.tau_delta = config.keyformer_config["tau_delta"]
```

```bash
chmod a+x run_summarization_task.sh
./run_summarization_task.sh
```
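If `download_cnndm.py` stalls behind the firewall, the dataset can also be fetched directly through the mirror. A sketch using the `datasets` library; the exact config/split the repo's script expects may differ:

```python
import os
# Must be set before importing datasets/huggingface_hub to take effect.
os.environ.setdefault("HF_ENDPOINT", "https://hf-mirror.com")

from datasets import load_dataset

# CNN/DailyMail summarization set; "3.0.0" is the standard config on the Hub.
ds = load_dataset("cnn_dailymail", "3.0.0", split="validation")
print(ds[0]["article"][:200])
print(ds[0]["highlights"])
```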
## Run conversation

```bash
cd conversation/dataset_download
HF_ENDPOINT=https://hf-mirror.com python download_soda.py
cd ..
chmod a+x ./run_conversation_task.sh
```

Patch the generation post-processing (the snippet is Python, so it goes in the task script that `run_conversation_task.sh` invokes; see the illustration after the flags below):

```python
# Only slice off the prompt when there is a batch dimension.
if dialogue.dim() > 1:
    dialogue = dialogue[:, input_ids.shape[-1]:].cpu()
else:
    dialogue = dialogue.cpu()
```

```bash
./run_conversation_task.sh --model_name mosaicml/mpt-7b \
    --dataset_path ./data/soda_eval.json \
    --save_path ./out_model.conversation \
    --score_path ./out_model.score \
    --model_path ../models/mpt-7b-keyformer \
    --attentions_path ./out_model.attention \
    --device cuda \
    --task summarization \
    --bs 1 \
    --dtype float16 \
    --causal_lm \
    --early_stopping \
    --output_summaries_only \
    --output_sequence_scores \
    --save_attentions \
    --save_prompt_attentions \
    --padding_side left \
    --beam 4 \
    --model_parallelize \
    --keyformer \
    --kv_cache 60 \
    --recent 30 \
    --tau_init 1 \
    --tau_end 2 \
    --no_repeat_ngram_size 0 \
    --repetition_penalty 1 \
    --max_tokenizer_length 1920 \
    --max_new_tokens 128 \
    --min_gen_length 30 \
    --num_return_sequences 1 \
    --seed 12345 \
    --n_obs 1000
```
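The patch is needed because, for decoder-only models, `generate()` returns the prompt tokens concatenated with the newly generated ones; dropping the first `input_ids.shape[-1]` positions keeps only the continuation. A standalone illustration, using gpt2 purely as a small stand-in model:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")        # stand-in for illustration
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tok("Hello, how are", return_tensors="pt").input_ids
out = model.generate(input_ids, max_new_tokens=8)  # (batch, prompt_len + new)

# Same slicing as the patch: drop the echoed prompt, keep only new tokens.
generated = out[:, input_ids.shape[-1]:] if out.dim() > 1 else out
print(tok.decode(generated[0]))
```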
## Important functions

The core selection logic lives in `keyformer_mask`:

```python
# models/[model_name]/attention_llm_eval_harness.py
keyformer_mask
```
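Per the paper, `keyformer_mask` selects which KV-cache positions survive: Gumbel noise is added to the attention logits, the perturbed softmax scores are accumulated across decode steps, and the `recent` most recent tokens plus the highest-scoring older tokens are kept within the `kv_cache` budget, with temperature annealed from `tau_init` toward `tau_end`. A simplified sketch of that idea, not the repo's actual implementation; the 1-D shapes and exact score accumulation are assumptions:

```python
import torch

def keyformer_mask_sketch(attn_logits, score_acc, kv_cache, recent, tau):
    """Pick which KV positions to keep, per the Keyformer recipe (simplified).

    attn_logits: (seq_len,) current query's attention logits over past tokens
    score_acc:   (seq_len,) running score accumulator across decode steps
    kv_cache:    total number of KV entries to keep
    recent:      number of most-recent entries always kept (recent window)
    tau:         Gumbel softmax temperature (annealed from tau_init to tau_end)
    """
    # Gumbel noise regularizes the score so selection is not purely greedy.
    gumbel = -torch.log(-torch.log(torch.rand_like(attn_logits)))
    score_acc = score_acc + torch.softmax((attn_logits + gumbel) / tau, dim=-1)

    seq_len = attn_logits.shape[-1]
    keep = torch.zeros(seq_len, dtype=torch.bool)
    if recent > 0:
        keep[-recent:] = True                  # recent window is always kept
    k = max(kv_cache - recent, 0)              # budget left for "key" tokens
    older = score_acc[:-recent] if recent > 0 else score_acc
    if k > 0 and older.numel() > 0:
        topk = torch.topk(older, min(k, older.numel())).indices
        keep[topk] = True                      # highest cumulative scores survive
    return keep, score_acc
```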
## TODO: analysis