Commit

update scripts
goliaro committed Nov 6, 2023
1 parent 1c231ba commit 7c65521
Showing 3 changed files with 39 additions and 22 deletions.
4 changes: 2 additions & 2 deletions tests/peft/fine_tune.sh
@@ -7,8 +7,8 @@ cd "${BASH_SOURCE[0]%/*}"
 
 python hf_finetune.py --model-name decapoda-research/llama-7b-hf --lora-target-modules down_proj --use-full-precision --publish-peft-with-id goliaro/llama-7b-lora-full
 python hf_finetune.py --model-name decapoda-research/llama-7b-hf --lora-target-modules down_proj --publish-peft-with-id goliaro/llama-7b-lora-half
-python hf_finetune.py --model-name JackFram/llama-160m-base --lora-target-modules down_proj --use-full-precision --publish-peft-with-id goliaro/llama-160m-lora-full
-python hf_finetune.py --model-name JackFram/llama-160m-base --lora-target-modules down_proj --publish-peft-with-id goliaro/llama-160m-lora-half
+python hf_finetune.py --model-name JackFram/llama-160m --lora-target-modules down_proj --use-full-precision --publish-peft-with-id goliaro/llama-160m-lora-full
+python hf_finetune.py --model-name JackFram/llama-160m --lora-target-modules down_proj --publish-peft-with-id goliaro/llama-160m-lora-half
 
 python hf_finetune.py --model-name meta-llama/Llama-2-7b-hf --lora-target-modules down_proj --use-full-precision --publish-peft-with-id goliaro/llama-2-7b-lora-full
 python hf_finetune.py --model-name meta-llama/Llama-2-7b-hf --lora-target-modules down_proj --publish-peft-with-id goliaro/llama-2-7b-lora-half
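For context, the --publish-peft-with-id flag used above presumably uploads the trained adapter to the Hugging Face Hub under the given id, which is how the goliaro/... repositories referenced here and in hf_serve.py below get populated. A minimal sketch of that step (illustrative only, not the repository's actual implementation; it assumes a trained PeftModel and an authenticated Hub session):

from peft import PeftModel

def publish_adapter(peft_model: PeftModel, peft_repo_id: str) -> None:
    # Pushes only the LoRA adapter weights and adapter_config.json to the Hub;
    # the base model itself is not uploaded. Assumes the user is already logged
    # in (e.g. via `huggingface-cli login`).
    peft_model.push_to_hub(peft_repo_id)

# e.g. publish_adapter(model, "goliaro/llama-160m-lora-full")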
2 changes: 1 addition & 1 deletion tests/peft/hf_finetune.py
@@ -29,7 +29,7 @@ def print_trainable_parameters(model):
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--model-name", type=str, default="decapoda-research/llama-7b-hf")
+    parser.add_argument("--model-name", type=str, default="meta-llama/Llama-2-7b-hf")
     parser.add_argument("--lora-rank", type=int, default=16)
     parser.add_argument("--lora-alpha", type=int, default=32)
     parser.add_argument("--lora-target-modules", type=str, default="down_proj", help="Comma-separated list of layers from the base model to target")
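For reference, the --lora-rank, --lora-alpha, and --lora-target-modules arguments map naturally onto a peft LoraConfig. The sketch below shows one plausible wiring; the function name and structure are illustrative assumptions, not the actual contents of hf_finetune.py:

# Illustrative sketch: turning the CLI arguments above into a PEFT model.
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

def build_lora_model(args):
    base_model = AutoModelForCausalLM.from_pretrained(args.model_name)
    lora_config = LoraConfig(
        r=args.lora_rank,                                    # default 16
        lora_alpha=args.lora_alpha,                          # default 32
        target_modules=args.lora_target_modules.split(","),  # e.g. ["down_proj"]
        task_type=TaskType.CAUSAL_LM,
    )
    return get_peft_model(base_model, lora_config)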
55 changes: 36 additions & 19 deletions tests/peft/hf_serve.py
@@ -2,51 +2,68 @@
 import torch
 import os, sys
 from peft import PeftModel, PeftConfig
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, LlamaTokenizer
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    AutoConfig,
+    LlamaTokenizer,
+    GenerationConfig,
+)
 
 
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--peft-model-id", type=str, default="./finetuned-llama")
-    parser.add_argument("--use-full-precision", action="store_true", help="Use full precision")
+    parser.add_argument(
+        "--use-full-precision", action="store_true", help="Use full precision"
+    )
     parser.add_argument("--max-new-tokens", type=int, default=50)
+    parser.add_argument("--do-sample", action="store_true", help="Use sampling")
     args = parser.parse_args()
     peft_model_id = args.peft_model_id
-    #peft_model_id = "goliaro/llama-7b-lora-half"
-    use_full_precision=args.use_full_precision
+    use_full_precision = args.use_full_precision
     max_new_tokens = args.max_new_tokens
 
     # Change working dir to folder storing this script
     abspath = os.path.abspath(__file__)
     dname = os.path.dirname(abspath)
     os.chdir(dname)
 
     config = PeftConfig.from_pretrained(peft_model_id)
     model = AutoModelForCausalLM.from_pretrained(
-        config.base_model_name_or_path,
-        return_dict=True,
-        #load_in_8bit=True,
-        torch_dtype = torch.float32 if use_full_precision else torch.float16,
-        device_map='auto',
+        config.base_model_name_or_path,
+        return_dict=True,
+        # load_in_8bit=True,
+        torch_dtype=torch.float32 if use_full_precision else torch.float16,
+        device_map="auto",
     )
-    hf_config = AutoConfig.from_pretrained(config.base_model_name_or_path, trust_remote_code=True)
+    hf_config = AutoConfig.from_pretrained(
+        config.base_model_name_or_path, trust_remote_code=True
+    )
     hf_arch = getattr(hf_config, "architectures")[0]
     if hf_arch == "LLaMAForCausalLM" or hf_arch == "LlamaForCausalLM":
         tokenizer = LlamaTokenizer.from_pretrained(
-            config.base_model_name_or_path, use_fast=True,
-            torch_dtype = torch.float32 if use_full_precision else torch.float16,
+            config.base_model_name_or_path,
+            use_fast=True,
+            torch_dtype=torch.float32 if use_full_precision else torch.float16,
         )
     else:
         tokenizer = AutoTokenizer.from_pretrained(
-            config.base_model_name_or_path,
-            torch_dtype = torch.float32 if use_full_precision else torch.float16,
+            config.base_model_name_or_path,
+            torch_dtype=torch.float32 if use_full_precision else torch.float16,
         )
 
+    # Generation config
+    generation_config = GenerationConfig.from_pretrained(config.base_model_name_or_path)
+    generation_config.do_sample = args.do_sample
     # Load the Lora model
     model = PeftModel.from_pretrained(model, peft_model_id)
-    batch = tokenizer("Two things are infinite: ", return_tensors='pt')
+    batch = tokenizer("Two things are infinite: ", return_tensors="pt")
     with torch.cuda.amp.autocast():
-        output_tokens = model.generate(**batch, max_new_tokens=max_new_tokens)
-        print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))
+        output_tokens = model.generate(
+            **batch, max_new_tokens=max_new_tokens, generation_config=generation_config
+        )
+        print("\n\n", tokenizer.decode(output_tokens[0], skip_special_tokens=False))
 
 
 if __name__ == "__main__":
     main()
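As a usage note, hf_serve.py can presumably be pointed at any of the adapters published by fine_tune.sh above, for example something like: python hf_serve.py --peft-model-id goliaro/llama-160m-lora-full --use-full-precision --do-sample --max-new-tokens 50 (the exact flags depend on how the adapter was trained; this invocation is illustrative, not taken from the repository).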
