#deepLearning/llm/1
# Loading the model

When working with an LLM, two things need to be loaded:
1. the model itself
2. the tokenizer
```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model onto the GPU, then load the matching tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

print(model)
```
```
Phi3ForCausalLM(
  (model): Phi3Model(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-31): 32 x Phi3DecoderLayer(
        (self_attn): Phi3Attention(
          (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (qkv_proj): Linear(in_features=3072, out_features=9216, bias=False)
          (rotary_emb): Phi3RotaryEmbedding()
        )
        (mlp): Phi3MLP(
          (gate_up_proj): Linear(in_features=3072, out_features=16384, bias=False)
          (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): Phi3RMSNorm()
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
        (post_attention_layernorm): Phi3RMSNorm()
      )
    )
    (norm): Phi3RMSNorm()
  )
  (lm_head): Linear(in_features=3072, out_features=32064, bias=False)
)
```
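Reading the printout: 32 decoder layers, hidden size 3072, and a 32064-token vocabulary shared between `embed_tokens` and `lm_head`. As a quick sanity check (a minimal sketch, assuming the `model` loaded above), the parameter count can be read straight off the module:

```python
# Sum all parameter tensors to sanity-check the printed architecture.
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e9:.2f}B parameters")  # Phi-3-mini should come out around 3.8B
```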
```python
tokenizer.special_tokens_map
```
```
{'bos_token': '<s>', 'eos_token': '<|endoftext|>', 'unk_token': '<unk>', 'pad_token': '<|endoftext|>'}
```
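These special tokens frame every prompt; the tokenizer's day-to-day job is mapping text to integer IDs and back. A small round trip (the example string is just illustrative):

```python
# Encode a string into token IDs, inspect the subword pieces, and decode it back.
ids = tokenizer("Chickens tell great jokes.")["input_ids"]
print(ids)                                   # integer token IDs
print(tokenizer.convert_ids_to_tokens(ids))  # the subword strings behind those IDs
print(tokenizer.decode(ids, skip_special_tokens=True))  # back to plain text
```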
The tokenizer is responsible for splitting the input text into tokens, which are then fed to the generative model.

# Building the prompt

With the model and tokenizer loaded, there are two ways to run generation (continuing the numbering above):
3. call `generate()` directly
4. call a `pipeline`
```python
messages = [
    {"role": "user", "content": "Create a funny joke about chickens."}
]

# Apply the chat template to build the prompt string, then tokenize it.
text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

print(text)
print("====" * 10)
print(model_inputs)
```
```
<|user|>
Create a funny joke about chickens.<|end|>
<|assistant|>
========================================
{'input_ids': tensor([[32010, 6204, 263, 2090, 1460, 2958, 446, 1048, 521, 860, 575, 29889, 32007, 32001]], device='cuda:0'),
 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}
```
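The 14 IDs are the templated prompt: the chat template has wrapped the user message in special role tokens. Decoding the IDs back (an optional check, not needed for generation) makes that visible:

```python
# Decode the templated IDs back to text, keeping the special tokens visible.
print(tokenizer.decode(model_inputs["input_ids"][0], skip_special_tokens=False))
# should reproduce the <|user|> ... <|end|> <|assistant|> prompt string above
```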
```python
# Generate up to 512 new tokens, then strip the prompt tokens from each output.
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=512
)
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
```
```
Why did the chicken join the band? Because it had the drumsticks!
```
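With the defaults above, `generate()` performs greedy decoding: at each step it picks the most probable next token, appends it, and feeds the longer sequence back in until a stop token or the token budget is reached. A minimal sketch of that loop (illustration only; the real `generate()` adds KV caching, stopping criteria from `generation_config`, sampling strategies, and more):

```python
import torch

ids = model_inputs["input_ids"]
# <|end|> closes an assistant turn in the Phi-3 chat template; <|endoftext|> is the EOS token.
stop_ids = {tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|end|>")}
with torch.no_grad():
    for _ in range(50):                                      # cap the number of new tokens
        logits = model(input_ids=ids).logits                 # [batch, seq_len, vocab]
        next_id = logits[:, -1, :].argmax(-1, keepdim=True)  # greedy pick of the next token
        ids = torch.cat([ids, next_id], dim=-1)              # append and feed back in
        if next_id.item() in stop_ids:
            break
print(tokenizer.decode(ids[0, model_inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```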
```python
from transformers import pipeline

# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,   # return only the newly generated text, not the prompt
    max_new_tokens=500,
    do_sample=False           # greedy decoding, deterministic output
)
```
```python
messages = [
    {"role": "user", "content": "Create a funny joke about chickens."}
]

output = generator(messages)
print(output[0]["generated_text"])
```
```
Why did the chicken join the band? Because it had the drumsticks!
```
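Because the pipeline was built with `do_sample=False`, the output is deterministic and matches the earlier `generate()` call. Generation kwargs can also be passed at call time; a sketch with sampling switched on (the parameter values are just illustrative):

```python
# Sample instead of greedy decoding, so repeated calls can return different jokes.
output = generator(messages, do_sample=True, temperature=0.8, top_p=0.95)
print(output[0]["generated_text"])
```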