import torch
# Checkpoint inspected when no path is given on the command line.
CHECKPOINT_PATH = '/home/ckpt/llama/65B/checkpoints/actor/llama-65B_epoch_0000000_step_0069999.pt/65b_4.bin'

# Other checkpoints this script has been pointed at (pass one as argv[1]):
#   /home/llama-33B_epoch_0000_step_0004.pt/llama.bin
#   /home/ckpt/llama/30B/consolidated.00.pth
#   /home/ckpt/llama/33B/checkpoints/actor/llama-33B_epoch_0000_step_0004.pt/global_step5/mp_rank_02_model_states.pt
#   /home/ckpt/trainllama/train_zero2_step0/actor/llama-33B_epoch_000_step_000.pt/llama_0.bin
#   /home/ckpt/llama/llama-7B_epoch_0000_step_2199.pt/global_step2200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt


def preview_value(value, rows=10, cols=5):
    """Return a small, printable slice of one checkpoint entry.

    Args:
        value: An entry taken from the loaded checkpoint mapping.
        rows: Maximum number of leading elements kept along dimension 0.
        cols: Maximum number of leading elements kept along dimension 1.

    Returns:
        For tensors with two or more dimensions, ``value[0:rows, 0:cols]``.
        For 1-D tensors, ``value[0:rows]``. 0-D tensors and non-tensor
        entries are returned unchanged.
    """
    if not isinstance(value, torch.Tensor):
        # Checkpoints may hold non-tensor entries; they cannot be sliced
        # with a 2-D index, so hand them back as they are.
        return value
    if value.dim() >= 2:
        return value[0:rows, 0:cols]
    if value.dim() == 1:
        # A 2-D index on a 1-D tensor raises IndexError.
        return value[0:rows]
    return value


def dump_state_dict(state_dict, only_key=None):
    """Print the name, type, and a value preview of every checkpoint entry.

    Args:
        state_dict: Mapping of entry names to values, as returned by
            ``torch.load``.
        only_key: When given, only the entry with exactly this name is
            printed (e.g. ``'model.layers.1.attention.wq.weight'``).
    """
    for name in state_dict:
        if only_key is not None and name != only_key:
            continue
        entry = state_dict[name]
        print(name)
        print(type(entry))
        print("value:", preview_value(entry))


def main():
    """Load the checkpoint and dump a preview of its contents."""
    import sys

    path = sys.argv[1] if len(sys.argv) > 1 else CHECKPOINT_PATH
    # NOTE: torch.load unpickles the file; only open checkpoints you trust.
    # map_location="cpu" lets a shard saved from a GPU be inspected on a
    # machine without that device.
    state_dict = torch.load(path, map_location="cpu")
    print(type(state_dict))
    dump_state_dict(state_dict)


if __name__ == "__main__":
    main()
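# Original article: https://blog.csdn.net/upwind_fly/article/details/134555034
# This article was submitted by an internet user; the views expressed are the author's alone and do not represent this site's position. This site only provides information storage services, holds no ownership, and assumes no related legal liability.
# If you repost this article, please credit the source: http://www.7code.cn/show_2177.html
# If any content is infringing, illegal, or factually inaccurate, please contact the Code007 mailbox at suwngjj01@126.com to file a complaint; once verified, it will be removed immediately.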