convert-h5-to-ggml.py

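"""Convert a Hugging Face checkpoint directory (config.json, PyTorch weights,
and a SentencePiece spiece.model) into a single ggml binary file, optionally
casting 2-D weight matrices to float16.

Assumed dependencies (not pinned by this script): numpy, torch, transformers,
protobuf, and sentencepiece, which ships the sentencepiece_model_pb2 schema.
"""
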
from pathlib import Path
import sys
import struct
import json

import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer

# protobuf schema for the SentencePiece tokenizer model; renamed from "model"
# to avoid shadowing the transformers model loaded below
import sentencepiece.sentencepiece_model_pb2 as sp_pb2

if len(sys.argv) < 2:
    print("Usage: convert-h5-to-ggml.py dir-model [use-f32]\n")
    print("  ftype == 0 -> float32")
    print("  ftype == 1 -> float16")
    sys.exit(1)

# output in the same directory as the model
dir_model = sys.argv[1]
fname_out = dir_model + "/ggml-model.bin"

with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

sp_proto = sp_pb2.ModelProto()
with open(Path(dir_model) / "spiece.model", "rb") as f:
    sp_proto.ParseFromString(f.read())

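# The pieces and scores from the SentencePiece model are copied verbatim into
# the ggml file below, so the consumer does not have to load spiece.model
# itself (see the TODO near the vocab-writing loop).
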
# possible data types
#   ftype == 0 -> float32
#   ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]

ftype = 1
if len(sys.argv) > 2:
    ftype = int(sys.argv[2])
    if ftype < 0 or ftype > 1:
        print("Invalid ftype: " + str(ftype))
        sys.exit(1)
    fname_out = dir_model + "/ggml-model-" + ftype_str[ftype] + ".bin"

tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    dir_model, low_cpu_mem_usage=True, trust_remote_code=True
)
# print(model)
# print(tokenizer.encode('I believe the meaning of life is'))

list_vars = model.state_dict()
for name in list_vars.keys():
    print(name, list_vars[name].shape, list_vars[name].dtype)

fout = open(fname_out, "wb")

print(hparams)

fout.write(struct.pack("i", 0x67676D6C))  # magic: "ggml" in hex
fout.write(struct.pack("i", hparams["d_model"]))
fout.write(struct.pack("i", hparams["max_seq_len"]))
fout.write(struct.pack("i", hparams["n_heads"]))
fout.write(struct.pack("i", hparams["n_layers"]))
fout.write(struct.pack("i", hparams["vocab_size"]))
fout.write(struct.pack("i", ftype))

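# The header written above is seven native-endian int32 values, in order:
# magic ("ggml"), d_model, max_seq_len, n_heads, n_layers, vocab_size, ftype.
# A loader must read them back in exactly this order.
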
# TODO: temporary hack to not deal with implementing the tokenizer
for piece in sp_proto.pieces:
    encoded_piece = piece.piece.encode("utf-8")
    fout.write(struct.pack("i", len(encoded_piece)))
    fout.write(encoded_piece)
    fout.write(struct.pack("f", piece.score))

# pad the vocab with empty entries if the model's vocab_size is larger than
# the tokenizer's piece count
if hparams["vocab_size"] > len(sp_proto.pieces):
    for i in range(hparams["vocab_size"] - len(sp_proto.pieces)):
        fout.write(struct.pack("i", 0))
        fout.write(struct.pack("f", 0.0))

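# Each vocab entry above is: int32 byte length, the UTF-8 bytes of the piece,
# then its float32 SentencePiece score. Padding entries use length 0 and
# score 0.0, so the reader always sees exactly vocab_size records.
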
for name in list_vars.keys():
    data = list_vars[name].squeeze().numpy()
    print("Processing variable: " + name + " with shape: ", data.shape)

    n_dims = len(data.shape)

    # ftype == 0 -> float32, ftype == 1 -> float16
    ftype_cur = 0
    if ftype != 0:
        # only 2D ".weight" matrices are stored as f16; everything else
        # (biases, norms, 1D tensors) stays f32
        if name[-7:] == ".weight" and n_dims == 2:
            print("  Converting to float16")
            data = data.astype(np.float16)
            ftype_cur = 1
        else:
            print("  Converting to float32")
            data = data.astype(np.float32)
            ftype_cur = 0
    else:
        if data.dtype != np.float32:
            print("  Converting to float32")
            data = data.astype(np.float32)
            ftype_cur = 0

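    # Record layout for each tensor: int32 n_dims, int32 name length,
    # int32 ftype_cur, the dims in reverse order (ggml's ne[0] is the
    # fastest-varying dimension), the UTF-8 name bytes, then the raw data.
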
    # header (name_bytes replaces the original "str", which shadowed the
    # builtin)
    name_bytes = name.encode("utf-8")
    fout.write(struct.pack("iii", n_dims, len(name_bytes), ftype_cur))
    for i in range(n_dims):
        fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
    fout.write(name_bytes)

    # data
    data.tofile(fout)

fout.close()

print("Done. Output file: " + fname_out)
print("")
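
# A minimal usage sketch; the directory name is a placeholder for any local
# checkpoint that contains config.json and spiece.model:
#
#   python convert-h5-to-ggml.py ./my-model-dir 1
#
# This writes the converted model to ./my-model-dir/ggml-model-f16.bin.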