convert-ckpt-to-ggml.py

# Convert a model checkpoint to a ggml compatible file
#
# Load the model using TensorFlow.
# Iterate over all variables and write them to a binary file.
#
# For each variable, write the following:
#   - Number of dimensions (int)
#   - Name length (int)
#   - Data type (int)
#   - Dimensions (int[n_dims])
#   - Name (char[name_length])
#   - Data (float[n_elements])
#
# By default, the bigger matrices are converted to 16-bit floats.
# This can be disabled by passing ftype 0 (float32) on the command line.
#
# At the start of the ggml file we write the model parameters
# and vocabulary.
#
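# For illustration only (this sketch is not used by the script): a tensor
# record written in the format above could be read back roughly like this,
# assuming a file object `fin` positioned at the start of a record:
#
#   n_dims, name_len, ftype_cur = struct.unpack("iii", fin.read(12))
#   dims = struct.unpack("i" * n_dims, fin.read(4 * n_dims))  # reversed order
#   name = fin.read(name_len).decode("utf-8")
#   n_elem = 1
#   for d in dims:
#       n_elem *= d
#   dtype = np.float16 if ftype_cur == 1 else np.float32
#   data = np.frombuffer(fin.read(n_elem * np.dtype(dtype).itemsize), dtype=dtype)
#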
import sys
import json
import struct

import numpy as np
import tensorflow as tf

# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
def bytes_to_unicode():
    """
    Returns a dict mapping utf-8 bytes to unicode strings.
    The reversible bpe codes work on unicode strings.
    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
    This is a significant percentage of your normal, say, 32K bpe vocab.
    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
    This also avoids mapping to whitespace/control characters the bpe code barfs on.
    """
    bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
    cs = bs[:]
    n = 0
    for b in range(2**8):
        if b not in bs:
            bs.append(b)
            cs.append(2**8+n)
            n += 1
    cs = [chr(n) for n in cs]
    return dict(zip(bs, cs))
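# For example (illustrative, not used below): printable ASCII bytes map to
# themselves, while bytes outside that range get remapped, e.g. the space
# byte (0x20) maps to 'Ġ':
#   bytes_to_unicode()[ord('a')] == 'a'
#   bytes_to_unicode()[ord(' ')] == 'Ġ'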
# helper method to convert a numpy array to different float types
def convert_to_ftype(data, ftype):
    # fp16
    if ftype == 1:
        return data.astype(np.float16)

    assert False, "Invalid ftype: " + str(ftype)
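# For example (illustrative): convert_to_ftype(np.ones((2, 2), dtype=np.float32), 1)
# returns the same values as float16; any other ftype triggers the assert.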
if len(sys.argv) < 3:
    print("Usage: convert-ckpt-to-ggml.py dir-model ftype\n")
    print("  ftype == 0 -> float32")
    print("  ftype == 1 -> float16")
    sys.exit(1)
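# Example invocation (the model directory name is illustrative):
#   python convert-ckpt-to-ggml.py models/gpt-2-117M 1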
# output in the same directory as the model
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"

with open(dir_model + "/encoder.json", "r", encoding="utf-8") as f:
    encoder = json.load(f)

with open(dir_model + "/hparams.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

# possible data types
#   ftype == 0 -> float32
#   ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]

ftype = 1
if len(sys.argv) > 2:
    ftype = int(sys.argv[2])
    if ftype < 0 or ftype > 1:
        print("Invalid ftype: " + str(ftype))
        sys.exit(1)
    fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
list_vars = tf.train.list_variables(dir_model)

fout = open(fname_out, "wb")

fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
fout.write(struct.pack("i", hparams["n_vocab"]))
fout.write(struct.pack("i", hparams["n_ctx"]))
fout.write(struct.pack("i", hparams["n_embd"]))
fout.write(struct.pack("i", hparams["n_head"]))
fout.write(struct.pack("i", hparams["n_layer"]))
fout.write(struct.pack("i", ftype))
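# The header written above is seven 4-byte integers in native byte order
# (little-endian on typical x86/ARM machines):
#   magic, n_vocab, n_ctx, n_embd, n_head, n_layer, ftype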
byte_encoder = bytes_to_unicode()
byte_decoder = {v: k for k, v in byte_encoder.items()}

fout.write(struct.pack("i", len(encoder)))

for key in encoder:
    text = bytearray([byte_decoder[c] for c in key])
    fout.write(struct.pack("i", len(text)))
    fout.write(text)
for name, shape in list_vars:
    print("Processing variable: " + name + " with shape: ", shape)

    data = tf.train.load_variable(dir_model, name).squeeze()
    n_dims = len(data.shape)

    # for efficiency - transpose the projection matrices
    # "model/h.*/attn/c_attn/w"
    # "model/h.*/attn/c_proj/w"
    # "model/h.*/mlp/c_fc/w"
    # "model/h.*/mlp/c_proj/w"
    if name[-14:] == "/attn/c_attn/w" or \
       name[-14:] == "/attn/c_proj/w" or \
       name[-11:] == "/mlp/c_fc/w" or \
       name[-13:] == "/mlp/c_proj/w":
        print("  Transposing")
        data = data.transpose()

    dshape = data.shape

    ftype_cur = 0
    if ftype != 0:
        # match name:
        #  "model/wte"
        #  "model/h.*/attn/c_attn/w"
        #  "model/h.*/attn/c_proj/w"
        #  "model/h.*/mlp/c_fc/w"
        #  "model/h.*/mlp/c_proj/w"
        if name == "model/wte" or name[-2:] == "/w":
            print("  Converting to " + ftype_str[ftype])
            data = convert_to_ftype(data, ftype)
            ftype_cur = ftype
        else:
            print("  Converting to float32")
            data = data.astype(np.float32)
            ftype_cur = 0

    # header: name is utf-8 encoded, dimensions are written in reverse order
    sname = name.encode("utf-8")
    fout.write(struct.pack("iii", n_dims, len(sname), ftype_cur))
    for i in range(n_dims):
        fout.write(struct.pack("i", dshape[n_dims - 1 - i]))
    fout.write(sname)

    # data
    data.tofile(fout)
fout.close()

print("Done. Output file: " + fname_out)
print("")