
rollback change to model format

Guillaume Wenzek, 1 year ago
parent
commit b2fdcd2f6b
2 files changed, 7 insertions, 4 deletions
  1. ggml/examples/unity/model_loader.cpp (+4, -3)
  2. ggml/ggml_convert.py (+3, -1)

ggml/examples/unity/model_loader.cpp (+4, -3)

@@ -40,13 +40,14 @@ model_loader::load_model_weights(fairseq2_model &model, std::ifstream &fin)
 {
     std::int64_t num_tensor = 0;
     std::int64_t f32_ctx_size = 0;
-    std::int64_t f16_ctx_size = 0;
     fin.read((char*) &num_tensor, sizeof(num_tensor));
     fin.read((char*) &f32_ctx_size, sizeof(f32_ctx_size));
-    fin.read((char*) &f16_ctx_size, sizeof(f16_ctx_size));
 
-    // TODO: it might be intersting to allow the caller to not upcast the weights to float32.
+    // TODO: it might be interesting to allow the caller to not upcast the weights to float32.
+    // Note this requires changing the on-disk format.
     bool as_float32 = true;
+    std::int64_t f16_ctx_size = f32_ctx_size;
+    // fin.read((char*) &f16_ctx_size, sizeof(f16_ctx_size));
 
     struct ggml_init_params params = {
         /*.mem_size   =*/ as_float32 ? f32_ctx_size : f16_ctx_size,
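
With this rollback the header carries only two int64 fields before the tensor data, and the loader reuses the float32 context size where it previously read a dedicated float16 size. A minimal sketch of the equivalent read logic in Python (read_header is a hypothetical helper; it assumes little-endian int64 fields, matching the "<q" packing used in ggml_convert.py):

    import struct

    def read_header(fin):
        # The header now stores only the tensor count and the float32 context size.
        num_tensor, = struct.unpack("<q", fin.read(8))
        f32_ctx_size, = struct.unpack("<q", fin.read(8))
        # f16_ctx_size is no longer on disk; fall back to the float32 size,
        # which over-allocates for fp16 and is therefore a safe upper bound.
        f16_ctx_size = f32_ctx_size
        return num_tensor, f32_ctx_size, f16_ctx_size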

ggml/ggml_convert.py (+3, -1)

@@ -235,7 +235,9 @@ def write_state_dict(
     compressed_byte_size += ggml.ggml_tensor_overhead() * (len(state_dict) + 10)
 
     out.write(struct.pack("<q", true_byte_size))
-    out.write(struct.pack("<q", compressed_byte_size))
+    # TODO: it could be interesting to write this field to allow model_loader to choose the precision when loading.
+    # But changing this requires republishing .ggml files.
+    # out.write(struct.pack("<q", compressed_byte_size))
     GB = 1024**3
     if fp16:
         log.warning(
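
On the writer side, the matching change drops the compressed (fp16) context size from the header so already-published .ggml files stay valid. A minimal sketch of the header write after this commit (write_header is a hypothetical helper; it assumes the tensor count precedes the size field, since that is the order the loader reads, and the real write_state_dict interleaves this with tensor serialization):

    import struct

    def write_header(out, num_tensor: int, true_byte_size: int) -> None:
        # Tensor count, then the float32 ("true") context size,
        # both packed as little-endian int64.
        out.write(struct.pack("<q", num_tensor))
        out.write(struct.pack("<q", true_byte_size))
        # compressed_byte_size is intentionally not written: adding it back
        # would change the on-disk format and require republishing .ggml files.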