|
@@ -40,13 +40,14 @@ model_loader::load_model_weights(fairseq2_model &model, std::ifstream &fin)
|
|
{
|
|
{
|
|
std::int64_t num_tensor = 0;
|
|
std::int64_t num_tensor = 0;
|
|
std::int64_t f32_ctx_size = 0;
|
|
std::int64_t f32_ctx_size = 0;
|
|
- std::int64_t f16_ctx_size = 0;
|
|
|
|
fin.read((char*) &num_tensor, sizeof(num_tensor));
|
|
fin.read((char*) &num_tensor, sizeof(num_tensor));
|
|
fin.read((char*) &f32_ctx_size, sizeof(f32_ctx_size));
|
|
fin.read((char*) &f32_ctx_size, sizeof(f32_ctx_size));
|
|
- fin.read((char*) &f16_ctx_size, sizeof(f16_ctx_size));
|
|
|
|
|
|
|
|
- // TODO: it might be intersting to allow the caller to not upcast the weights to float32.
|
|
|
|
|
|
+ // TODO: it might be interesting to allow the caller to not upcast the weights to float32.
|
|
|
|
+ // Note this requires changing the on-disk format
|
|
bool as_float32 = true;
|
|
bool as_float32 = true;
|
|
|
|
+ std::int64_t f16_ctx_size = f32_ctx_size;
|
|
|
|
+ // fin.read((char*) &f16_ctx_size, sizeof(f16_ctx_size));
|
|
|
|
|
|
struct ggml_init_params params = {
|
|
struct ggml_init_params params = {
|
|
/*.mem_size =*/ as_float32 ? f32_ctx_size : f16_ctx_size,
|
|
/*.mem_size =*/ as_float32 ? f32_ctx_size : f16_ctx_size,
|