
rollback change to model format

Guillaume Wenzek, 1 year ago
parent
commit b2fdcd2f6b
2 files changed, 7 insertions, 4 deletions
  1. ggml/examples/unity/model_loader.cpp (+4, -3)
  2. ggml/ggml_convert.py (+3, -1)

ggml/examples/unity/model_loader.cpp (+4, -3)

@@ -40,13 +40,14 @@ model_loader::load_model_weights(fairseq2_model &model, std::ifstream &fin)
 {
     std::int64_t num_tensor = 0;
     std::int64_t f32_ctx_size = 0;
-    std::int64_t f16_ctx_size = 0;
     fin.read((char*) &num_tensor, sizeof(num_tensor));
     fin.read((char*) &f32_ctx_size, sizeof(f32_ctx_size));
-    fin.read((char*) &f16_ctx_size, sizeof(f16_ctx_size));
 
-    // TODO: it might be intersting to allow the caller to not upcast the weights to float32.
+    // TODO: it might be interesting to allow the caller to not upcast the weights to float32.
+    // Note this requires changing the on-disk format.
     bool as_float32 = true;
+    std::int64_t f16_ctx_size = f32_ctx_size;
+    // fin.read((char*) &f16_ctx_size, sizeof(f16_ctx_size));
 
     struct ggml_init_params params = {
         /*.mem_size   =*/ as_float32 ? f32_ctx_size : f16_ctx_size,
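
With this rollback the header carries only two int64 fields before the tensor data, and the loader reuses the float32 context size where it previously read a dedicated float16 size. A minimal sketch of the equivalent read logic in Python (read_header is a hypothetical helper; it assumes little-endian int64 fields, matching the "<q" packing used in ggml_convert.py):

    import struct

    def read_header(fin):
        # The header now stores only the tensor count and the float32 context size.
        num_tensor, = struct.unpack("<q", fin.read(8))
        f32_ctx_size, = struct.unpack("<q", fin.read(8))
        # f16_ctx_size is no longer on disk; fall back to the float32 size,
        # which over-allocates for fp16 and is therefore a safe upper bound.
        f16_ctx_size = f32_ctx_size
        return num_tensor, f32_ctx_size, f16_ctx_size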

ggml/ggml_convert.py (+3, -1)

@@ -235,7 +235,9 @@ def write_state_dict(
     compressed_byte_size += ggml.ggml_tensor_overhead() * (len(state_dict) + 10)
 
     out.write(struct.pack("<q", true_byte_size))
-    out.write(struct.pack("<q", compressed_byte_size))
+    # TODO: it could be interesting to write this field to allow model_loader to choose the precision when loading.
+    # But changing this requires republishing .ggml files.
+    # out.write(struct.pack("<q", compressed_byte_size))
     GB = 1024**3
     if fp16:
         log.warning(
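
On the writer side, the matching change drops the compressed (fp16) context size from the header so already-published .ggml files stay valid. A minimal sketch of the header write after this commit (write_header is a hypothetical helper; it assumes the tensor count precedes the size field, since that is the order the loader reads, and the real write_state_dict interleaves this with tensor serialization):

    import struct

    def write_header(out, num_tensor: int, true_byte_size: int) -> None:
        # Tensor count, then the float32 ("true") context size,
        # both packed as little-endian int64.
        out.write(struct.pack("<q", num_tensor))
        out.write(struct.pack("<q", true_byte_size))
        # compressed_byte_size is intentionally not written: adding it back
        # would change the on-disk format and require republishing .ggml files.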