
cleanup comments

Guillaume Wenzek, 1 year ago
commit 48fc7dfcd5
2 files changed with 17 additions and 26 deletions
  1. + 3 - 6    ggml/examples/unity/model_loader.cpp
  2. + 14 - 20  ggml/ggml_convert.py

+ 3 - 6
ggml/examples/unity/model_loader.cpp

@@ -39,18 +39,15 @@ std::int64_t
 model_loader::load_model_weights(fairseq2_model &model, std::ifstream &fin)
 {
     std::int64_t num_tensor = 0;
-    std::int64_t f32_ctx_size = 0;
+    std::int64_t f32_tensor_size = 0;
     fin.read((char*) &num_tensor, sizeof(num_tensor));
-    fin.read((char*) &f32_ctx_size, sizeof(f32_ctx_size));
+    fin.read((char*) &f32_tensor_size, sizeof(f32_tensor_size));
 
     // TODO: it might be interesting to allow the caller to not upcast the weights to float32.
 // Note this requires changing the on-disk format
     bool as_float32 = true;
-    std::int64_t f16_ctx_size = f32_ctx_size;
-    // fin.read((char*) &f16_ctx_size, sizeof(f16_ctx_size));
-
     struct ggml_init_params params = {
-        /*.mem_size   =*/ as_float32 ? f32_ctx_size : f16_ctx_size,
+        /*.mem_size   =*/ f32_tensor_size + num_tensor * (int64_t)ggml_tensor_overhead(),
         /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ false,
     };
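
The new mem_size covers ggml's per-tensor metadata in addition to the raw float32 payload, instead of relying on the converter to bake that overhead into the size it writes. A minimal sketch of the sizing rule (not part of the commit; required_ctx_size is a hypothetical helper, and it reuses the ggml Python bindings already imported by ggml_convert.py):

    import ggml

    def required_ctx_size(num_tensors: int, f32_byte_size: int) -> int:
        # Mirrors the mem_size expression in model_loader.cpp: the raw tensor
        # data plus one metadata block per tensor, since the context is created
        # with no_alloc = false and therefore holds both.
        return f32_byte_size + num_tensors * ggml.ggml_tensor_overhead()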

+ 14 - 20
ggml/ggml_convert.py

@@ -219,33 +219,27 @@ def write_state_dict(
         convert float32 tensors to float16 on disk
     """
     out.write(struct.pack("<q", len(state_dict)))
-    # True size of each tensor
+    # True size of each tensor (before downcasting to float16)
     true_byte_size = sum(x.numel() * x.element_size() for x in state_dict.values())
-    # + tensor overhead
-    true_byte_size += ggml.ggml_tensor_overhead() * (len(state_dict) + 10)
-
-    def _fp16_byte_size(x: torch.Tensor) -> int:
-        full_byte_size = x.numel() * x.element_size()
-        if fp16 and x.dtype == torch.float32:
-            full_byte_size //= 2
-        return full_byte_size
-
-    # Compressed size
-    compressed_byte_size = sum(_fp16_byte_size(x) for x in state_dict.values())
-    compressed_byte_size += ggml.ggml_tensor_overhead() * (len(state_dict) + 10)
-
     out.write(struct.pack("<q", true_byte_size))
-    # TODO: it could be interesting to write this to allow model_loader to chose the precision when loading.
-    # But changing this require republishing .ggml files
-    # out.write(struct.pack("<q", compressed_byte_size))
+
     GB = 1024**3
-    if fp16:
+    if not fp16:
         log.warning(
-            f"Saving a ggml file with {len(state_dict)} tensors, totalling {true_byte_size / GB:.3f}Gb compressed to {compressed_byte_size / GB:.3f}"
+            f"Saving a ggml file with {len(state_dict)} tensors, totalling {true_byte_size / GB:.3f}Gb"
         )
     else:
+
+        def _fp16_byte_size(x: torch.Tensor) -> int:
+            full_byte_size = x.numel() * x.element_size()
+            if fp16 and x.dtype == torch.float32:
+                full_byte_size //= 2
+            return full_byte_size
+
+        # Compressed size
+        compressed_byte_size = sum(_fp16_byte_size(x) for x in state_dict.values())
         log.warning(
-            f"Saving a ggml file with {len(state_dict)} tensors, totalling {true_byte_size / GB:.3f}Gb"
+            f"Saving a ggml file with {len(state_dict)} tensors, totalling {true_byte_size / GB:.3f}Gb compressed to {compressed_byte_size / GB:.3f}"
         )
 
     for key, value in state_dict.items():
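
For context (not part of the commit): the header written above is just two little-endian int64 values, the tensor count followed by the float32 payload size, which model_loader.cpp reads back with raw fin.read calls, so both sides assume the same little-endian layout. A small round-trip sketch with made-up sizes:

    import io
    import struct

    buf = io.BytesIO()
    buf.write(struct.pack("<q", 3))        # number of tensors (illustrative)
    buf.write(struct.pack("<q", 123_456))  # true float32 byte size (illustrative)
    buf.seek(0)
    num_tensors, f32_byte_size = struct.unpack("<qq", buf.read(16))
    assert (num_tensors, f32_byte_size) == (3, 123_456)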