ggml.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. """
  2. We are vendoring https://github.com/abetlen/ggml-python (MIT License)
  3. adding a few utilities to convert between ggml and numpy tensors for testing.
  4. """
  5. import numpy as np
  6. import ctypes
  7. import torch
  8. from pathlib import Path
  9. from typing import Self
  10. from typing import Dict
  11. from typing import Callable
  12. from typing import Any
  13. from typing import Tuple
  14. from typing import Union
  15. from typing import Type
  16. from third_party_ggml import *
  17. ### Helpers
  18. def numpy_dtype(ggml_type: ctypes.c_int) -> type:
  19. if ggml_type == 0:
  20. # GGML_TYPE_F32 = 0,
  21. return np.float32
  22. if ggml_type == 1:
  23. # GGML_TYPE_F16 = 1,
  24. return np.float16
  25. raise NotImplementedError(f"Can't convert GGML_TYPE({ggml_type}) to a numpy.dtype")
  26. def from_numpy_dtype(dtype: np.dtype) -> ctypes.c_int:
  27. if dtype == np.float32:
  28. return ctypes.c_int(0)
  29. elif dtype == np.float16:
  30. return ctypes.c_int(1)
  31. raise NotImplementedError(f"Can't convert {dtype} to a GGML_TYPE")
  32. def shape(tensor: Union[ggml_tensor, ggml_tensor_p]) -> Tuple[int, ...]:
  33. if isinstance(tensor, ctypes._Pointer):
  34. tensor = tensor.contents
  35. ndims = tensor.n_dims
  36. return tuple([tensor.ne[i] for i in range(ndims)])
  37. def strides(tensor: Union[ggml_tensor, ggml_tensor_p]) -> Tuple[int, ...]:
  38. if isinstance(tensor, ctypes._Pointer):
  39. tensor = tensor.contents
  40. ndims = tensor.n_dims
  41. return tuple([tensor.nb[i] for i in range(ndims)])
  42. def to_numpy(tensor: Union[ggml_tensor, ggml_tensor_p]) -> np.ndarray:
  43. if isinstance(tensor, ctypes._Pointer):
  44. tensor = tensor.contents
  45. t_shape = shape(tensor)
  46. # Convert the ggml data pointer to a pointer to ints with the same size (float16 -> uint16)
  47. # This is needed because Python ctypes doesn't have "float16", and as_array only works with ctypes pointer
  48. type_size = ggml_type_size(tensor.type)
  49. int_width: type = getattr(ctypes, f"c_uint{8 * type_size}")
  50. ptr = ctypes.cast(tensor.data, ctypes.POINTER(int_width))
  51. # Create a numpy array with the wrong dtype
  52. int_arr = np.ctypeslib.as_array(ptr, shape=t_shape)
  53. # Reinterpret it to the right dtype
  54. res = np.frombuffer(int_arr, dtype=numpy_dtype(tensor.type)).reshape(t_shape)
  55. # TODO: assert strides / check contiguous
  56. # assert strides(tensor) == res.strides, "TODO: support strided tensor"
  57. return res
# ctypes array type of GGML_MAX_DIMS int64 values; from_numpy uses it to build
# the `ne` (shape) field of a tensor.
GgmlShape = ctypes.c_int64 * GGML_MAX_DIMS
  59. def from_file(
  60. ctx: ggml_context_p, file: Path, shape: Tuple[int, ...], dtype: type = np.float32
  61. ) -> ggml_tensor_p:
  62. data = np.fromfile(str(file), dtype=dtype).reshape(shape) # type: ignore
  63. return from_numpy(ctx, data)
  64. def _pad_shape(shape: Tuple[int, ...]) -> Tuple[int, int, int, int]:
  65. if len(shape) >= 4:
  66. return shape
  67. padding = (1,) * (4 - len(shape))
  68. return shape + padding # type: ignore
  69. def from_numpy(ctx: ggml_context_p, array: np.ndarray) -> ggml_tensor_p:
  70. tensor_p = ggml_new_tensor(
  71. ctx, from_numpy_dtype(array.dtype), 1, GgmlShape()
  72. )
  73. tensor_p.contents.n_dims = array.ndim
  74. tensor_p.contents.data = array.ctypes.data_as(ctypes.c_void_p)
  75. tensor_p.contents.ne = GgmlShape(*_pad_shape(array.shape))
  76. # print(f"array: {array.shape} @0x{array.ctypes.data_as(ctypes.c_void_p)}")
  77. # print(f"tensor_p: {shape(tensor_p)} @0x{tensor_p.contents.data:x}")
  78. # prevent the underlying numpy array to be freed
  79. setattr(tensor_p, "__data", array)
  80. return tensor_p
  81. class NativeObj:
  82. AllocFn = Callable[[], ctypes.c_void_p]
  83. FreeFn = Callable[[ctypes.c_void_p], None]
  84. _cache: Dict[str, Tuple[AllocFn, FreeFn]] = {}
  85. @classmethod
  86. def _init_c_func(cls, kind: str) -> Tuple[AllocFn, FreeFn]:
  87. if kind in cls._cache:
  88. return cls._cache[kind]
  89. alloc_fn = getattr(lib, f"{kind}_alloc")
  90. alloc_fn.argtypes = []
  91. alloc_fn.restype = ctypes.c_void_p
  92. free_fn = getattr(lib, f"{kind}_free")
  93. free_fn.argtypes = [ctypes.c_void_p]
  94. free_fn.restype = None
  95. cls._cache[kind] = (alloc_fn, free_fn)
  96. return (alloc_fn, free_fn)
  97. def __init__(self, kind: str, ptr: ctypes.c_void_p = NULL):
  98. self.kind = kind
  99. alloc_fn, self._free_fn = self._init_c_func(kind)
  100. self.ptr = alloc_fn() if ptr is None else ptr
  101. # print(self)
  102. def free(self) -> None:
  103. if self.ptr is not None:
  104. self._free_fn(self.ptr)
  105. # print(f"freeing {self}")
  106. self.ptr = NULL
  107. def __enter__(self) -> Self:
  108. return self
  109. def __exit__(self, *args: Any) -> None:
  110. self.free()
  111. def __del__(self) -> None:
  112. self.free()
  113. def __repr__(self) -> str:
  114. return f"<{self.kind} native object at 0x{self.ptr:x}>"
  115. def MeasureArena() -> NativeObj:
  116. return NativeObj("ggml_allocr", ggml_allocr_new_measure(GGML_MEM_ALIGN))
  117. def FixedSizeArena(mem_size: int) -> NativeObj:
  118. memory = torch.zeros(mem_size, dtype=torch.uint8)
  119. allocr = ggml_allocr_new(
  120. ctypes.c_void_p(memory.data_ptr()), mem_size, GGML_MEM_ALIGN
  121. )
  122. arena = NativeObj("ggml_allocr", allocr)
  123. # Add a reference from the arena object to the underlying tensor, otherwise it will be freed to early.
  124. setattr(arena, "__memory", memory)
  125. return arena
  126. def UnityModel() -> NativeObj:
  127. return NativeObj("unity_model")
  128. def GptVocab() -> NativeObj:
  129. return NativeObj("gpt_vocab")
  130. lib.unity_model_load.argtypes = [ctypes.c_char_p, ctypes.c_void_p, ctypes.c_void_p]
  131. def unity_model_load(model_file: Path) -> Tuple[NativeObj, NativeObj]:
  132. model = UnityModel()
  133. vocab = GptVocab()
  134. lib.unity_model_load(
  135. ctypes.create_string_buffer(str(model_file).encode("utf-8")),
  136. model.ptr,
  137. vocab.ptr,
  138. )
  139. return model, vocab
  140. lib.unity_graph.argtypes = [ctypes.c_void_p, ctypes.c_void_p]
  141. lib.unity_graph.restype = ctypes.POINTER(ggml_cgraph)
  142. def unity_graph(model: NativeObj, tensor: ggml_tensor_p) -> ggml_cgraph_p:
  143. return lib.unity_graph(model.ptr, tensor) # type: ignore
  144. lib.unity_eval.argtypes = [
  145. ctypes.c_void_p,
  146. ctypes.c_void_p,
  147. ctypes.POINTER(ggml_tensor),
  148. ctypes.c_int,
  149. ]
  150. lib.unity_eval.restype = ctypes.POINTER(ggml_cgraph)
  151. def unity_eval(
  152. allocr: NativeObj, model: NativeObj, tensor: ggml_tensor_p, n_threads: int
  153. ) -> ggml_cgraph_p:
  154. return lib.unity_eval(allocr.ptr, model.ptr, tensor, n_threads)