ggml.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. """
  2. We are vendoring https://github.com/abetlen/ggml-python (MIT License)
  3. adding a few utilities to convert between ggml and numpy tensors for testing.
  4. """
  5. import numpy as np
  6. import ctypes
  7. import torch
  8. import functools
  9. from pathlib import Path
  10. from typing import Self
  11. from typing import Dict
  12. from typing import Callable
  13. from typing import Any
  14. from typing import Tuple
  15. from typing import Union
  16. from typing import Type
  17. from third_party_ggml import *
  18. ### Helpers
  def numpy_dtype(ggml_type: ctypes.c_int) -> type:
      """Map a GGML type code to the matching numpy scalar type.

      Only two codes are handled: 0 (GGML_TYPE_F32) and 1 (GGML_TYPE_F16).

      Raises:
          NotImplementedError: for any other type code.
      """
      # (GGML type code, numpy type) pairs, tested in order with ``==``.
      known_types = (
          (0, np.float32),  # GGML_TYPE_F32 = 0
          (1, np.float16),  # GGML_TYPE_F16 = 1
      )
      for code, np_type in known_types:
          if ggml_type == code:
              return np_type
      raise NotImplementedError(f"Can't convert GGML_TYPE({ggml_type}) to a numpy.dtype")
  def from_numpy_dtype(dtype: np.dtype) -> ctypes.c_int:
      """Map a numpy dtype to the matching GGML type code.

      float32 maps to code 0 and float16 to code 1; the code is returned
      wrapped in a fresh ``ctypes.c_int``.

      Raises:
          NotImplementedError: for any other dtype.
      """
      for np_type, code in ((np.float32, 0), (np.float16, 1)):
          if dtype == np_type:
              return ctypes.c_int(code)
      raise NotImplementedError(f"Can't convert {dtype} to a GGML_TYPE")
  def shape(tensor: Union[ggml_tensor, ggml_tensor_p]) -> Tuple[int, ...]:
      """Return the first ``n_dims`` entries of the tensor's ``ne`` field.

      Accepts either a tensor struct or a ctypes pointer to one; a pointer
      is dereferenced first.
      """
      struct = tensor.contents if isinstance(tensor, ctypes._Pointer) else tensor
      return tuple(struct.ne[axis] for axis in range(struct.n_dims))
  def strides(tensor: Union[ggml_tensor, ggml_tensor_p]) -> Tuple[int, ...]:
      """Return the first ``n_dims`` entries of the tensor's ``nb`` field.

      Accepts either a tensor struct or a ctypes pointer to one; a pointer
      is dereferenced first.
      """
      struct = tensor.contents if isinstance(tensor, ctypes._Pointer) else tensor
      return tuple(struct.nb[axis] for axis in range(struct.n_dims))
  def to_numpy(tensor: Union[ggml_tensor, ggml_tensor_p]) -> np.ndarray:
      """Expose the data of a ggml tensor as a numpy array.

      Accepts either a tensor struct or a ctypes pointer to one. The result
      has the shape reported by ``shape(tensor)`` and the dtype given by
      ``numpy_dtype(tensor.type)``; no explicit copy of the data is made here.

      Raises:
          NotImplementedError: if ``numpy_dtype`` does not know the tensor type.
      """
      if isinstance(tensor, ctypes._Pointer):
          tensor = tensor.contents

      dims = shape(tensor)

      # ctypes has no float16 and ``as_array`` needs a ctypes pointer, so the
      # data pointer is first viewed as unsigned ints of the same byte width
      # (e.g. float16 -> uint16).
      n_bytes = ggml_type_size(tensor.type)
      uint_type: type = getattr(ctypes, f"c_uint{8 * n_bytes}")
      uint_ptr = ctypes.cast(tensor.data, ctypes.POINTER(uint_type))

      # Array with the right shape and item width, but the wrong dtype ...
      raw = np.ctypeslib.as_array(uint_ptr, shape=dims)
      # ... reinterpreted with the real dtype.
      result = np.frombuffer(raw, dtype=numpy_dtype(tensor.type)).reshape(dims)

      # TODO: assert strides / check contiguous
      # assert strides(tensor) == result.strides, "TODO: support strided tensor"
      return result
  # ctypes array type with GGML_MAX_DIMS int64 slots, used for tensor shapes.
  GgmlShape = ctypes.c_int64 * GGML_MAX_DIMS


  def from_file(
      ctx: ggml_context_p, file: Path, shape: Tuple[int, ...], dtype: type = np.float32
  ) -> ggml_tensor_p:
      """Read a raw binary file into a ggml tensor.

      The file is read with ``np.fromfile`` using ``dtype``, reshaped to
      ``shape`` and handed to ``from_numpy``.
      """
      flat = np.fromfile(str(file), dtype=dtype)
      shaped = flat.reshape(shape)  # type: ignore
      return from_numpy(ctx, shaped)
  65. def _pad_shape(shape: Tuple[int, ...]) -> Tuple[int, int, int, int]:
  66. if len(shape) >= 4:
  67. return shape
  68. padding = (1,) * (4 - len(shape))
  69. return shape + padding # type: ignore
  def from_numpy(ctx: ggml_context_p, array: Union[np.ndarray, "torch.Tensor"]) -> ggml_tensor_p:
      """Create a ggml tensor whose data pointer targets ``array``'s buffer.

      An object whose class is named ``Tensor`` is first converted with
      ``.numpy()``. The tensor is created by ``ggml_new_tensor`` with an
      all-zero shape, then its ``n_dims``, ``data`` and ``ne`` fields are
      overwritten from the array.

      NOTE(review): ``nb`` is not touched here and keeps whatever
      ``ggml_new_tensor`` computed for the placeholder shape - confirm that
      this is intended.

      Raises:
          NotImplementedError: if the array dtype has no GGML equivalent.
      """
      if type(array).__name__ == "Tensor":
          array = array.numpy()

      ggml_dtype = from_numpy_dtype(array.dtype)
      tensor_p = ggml_new_tensor(ctx, ggml_dtype, 1, GgmlShape())

      # Patch the freshly created tensor so it describes the numpy buffer.
      tensor_p.contents.n_dims = array.ndim
      tensor_p.contents.data = array.ctypes.data_as(ctypes.c_void_p)
      padded = _pad_shape(array.shape)
      tensor_p.contents.ne = GgmlShape(*padded)
      # print(f"array: {array.shape} @0x{array.ctypes.data_as(ctypes.c_void_p)}")
      # print(f"tensor_p: {shape(tensor_p)} @0x{tensor_p.contents.data:x}")

      # Hold a reference to the array on the returned pointer object so the
      # buffer is not released while that object is alive.
      setattr(tensor_p, "__data", array)
      return tensor_p
  class NativeObj:
      """Python owner of a pointer to an object living in the native library.

      For a given ``kind`` the library is expected to export ``<kind>_alloc``
      and ``<kind>_free``. The object can be used as a context manager:
      entering yields the raw pointer, leaving frees it. The pointer is also
      freed when the Python object is garbage collected.

      NOTE(review): the ``is None`` tests below only work if ``NULL`` (from
      third_party_ggml) is ``None`` - confirm.
      """

      AllocFn = Callable[[], ctypes.c_void_p]
      FreeFn = Callable[[ctypes.c_void_p], None]
      # kind -> (alloc_fn, free_fn); filled lazily by _init_c_func and shared
      # by all instances.
      _cache: Dict[str, Tuple[AllocFn, FreeFn]] = {}

      @classmethod
      def _init_c_func(cls, kind: str) -> Tuple[AllocFn, FreeFn]:
          """Return the (alloc, free) C functions for ``kind``.

          On first use the functions are looked up on ``lib``, their ctypes
          signatures are declared, and the pair is stored in ``_cache``.

          Raises:
              AttributeError: if ``lib`` lacks ``<kind>_alloc`` or ``<kind>_free``.
          """
          cached = cls._cache.get(kind)
          if cached is not None:
              return cached

          alloc_fn = getattr(lib, f"{kind}_alloc")
          alloc_fn.argtypes = []
          alloc_fn.restype = ctypes.c_void_p

          free_fn = getattr(lib, f"{kind}_free")
          free_fn.argtypes = [ctypes.c_void_p]
          free_fn.restype = None

          cls._cache[kind] = (alloc_fn, free_fn)
          return (alloc_fn, free_fn)

      def __init__(self, kind: str, ptr: ctypes.c_void_p = NULL):
          """Take ownership of ``ptr``, or allocate a new object when it is None."""
          self.kind = kind
          alloc_fn, self._free_fn = self._init_c_func(kind)
          self.ptr = alloc_fn() if ptr is None else ptr
          # print(self)

      def free(self) -> None:
          """Release the native object; calling it again is a no-op."""
          # ``ptr`` may be missing when __init__ raised before assigning it
          # and __del__ still runs on the half-built instance.
          ptr = getattr(self, "ptr", None)
          if ptr is not None:
              self._free_fn(ptr)
              # print(f"freeing {self}")
              self.ptr = NULL

      def __enter__(self) -> ctypes.c_void_p:
          return self.ptr

      def __exit__(self, *args: Any) -> None:
          self.free()

      def __del__(self) -> None:
          self.free()

      def __repr__(self) -> str:
          ptr = getattr(self, "ptr", None)
          if ptr is None:
              # Formatting None with ":x" raises TypeError, which made repr()
              # unusable on a freed object.
              return f"<{self.kind} native object (freed)>"
          if isinstance(ptr, int):
              return f"<{self.kind} native object at 0x{ptr:x}>"
          # Pointer handed in as a ctypes object: it has no hex formatting.
          return f"<{self.kind} native object at {ptr!r}>"
  def MeasureArena() -> NativeObj:
      """Wrap the allocator from ``ggml_allocr_new_measure`` in a NativeObj.

      The allocator is created with ``GGML_MEM_ALIGN`` and owned under the
      ``ggml_allocr`` kind.
      """
      measure_allocr = ggml_allocr_new_measure(GGML_MEM_ALIGN)
      return NativeObj("ggml_allocr", measure_allocr)
  def FixedSizeArena(mem_size: int) -> NativeObj:
      """Create a ggml allocator backed by a zeroed buffer of ``mem_size`` bytes.

      The buffer is a torch uint8 tensor; its address is handed to
      ``ggml_allocr_new`` together with ``GGML_MEM_ALIGN``.
      """
      backing = torch.zeros(mem_size, dtype=torch.uint8)
      base_addr = ctypes.c_void_p(backing.data_ptr())
      arena = NativeObj(
          "ggml_allocr", ggml_allocr_new(base_addr, mem_size, GGML_MEM_ALIGN)
      )
      # Keep a reference from the arena to the backing tensor, otherwise the
      # tensor would be freed too early.
      setattr(arena, "__memory", backing)
      return arena
  def UnityModel() -> NativeObj:
      """Return a NativeObj of kind ``unity_model``, built without a pointer."""
      kind = "unity_model"
      return NativeObj(kind)
  def GptVocab() -> NativeObj:
      """Return a NativeObj of kind ``gpt_vocab``, built without a pointer."""
      kind = "gpt_vocab"
      return NativeObj(kind)
  def Fairseq2Model() -> NativeObj:
      """Return a NativeObj of kind ``fairseq2_model``, built without a pointer."""
      kind = "fairseq2_model"
      return NativeObj(kind)
  # ``std_string_alloc`` takes a C string argument, unlike the zero-argument
  # ``<kind>_alloc`` signature NativeObj declares by default, so the pair is
  # declared by hand and registered in the NativeObj cache up front.
  lib.std_string_alloc.argtypes = [ctypes.c_char_p]
  lib.std_string_alloc.restype = ctypes.c_void_p
  lib.std_string_free.argtypes = [ctypes.c_void_p]
  lib.std_string_free.restype = None
  NativeObj._cache["std_string"] = (lib.std_string_alloc, lib.std_string_free)


  @functools.lru_cache(1024)
  def CppStr(content: str) -> NativeObj:
      """Build a native string (kind ``std_string``) holding ``content`` as UTF-8.

      NOTE(review): results are memoized, so equal ``content`` values share
      one NativeObj. Freeing it (``free()`` or a ``with`` block) leaves a
      freed object in the cache for later callers.
      """
      utf8_buf = ctypes.create_string_buffer(content.encode("utf-8"))
      native_ptr = lib.std_string_alloc(utf8_buf)
      return NativeObj("std_string", native_ptr)
  lib.unity_model_load.argtypes = [ctypes.c_char_p, ctypes.c_void_p, ctypes.c_void_p]


  def unity_model_load(model_file: Path) -> Tuple[NativeObj, NativeObj]:
      """Create a model and a vocab object and pass them to the native loader.

      The path is handed over as a UTF-8 C string. The value returned by the
      native call is not inspected.

      Returns:
          The ``(model, vocab)`` pair of NativeObj wrappers.
      """
      model, vocab = UnityModel(), GptVocab()
      path_buf = ctypes.create_string_buffer(str(model_file).encode("utf-8"))
      lib.unity_model_load(path_buf, model.ptr, vocab.ptr)
      return model, vocab
  lib.load_unity_ggml_file.argtypes = [ctypes.c_void_p, ctypes.c_char_p]
  lib.load_unity_ggml_file.restype = None


  def load_unity_ggml_file(model_file: Path) -> NativeObj:
      """Create a ``fairseq2_model`` object and pass it to the native loader.

      The path is handed over as a UTF-8 C string.
      """
      model = Fairseq2Model()
      path_bytes = str(model_file).encode("utf-8")
      lib.load_unity_ggml_file(model.ptr, ctypes.create_string_buffer(path_bytes))
      return model
  lib.unity_audio_encoder_graph.argtypes = [ctypes.c_void_p, ctypes.c_void_p]
  lib.unity_audio_encoder_graph.restype = ctypes.POINTER(ggml_cgraph)


  def unity_audio_encoder_graph(model: NativeObj, tensor: ggml_tensor_p) -> ggml_cgraph_p:
      """Call the native ``unity_audio_encoder_graph`` with the model pointer and ``tensor``.

      Returns:
          The pointer to a ``ggml_cgraph`` produced by the native call.
      """
      graph = lib.unity_audio_encoder_graph(model.ptr, tensor)
      return graph  # type: ignore
  # Native signature: (allocator, model, input tensor, thread count) -> graph.
  lib.unity_eval.argtypes = [
      ctypes.c_void_p,
      ctypes.c_void_p,
      ctypes.POINTER(ggml_tensor),
      ctypes.c_int,
  ]
  lib.unity_eval.restype = ctypes.POINTER(ggml_cgraph)


  def unity_eval(
      allocr: ctypes.c_void_p, model: NativeObj, tensor: ggml_tensor_p, n_threads: int
  ) -> ggml_cgraph_p:
      """Call the native ``unity_eval``, unwrapping ``model`` to its raw pointer.

      ``allocr``, ``tensor`` and ``n_threads`` are forwarded unchanged.
      """
      graph = lib.unity_eval(allocr, model.ptr, tensor, n_threads)
      return graph
  # layer name -> native "<layer_name>_forward" function whose ctypes
  # signature has already been declared.
  _FORWARD_CACHE: Dict[str, Callable[..., ggml_tensor_p]] = {}


  def forward(
      layer_name: str, model: NativeObj, prefix: str, *inputs: ggml_tensor_p
  ) -> ggml_tensor_p:
      """Run the native ``<layer_name>_forward`` function on ``inputs``.

      The function is looked up on ``lib`` the first time a layer name is
      seen. Its signature is declared as (model pointer, prefix string
      pointer, one tensor pointer per input) and then cached.

      NOTE: the declared arity is fixed by the first call for a given
      ``layer_name``; later calls must pass the same number of inputs.

      Args:
          layer_name: prefix of the native symbol to call.
          model: wrapper whose ``ptr`` is passed as first argument.
          prefix: string converted with ``CppStr`` and passed as second argument.
          *inputs: tensor pointers forwarded to the native function.

      Raises:
          AttributeError: if ``lib`` has no ``<layer_name>_forward`` symbol.
      """
      fwd: Any = _FORWARD_CACHE.get(layer_name)
      if fwd is None:
          fwd = getattr(lib, layer_name + "_forward")
          tensor_args = [ctypes.POINTER(ggml_tensor)] * len(inputs)
          fwd.argtypes = [ctypes.c_void_p, ctypes.c_void_p] + tensor_args
          fwd.restype = ctypes.POINTER(ggml_tensor)
          _FORWARD_CACHE[layer_name] = fwd

      # CppStr results are memoized, so the native string must NOT be freed
      # here. Using it in a ``with`` block released it on exit while the
      # wrapper stayed cached, and the next call with the same prefix handed a
      # NULL pointer to C. A local reference keeps it alive for the call.
      std_prefix = CppStr(prefix)
      if std_prefix.ptr is None:
          # Stale cache entry already freed elsewhere: bypass the memoization
          # and build a fresh string, released when this frame ends.
          std_prefix = getattr(CppStr, "__wrapped__", CppStr)(prefix)
      return fwd(model.ptr, std_prefix.ptr, *inputs)  # type: ignore[no-any-return]