utils.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. """
  2. Common helpers for working with ggml + numpy
  3. """
  4. from ggml import ffi, lib
  5. from typing import Union, Optional
  6. import numpy as np
  def init(mem_size: int, mem_buffer: ffi.CData = ffi.NULL, no_alloc: bool = False) -> ffi.CData:
      """
      Create a ggml context whose release is tied to garbage collection of the returned pointer.

      Parameters
      ----------
      mem_size : int
          Stored in the `mem_size` field of `struct ggml_init_params`.
      mem_buffer : ffi.CData
          Stored in the `mem_buffer` field (defaults to `ffi.NULL`).
      no_alloc : bool
          Stored in the `no_alloc` field.

      Returns
      -------
      ffi.CData
          The result of `lib.ggml_init`, wrapped by `ffi.gc` so that `lib.ggml_free`
          is called on it once the pointer is garbage collected.
      """
      init_params = ffi.new('struct ggml_init_params*', {
          'mem_size': mem_size,
          'mem_buffer': mem_buffer,
          'no_alloc': no_alloc,
      })
      # ggml_init takes the struct by value, hence the [0] dereference.
      context = lib.ggml_init(init_params[0])
      return ffi.gc(context, lib.ggml_free)
  # Anything the helpers in this module accept as a tensor operand:
  # a cffi object (a ggml tensor) or a numpy array.
  TensorLike = Union[ffi.CData, np.ndarray]
  def copy(from_tensor: TensorLike, to_tensor: TensorLike, allow_requantize: bool = True):
      """
      Copy the contents of one tensor to another, doing any necessary (de/re)quantization transparently.

      Works across numpy & ggml tensors, but they must have the same shape (and be contiguous).

      Parameters
      ----------
      from_tensor : TensorLike
          The tensor to copy from (a numpy array or possibly-quantized ggml tensor)
      to_tensor : TensorLike
          The tensor to copy to (a numpy array or possibly-quantized ggml tensor)
      allow_requantize : bool
          If False, will throw an error if requantization is required (i.e. both from_tensor
          and to_tensor are quantized with different quantization types)

      Raises
      ------
      AssertionError
          If the layout/shape checks performed by the helper functions fail, or if
          requantization would be required while allow_requantize is False.
      NotImplementedError
          If the source cannot be read as floats or the destination cannot be written
          from floats (raised by the float conversion helpers).
      """
      # Copying a tensor onto itself is a no-op.
      if from_tensor is to_tensor:
          return

      __expect_same_layout("source", from_tensor, "destination", to_tensor)
      __check_shape_consistent_with_type(from_tensor)
      __check_shape_consistent_with_type(to_tensor)

      from_type = __get_type(from_tensor)
      to_type = __get_type(to_tensor)

      if from_type == to_type:
          # Same element type and (checked above) same shape: a raw byte copy suffices.
          ffi.memmove(__get_data(to_tensor), __get_data(from_tensor), __get_nbytes(from_tensor))
          return

      # Raised explicitly (rather than via `assert`) so that the guard still applies
      # when Python runs with -O; the exception type is unchanged.
      requantizing = lib.ggml_is_quantized(from_type) and lib.ggml_is_quantized(to_type)
      if requantizing and not allow_requantize:
          raise AssertionError(
              f"Requantizing from {__type_name(from_type)} to {__type_name(to_type)} is disabled. Force with allow_requantize=True"
          )

      # Different types: go through an intermediate float32 representation.
      __set_floats(to_tensor, __get_floats(from_tensor))
  def numpy(tensor: ffi.CData, allow_copy: Union[bool, np.ndarray] = False, allow_requantize=False) -> np.ndarray:
      """
      Convert a ggml tensor to a numpy array.

      If the tensor isn't quantized, the returned numpy array will be a view over its data.

      If it is quantized (and allow_copy is True), the copy will involve dequantization and the returned array will
      be a copy of the original tensor (any changes to the numpy array won't then be reflected back to the tensor).

      Parameters
      ----------
      tensor : ffi.CData
          The tensor to convert to a numpy array
      allow_copy : bool or np.ndarray
          If False, will throw an error if the tensor is quantized (since dequantization requires extra memory).
          If True, will dequantize the tensor and return a copy of the data in a new float32 numpy array.
          If an np.ndarray, will copy the data into the given array (which must be the same shape as the tensor) when dequantization is needed
      allow_requantize : bool
          If allow_copy is a tensor with a different quantization type than the source tensor, will throw an error unless allow_requantize is True.

      Returns
      -------
      np.ndarray
          A view over the tensor data (non-quantized tensors), the array passed as
          allow_copy, or a freshly allocated float32 array (quantized tensors).

      Raises
      ------
      ValueError
          If the tensor is quantized and allow_copy is False.
      NotImplementedError
          If the tensor type has no numpy dtype equivalent.
      AssertionError
          If a non-quantized tensor is not contiguous, or if the layout of an
          allow_copy array does not match the tensor.
      """
      shape = __get_shape(tensor)

      if lib.ggml_is_quantized(tensor.type):
          # The ndarray case must be tested first: comparing an ndarray with `== False`
          # is elementwise, and using that result in `if` raises for multi-element arrays.
          if isinstance(allow_copy, np.ndarray):
              __expect_same_layout("source tensor", tensor, "dequantization output tensor", allow_copy)
              destination = allow_copy
          elif allow_copy == False:  # noqa: E712 - equality kept so falsy-equal values behave as before
              raise ValueError(f"{__describe(tensor)} is quantized, conversion to numpy requires a copy (pass allow_copy=True; changes to the numpy array won't affect the original).")
          else:
              destination = np.empty(shape, dtype=np.float32)

          copy(tensor, destination, allow_requantize=allow_requantize)
          return destination

      dtype = __type_to_dtype(tensor.type)
      if not dtype:
          raise NotImplementedError(f'Cannot convert {__describe(tensor)} to numpy')

      assert __is_contiguous(tensor), f"Cannot convert {__describe(tensor)} to numpy (support contiguous tensors only)"

      # Wrap the tensor's own memory (no copy) and give the flat buffer the tensor's shape.
      nbytes = lib.ggml_nelements(tensor) * lib.ggml_type_size(tensor.type)
      array = np.frombuffer(ffi.buffer(lib.ggml_get_data(tensor), nbytes), dtype=dtype)
      array.shape = shape
      return array
  def __type_name(type: int) -> str:
      """
      Return the name ggml reports for a type id, decoded as UTF-8.

      Returns None when `lib.ggml_type_name` yields a null/falsy pointer.
      """
      raw_name = lib.ggml_type_name(type)
      if not raw_name:
          return None
      return ffi.string(raw_name).decode('utf-8')
  # ggml type ids of the "K" quantization family; consulted when validating block sizes
  # to produce a clearer error message.
  __k_quant_types = {
      lib.GGML_TYPE_Q2_K,
      lib.GGML_TYPE_Q3_K,
      lib.GGML_TYPE_Q4_K,
      lib.GGML_TYPE_Q5_K,
      lib.GGML_TYPE_Q6_K,
      lib.GGML_TYPE_Q8_K,
  }
  # Maps each ggml type id that has a direct numpy equivalent to that numpy scalar type.
  # Types missing from this table have no zero-copy numpy representation here.
  __type_to_dtype_dict = {
      lib.GGML_TYPE_I8: np.int8,
      lib.GGML_TYPE_I16: np.int16,
      lib.GGML_TYPE_I32: np.int32,
      lib.GGML_TYPE_F16: np.float16,
      lib.GGML_TYPE_F32: np.float32,
  }
  def __type_to_dtype(type: int) -> Optional[np.dtype]:
      """Look up the numpy type for a ggml type id; None when the id is not in the table."""
      return __type_to_dtype_dict.get(type)
  def __dtype_to_type(dtype: np.dtype):
      """
      Translate a numpy dtype into the matching ggml type id.

      Raises
      ------
      ValueError
          If the dtype is not one of float32, float16, int32, int16 or int8.
      """
      # Checked in order with `==`, which is how numpy dtypes compare against scalar types.
      known_types = (
          (np.float32, lib.GGML_TYPE_F32),
          (np.float16, lib.GGML_TYPE_F16),
          (np.int32, lib.GGML_TYPE_I32),
          (np.int16, lib.GGML_TYPE_I16),
          (np.int8, lib.GGML_TYPE_I8),
      )
      for numpy_type, ggml_type in known_types:
          if dtype == numpy_type:
              return ggml_type
      raise ValueError(f"Unsupported dtype: {dtype}")
  def __describe(tensor: TensorLike):
      """
      Build a short human-readable description of a tensor for error messages.

      Accepts either a ggml tensor or a numpy array (both are handled by the
      type and shape helpers it delegates to), and returns a string of the form
      ``Tensor[<type name>, <shape>]``.
      """
      type_name = __type_name(__get_type(tensor))
      shape = __get_shape(tensor)
      return f'Tensor[{type_name}, {shape}]'
  def __get_type(tensor: TensorLike):
      """Return the ggml type id: derived from the dtype for numpy arrays, read from `.type` otherwise."""
      if isinstance(tensor, np.ndarray):
          return __dtype_to_type(tensor.dtype)
      return tensor.type
  def __get_shape(x: TensorLike):
      """
      Return the shape as a tuple.

      For numpy arrays this is `x.shape`; otherwise it is the first `n_dims`
      entries of the tensor's `ne` array, in that order.
      """
      if isinstance(x, np.ndarray):
          return x.shape
      return tuple(x.ne[axis] for axis in range(x.n_dims))
  def __get_strides(x: TensorLike):
      """
      Return the strides as a tuple.

      For numpy arrays this is `x.strides`; otherwise it is the first `n_dims`
      entries of the tensor's `nb` array, in that order.
      """
      if isinstance(x, np.ndarray):
          return x.strides
      return tuple(x.nb[axis] for axis in range(x.n_dims))
  def __get_data(x: TensorLike) -> ffi.CData:
      """Return a cffi pointer to the underlying data of a numpy array or ggml tensor."""
      if isinstance(x, np.ndarray):
          # Exposes the array's own buffer to cffi.
          return ffi.from_buffer(x)
      return lib.ggml_get_data(x)
  def __get_nbytes(tensor: TensorLike):
      """Return the size in bytes: `nbytes` for numpy arrays, `lib.ggml_nbytes` otherwise."""
      if isinstance(tensor, np.ndarray):
          return tensor.nbytes
      return lib.ggml_nbytes(tensor)
  def __get_nelements(tensor: TensorLike):
      """Return the element count: `size` for numpy arrays, `lib.ggml_nelements` otherwise."""
      if isinstance(tensor, np.ndarray):
          return tensor.size
      return lib.ggml_nelements(tensor)
  def __is_contiguous(tensor: TensorLike):
      """Report contiguity: the C_CONTIGUOUS flag for numpy arrays, `lib.ggml_is_contiguous` otherwise."""
      if isinstance(tensor, np.ndarray):
          return tensor.flags['C_CONTIGUOUS']
      return lib.ggml_is_contiguous(tensor)
  def __get_floats(tensor: TensorLike) -> ffi.CData:
      """
      Return the tensor's contents as a cffi float buffer.

      F32 data is returned as a `float*` cast of the existing data (no copy).
      F16 and quantized data are converted into a newly allocated `float[]`.

      Raises
      ------
      NotImplementedError
          If the type is neither F32, F16 nor quantized.
      AssertionError
          If the quantized type's traits expose no `to_float` function.
      """
      data = __get_data(tensor)
      ggml_type = __get_type(tensor)

      if ggml_type == lib.GGML_TYPE_F32:
          return ffi.cast('float*', data)

      count = __get_nelements(tensor)
      converted = ffi.new('float[]', count)

      if ggml_type == lib.GGML_TYPE_F16:
          lib.ggml_fp16_to_fp32_row(ffi.cast('uint16_t*', data), converted, count)
          return converted

      if not lib.ggml_is_quantized(ggml_type):
          raise NotImplementedError(f'Cannot read floats from {__describe(tensor)}')

      # Quantized types are dequantized through the conversion function in their type traits.
      traits = lib.ggml_internal_get_type_traits(ggml_type)
      assert traits.to_float, f"Type {__type_name(ggml_type)} is not supported by ggml"
      traits.to_float(data, converted, count)
      return converted
  def __set_floats(tensor: TensorLike, f32_data: ffi.CData) -> None:
      """
      Write float32 data into a tensor, converting to the tensor's own type.

      F32 destinations receive a raw byte copy; F16 and quantized destinations
      are filled through the corresponding ggml conversion routine.

      Raises
      ------
      NotImplementedError
          If the destination type is neither F32, F16 nor quantized.
      AssertionError
          If the quantized type's traits expose no `from_float` function.
      """
      data = __get_data(tensor)
      ggml_type = __get_type(tensor)
      nbytes = __get_nbytes(tensor)

      if ggml_type == lib.GGML_TYPE_F32:
          ffi.memmove(data, f32_data, nbytes)
          return

      count = __get_nelements(tensor)

      if ggml_type == lib.GGML_TYPE_F16:
          lib.ggml_fp32_to_fp16_row(f32_data, ffi.cast('uint16_t*', data), count)
          return

      if not lib.ggml_is_quantized(ggml_type):
          raise NotImplementedError(f'Cannot write floats to {__describe(tensor)}')

      # Quantized types are produced through the conversion function in their type traits.
      traits = lib.ggml_internal_get_type_traits(ggml_type)
      assert traits.from_float, f"Type {__type_name(ggml_type)} is not supported by ggml"
      traits.from_float(f32_data, data, count)
  def __expect_same_layout(name1: str, tensor1: TensorLike, name2: str, tensor2: TensorLike):
      """
      Assert that two tensors have equal shapes and are both contiguous.

      `name1` and `name2` are only used to label the tensors in the assertion messages.
      """
      first_shape = __get_shape(tensor1)
      second_shape = __get_shape(tensor2)
      assert first_shape == second_shape, \
          f"Shape mismatch: {name1} has {first_shape} but {name2} has {second_shape}"
      assert __is_contiguous(tensor1) and __is_contiguous(tensor2), \
          f"Only contiguous tensors are supported (got {name1} with strides {__get_strides(tensor1)} and {name2} with strides {__get_strides(tensor2)})"
  def __check_shape_consistent_with_type(tensor: TensorLike):
      """
      Assert that a quantized tensor's shape is compatible with its block size.

      Non-quantized tensors pass without any check. For quantized ones the block
      size reported by ggml must be positive and must divide every dimension.
      """
      ggml_type = __get_type(tensor)
      if lib.ggml_is_quantized(ggml_type):
          dims = __get_shape(tensor)
          block_size = lib.ggml_blck_size(ggml_type)

          # A zero block size for a K-quant type gets its own, more specific message.
          assert not (block_size == 0 and ggml_type in __k_quant_types), f"Can't quantize, native library was not compiled with USE_K_QUANTS!"
          assert block_size > 0, f"Invalid block size {block_size} for type {__type_name(ggml_type)}"

          for axis, extent in enumerate(dims):
              assert extent % block_size == 0, f"Dimension {axis} of {__describe(tensor)} is not divisible by {block_size}, required for quantization."