# test_unity_cpp.py
import ggml
import ctypes
import torch
import pytest
import numpy as np
from typing import Iterator
from ggml import NativeObj

Ctx = ggml.ggml_context_p

PARAMS_16MB = ggml.ggml_init_params(mem_size=16 * 1024 * 1024, mem_buffer=None)


@pytest.fixture(name="ctx")
def _ctx() -> Iterator[Ctx]:
    """Allocate a new context with 16 MB of memory"""
    try:
        ctx = ggml.ggml_init(params=PARAMS_16MB)
        yield ctx
    finally:
        ggml.ggml_free(ctx)


def test_ggml_bindings_work(ctx: Ctx) -> None:
    # Instantiate tensors
    x = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
    a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
    b = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)

    # Use ggml operations to build a computational graph for f = a * x^2 + b
    x2 = ggml.ggml_mul(ctx, x, x)
    f = ggml.ggml_add(ctx, ggml.ggml_mul(ctx, a, x2), b)
    gf = ggml.ggml_build_forward(f)

    # Set the input values
    ggml.ggml_set_f32(x, 2.0)
    ggml.ggml_set_f32(a, 3.0)
    ggml.ggml_set_f32(b, 4.0)

    # Compute the graph
    ggml.ggml_graph_compute_with_ctx(ctx, ctypes.pointer(gf), 1)

    # Get the output value: 3 * 2^2 + 4 = 16
    output = ggml.ggml_get_f32_1d(f, 0)
    assert output == 16.0


def test_shape_works(ctx: Ctx) -> None:
    a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 10)
    assert ggml.shape(a) == (10,)

    b = ggml.ggml_new_tensor_2d(ctx, ggml.GGML_TYPE_F32, 11, 21)
    assert ggml.shape(b) == (11, 21)

    c = ggml.ggml_new_tensor_3d(ctx, ggml.GGML_TYPE_F32, 12, 22, 32)
    assert ggml.shape(c) == (12, 22, 32)


@pytest.mark.xfail(
    reason="TODO: understand diff between ggml strides and numpy strides"
)
def test_strides_works(ctx: Ctx) -> None:
    a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 10)
    assert ggml.strides(a) == np.ones((10,), dtype=np.float32).strides

    b = ggml.ggml_new_tensor_2d(ctx, ggml.GGML_TYPE_F32, 11, 21)
    assert ggml.strides(b) == np.ones((11, 21), dtype=np.float32).strides

    c = ggml.ggml_new_tensor_3d(ctx, ggml.GGML_TYPE_F32, 12, 22, 32)
    assert ggml.strides(c) == np.ones((12, 22, 32), dtype=np.float32).strides


def test_to_numpy_works_with_f32(ctx: Ctx) -> None:
    a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 10)
    a = ggml.ggml_set_f32(a, 2.14)
    assert np.allclose(ggml.to_numpy(a), np.ones((10,)) * 2.14)

    b = ggml.ggml_new_tensor_2d(ctx, ggml.GGML_TYPE_F32, 11, 21)
    assert np.allclose(ggml.to_numpy(b), np.zeros((11, 21)))

    c = ggml.ggml_new_tensor_3d(ctx, ggml.GGML_TYPE_F32, 12, 22, 32)
    assert np.allclose(ggml.to_numpy(c), np.zeros((12, 22, 32)))


def test_from_numpy_works_with_f32(ctx: Ctx) -> None:
    a = np.random.normal(size=(10,)).astype(dtype=np.float32)
    ga = ggml.from_numpy(ctx, a)
    assert np.allclose(a, ggml.to_numpy(ga))

    a = np.random.normal(size=(11, 21)).astype(dtype=np.float32)
    ga = ggml.from_numpy(ctx, a)
    assert np.allclose(a, ggml.to_numpy(ga))

    a = np.random.normal(size=(12, 22, 32)).astype(dtype=np.float32)
    ga = ggml.from_numpy(ctx, a)
    assert np.allclose(a, ggml.to_numpy(ga))


def test_to_numpy_works_with_f16(ctx: Ctx) -> None:
    # We explicitly fill the tensors, otherwise they might contain non-zero garbage.
    a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F16, 10)
    a = ggml.ggml_set_f32(a, 2.14)
    assert np.allclose(ggml.to_numpy(a), np.ones((10,), dtype=np.float16) * 2.14)

    b = ggml.ggml_new_tensor_2d(ctx, ggml.GGML_TYPE_F16, 11, 21)
    b = ggml.ggml_set_f32(b, 4.18)
    assert np.allclose(ggml.to_numpy(b), np.ones((11, 21), dtype=np.float16) * 4.18)

    c = ggml.ggml_new_tensor_3d(ctx, ggml.GGML_TYPE_F16, 12, 22, 32)
    c = ggml.ggml_set_f32(c, 3.16)
    assert np.allclose(ggml.to_numpy(c), np.ones((12, 22, 32), dtype=np.float16) * 3.16)


def test_from_numpy_works_with_f16(ctx: Ctx) -> None:
    a = np.random.normal(size=(10,)).astype(dtype=np.float16)
    ga = ggml.from_numpy(ctx, a)
    assert np.allclose(a, ggml.to_numpy(ga))

    a = np.random.normal(size=(11, 21)).astype(dtype=np.float16)
    ga = ggml.from_numpy(ctx, a)
    assert np.allclose(a, ggml.to_numpy(ga))

    a = np.random.normal(size=(12, 22, 32)).astype(dtype=np.float16)
    ga = ggml.from_numpy(ctx, a)
    assert np.allclose(a, ggml.to_numpy(ga))


def test_unity_model_load() -> None:
    model, vocab = ggml.unity_model_load(
        "examples/unity/models/unity-large/ggml-model.bin"
    )
    print(model, vocab)

    with ggml.MeasureArena() as arena:
        # Build the compute graph once with a measuring allocator
        # to determine how much memory the real computation needs.
        graph = ggml.unity_graph(model, arena)
        # TODO: why the extra padding ?
        mem_size = ggml.ggml_allocr_alloc_graph(arena.ptr, graph) + ggml.GGML_MEM_ALIGN

    compute_buffer = torch.zeros(mem_size, dtype=torch.uint8)
    with ggml.FixedSizeArena(mem_size) as allocr:
        print(f"unity_graph: compute buffer size: {mem_size / 1024 / 1024} MB")
        eval_res_ptr = ggml.unity_eval(model, allocr, 1)
        eval_res = eval_res_ptr.contents
        # The last node of the graph holds the output activations.
        inpL = ggml.to_numpy(eval_res.nodes[eval_res.n_nodes - 1])
        expected_raw = "-0.1308,0.0346,-0.2656,0.2873,-0.0104,0.0574,0.4033,-0.1125,-0.0460,-0.0496"
        expected = map(float, expected_raw.split(","))
        assert np.allclose(inpL[0, :10], list(expected), atol=1e-4)