1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- from ggml import ffi, lib
- from ggml.utils import init, numpy, copy
- import numpy as np
- from math import pi, cos, sin, ceil
- import matplotlib.pyplot as plt
# ggml context handed to every tensor allocation in this script.
ctx = init(mem_size=100 * 1024 * 1024)  # Will be auto-GC'd

# Side length of the square reference image.
n = 256

# Reference pattern: orig[i, j] = sin(2*pi*i/n) * cos(2*pi*j/n).
# Evaluated in float64 in one vectorized pass, then narrowed to float32 once.
theta = np.arange(n) * 2 * pi / n
orig = np.outer(np.sin(theta), np.cos(theta)).astype(np.float32)

# Copy the reference image into an n x n F32 ggml tensor; it is the source
# for every quantized copy made later.
orig_tensor = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_F32, n, n)
copy(orig, orig_tensor)

# Quantized type ids to compare, minus the ones that cannot be used here.
unsupported = (lib.GGML_TYPE_Q8_1, lib.GGML_TYPE_Q8_K)  # Apparently not supported
quants = [
    qtype
    for qtype in range(lib.GGML_TYPE_COUNT)
    if lib.ggml_is_quantized(qtype) and qtype not in unsupported
]
# To test a single format, override the list, e.g. with [lib.GGML_TYPE_Q2_K].
def get_name(type):
    """Return the name ggml reports for tensor type id *type*, or '?' if none.

    The C string returned by ``lib.ggml_type_name`` is converted to a Python
    ``str`` by decoding it as UTF-8.
    """
    c_name = lib.ggml_type_name(type)
    if not c_name:
        # Falsy result (presumably a NULL pointer): no name for this type id.
        return '?'
    return ffi.string(c_name).decode('utf-8')
# Order the formats alphabetically by name, then put the unquantized
# reference first; None is the sentinel for "original image".
quants.sort(key=get_name)
quants.insert(0, None)
print(quants)

# One subplot per entry, laid out on a grid with a fixed number of columns.
ncols = 4
nrows = ceil(len(quants) / ncols)
plt.figure(figsize=(ncols * 5, nrows * 5), layout='tight')

for i, qtype in enumerate(quants):
    plt.subplot(nrows, ncols, i + 1)
    try:
        if qtype is None:
            plt.title('Original')
            plt.imshow(orig)
            continue

        # Round-trip: copy F32 -> quantized tensor, then read it back as a
        # NumPy array (allow_copy=True lets the helper return a converted copy).
        quantized_tensor = lib.ggml_new_tensor_2d(ctx, qtype, n, n)
        copy(orig_tensor, quantized_tensor)
        quantized = numpy(quantized_tensor, allow_copy=True)

        # Flatten the error before taking norms: on a 2-D array
        # np.linalg.norm(..., 2) is the largest singular value and
        # np.linalg.norm(..., np.inf) is the max row sum, whereas the
        # element-wise L2 / max-abs error is what "l2" / "linf" report here.
        error = (quantized - orig).ravel()
        results = {
            "l2": np.linalg.norm(error, 2),
            "linf": np.linalg.norm(error, np.inf),
            "compression":
                round(lib.ggml_nbytes(orig_tensor) /
                      lib.ggml_nbytes(quantized_tensor), 1),
        }

        name = get_name(qtype)
        print(f'{name}: {results}')
        plt.title(f'{name} ({results["compression"]}x smaller)')
        plt.imshow(quantized, interpolation='nearest')
    except Exception as e:
        # Best effort: a format that fails is reported and its subplot is
        # left empty so the remaining formats are still rendered.
        print(f'Error: {e}')

plt.show()
|