
remove un-needed code

Guillaume Wenzek, 1 year ago
commit a80a3b49f3
4 changed files with 15 additions and 202 deletions
  1. ggml/examples/unity/model_loader.h (+0, -2)
  2. ggml/examples/unity/unity_model_loader.cpp (+0, -40)
  3. ggml/examples/unity/unity_model_loader.h (+0, -123)
  4. ggml/ggml.py (+15, -37)

ggml/examples/unity/model_loader.h (+0, -2)

@@ -25,8 +25,6 @@ public:
 
     virtual std::size_t compute_context_size(void *raw_hparams) = 0;
 
-    virtual void tensors_alloc(fairseq2_model& model) = 0;
-
     int load_model_weights(fairseq2_model &model, std::ifstream &fin);
 
 private:

ggml/examples/unity/unity_model_loader.cpp (+0, -40)

@@ -28,46 +28,6 @@ unity_model_loader::compute_context_size(void* raw_hparams)
     return hparams->model_byte_size;
 };
 
-struct UnityArch {
-    struct TransformerDecoder text_decoder;
-};
-
-void unity_model_loader::tensors_alloc(fairseq2_model &model)
-{
-    auto hparams = (unity_hparams&)model.hparams;
-    auto& arch = (UnityArch&)model.arch;
-    const auto ctx = model.ctx;
-    auto tensors = model.tensors;
-
-    const auto vocab_size = hparams.nllb_config__vocabulary_size;
-    const auto model_dim = hparams.nllb_config__model_dim;
-
-    // This can be simplified by adding syntax sugar
-
-    // frontend
-    // arch.frontend_embed_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, vocab_size, dim);
-    // tensor_map["text_decoder_frontend.embed.weight"] = arch.frontend_embed_w;
-
-    // layers
-    {
-        const auto n_layers = hparams.nllb_config__num_decoder_layers;
-        arch.text_decoder.layers = std::vector<TransformerDecoderLayer>(n_layers);
-        auto layers = arch.text_decoder.layers;
-        auto num_heads = hparams.nllb_config__num_decoder_attn_heads;
-        for (int i = 0; i < n_layers; ++i) {
-            auto prefix = "text_decoder.layers." + std::to_string(i);
-            MultiheadAttention_init(layers[i].self_attn, model, prefix + "self_attn", model_dim, num_heads);
-            LayerNorm_init(layers[i].self_attn_norm, model, prefix + "self_attn_norm", model_dim);
-        }
-    }
-
-    // // layer_norm
-    // arch.layer_norm_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
-    // tensor_map["text_decoder.layer_norm.weight"] = arch.layer_norm_w;
-    // arch.layer_norm_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
-    // tensor_map["text_decoder.layer_norm.bias"] = arch.layer_norm_b;
-};
-
 extern "C" int load_unity_ggml_file(fairseq2_model& model, const char* fname) {
     return load_fairseq2_ggml_file<unity_model_loader>(model, fname);
 }

ggml/examples/unity/unity_model_loader.h (+0, -123)

@@ -129,132 +129,9 @@ void read_unity_hparams(unity_hparams* out, std::ifstream &fin) {
 
 };
 
-
-
-
-
-// Embedding
-std::size_t compute_embed_size(int32_t vocab_size, int32_t dim)
-{
-    return vocab_size * dim * ggml_type_size(GGML_TYPE_F32);
-};
-
-// Attention Layer
-
-struct attention_layer {
-    struct ggml_tensor* layer_norm_w; // model_dim
-    struct ggml_tensor* layer_norm_b; // model_dim
-
-    struct ggml_tensor* q_proj_w; // model_dim x model_dim
-    struct ggml_tensor* q_proj_b; // model_dim
-    struct ggml_tensor* k_proj_w; // model_dim x model_dim
-    struct ggml_tensor* k_proj_b; // model_dim
-    struct ggml_tensor* v_proj_w; // model_dim x model_dim
-    struct ggml_tensor* v_proj_b; // model_dim
-
-    struct ggml_tensor* output_proj_w; // model_dim x model_dim
-    struct ggml_tensor* output_proj_b; // model_dim
-};
-
-std::size_t compute_attention_layer_size(int32_t dim)
-{
-    return LayerNorm_size(dim)
-        + 4 * Linear_size(dim, dim); // q, k, v, and out
-};
-
-void init_attention_layer(
-    attention_layer *layer,
-    fairseq2_model &model_ctx,
-    const std::string &prefix)
-{
-    auto hparams = (unity_hparams&)model_ctx.hparams;
-    const auto dim = hparams.nllb_config__model_dim;
-    auto ctx = model_ctx.ctx;
-    auto &tensor_map = model_ctx.tensors;
-
-    layer->layer_norm_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
-    tensor_map[prefix + "_layer_norm.weight"] = layer->layer_norm_w;
-    layer->layer_norm_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
-    tensor_map[prefix + "_layer_norm.bias"] = layer->layer_norm_b;
-
-    layer->q_proj_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, dim);
-    tensor_map[prefix + ".q_proj.weight"] = layer->q_proj_w;
-    layer->q_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
-    tensor_map[prefix + ".q_proj.bias"] = layer->q_proj_b;
-
-    layer->k_proj_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, dim);
-    tensor_map[prefix + ".k_proj.weight"] = layer->k_proj_w;
-    layer->k_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
-    tensor_map[prefix + ".k_proj.bias"] = layer->k_proj_b;
-
-    layer->v_proj_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, dim);
-    tensor_map[prefix + ".v_proj.weight"] = layer->v_proj_w;
-    layer->v_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
-    tensor_map[prefix + ".v_proj.bias"] = layer->v_proj_b;
-
-    layer->output_proj_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, dim);
-    tensor_map[prefix + ".output_proj.weight"] = layer->output_proj_w;
-    layer->output_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
-    tensor_map[prefix + ".output_proj.bias"] = layer->output_proj_b;
-}
-
-
-// Attention Head
-
-struct attention_head {
-    struct attention_layer* self_attn; // model_dim
-    struct attention_layer* encoder_decoder_attn; // model_dim
-    struct StandardFeedForwardNetwork* ffn;
-};
-
-std::size_t compute_attention_head_size(int32_t dim, int32_t inner_dim)
-{
-    return 2 * compute_attention_layer_size(dim) + StandardFeedForwardNetwork_size(dim, inner_dim);
-};
-
-void init_attention_head(
-    attention_head *head,
-    fairseq2_model &model_ctx,
-    const std::string &prefix)
-{
-    auto hparams = (unity_hparams&)model_ctx.hparams;
-    init_attention_layer(head->self_attn, model_ctx, prefix + ".self_attn");
-    init_attention_layer(head->encoder_decoder_attn, model_ctx, prefix + ".encoder_decoder_attn");
-    StandardFeedForwardNetwork_init((StandardFeedForwardNetwork&)(head->ffn), model_ctx, prefix + ".ffn", hparams.nllb_config__model_dim, hparams.nllb_config__ffn_inner_dim);
-}
-
-// TODO: attention_head_compute_graph
-
-// Text Decoder
-
-struct text_decoder {
-    struct ggml_tensor* frontend_embed_w; // vocab_size x model_dim
-    std::vector<attention_head*> multi_head;
-    struct ggml_tensor* layer_norm_w;
-    struct ggml_tensor* layer_norm_b;
-};
-
-std::size_t compute_context_size(void* raw_hparams)
-{
-    auto hparams = (unity_hparams&)raw_hparams;
-    const auto vocab_size = hparams.nllb_config__vocabulary_size;
-    const auto dim = hparams.nllb_config__model_dim;
-    const auto inner_dim = hparams.nllb_config__ffn_inner_dim;
-    const auto n_layers = hparams.nllb_config__num_decoder_layers;
-
-    const auto overhead = (6 + 12 * n_layers) * 512; // TODO Find out what this is.
-
-    return compute_embed_size(vocab_size, dim)
-        + n_layers * compute_attention_head_size(dim, inner_dim)
-        + LayerNorm_size(dim)
-        + overhead;
-};
-
 class unity_model_loader: public model_loader {
     public:
     void load_hparams(fairseq2_model& model, std::ifstream &fin);
 
     std::size_t compute_context_size(void* raw_hparams);
-
-    void tensors_alloc(fairseq2_model &model);
 };

ggml/ggml.py (+15, -37)

@@ -210,14 +210,6 @@ def FixedSizeArena(mem_size: int) -> NativeObj:
     return arena
 
 
-def UnityModel() -> NativeObj:
-    return NativeObj("unity_model")
-
-
-def GptVocab() -> NativeObj:
-    return NativeObj("gpt_vocab")
-
-
 lib.fairseq2_model_set_inference_ctx.argtypes = [ctypes.c_void_p, ggml_context_p]
 
 
@@ -239,20 +231,6 @@ def CppStr(content: str) -> NativeObj:
     return NativeObj("std_string", cpp_str)
 
 
-lib.unity_model_load.argtypes = [ctypes.c_char_p, ctypes.c_void_p, ctypes.c_void_p]
-
-
-def unity_model_load(model_file: Path) -> Tuple[NativeObj, NativeObj]:
-    model = UnityModel()
-    vocab = GptVocab()
-    lib.unity_model_load(
-        ctypes.create_string_buffer(str(model_file).encode("utf-8")),
-        model.ptr,
-        vocab.ptr,
-    )
-    return model, vocab
-
-
 lib.load_unity_ggml_file.argtypes = [ctypes.c_void_p, ctypes.c_char_p]
 lib.load_unity_ggml_file.restype = ctypes.c_int
 
@@ -266,27 +244,27 @@ def load_unity_ggml_file(model_file: Path) -> NativeObj:
     return model
 
 
-lib.unity_audio_encoder_graph.argtypes = [ctypes.c_void_p, ctypes.c_void_p]
-lib.unity_audio_encoder_graph.restype = ctypes.POINTER(ggml_cgraph)
+# lib.unity_audio_encoder_graph.argtypes = [ctypes.c_void_p, ctypes.c_void_p]
+# lib.unity_audio_encoder_graph.restype = ctypes.POINTER(ggml_cgraph)
 
 
-def unity_audio_encoder_graph(model: NativeObj, tensor: ggml_tensor_p) -> ggml_cgraph_p:
-    return lib.unity_audio_encoder_graph(model.ptr, tensor)  # type: ignore
+# def unity_audio_encoder_graph(model: NativeObj, tensor: ggml_tensor_p) -> ggml_cgraph_p:
+#     return lib.unity_audio_encoder_graph(model.ptr, tensor)  # type: ignore
 
 
-lib.unity_eval.argtypes = [
-    ctypes.c_void_p,
-    ctypes.c_void_p,
-    ctypes.POINTER(ggml_tensor),
-    ctypes.c_int,
-]
-lib.unity_eval.restype = ctypes.POINTER(ggml_cgraph)
+# lib.unity_eval.argtypes = [
+#     ctypes.c_void_p,
+#     ctypes.c_void_p,
+#     ctypes.POINTER(ggml_tensor),
+#     ctypes.c_int,
+# ]
+# lib.unity_eval.restype = ctypes.POINTER(ggml_cgraph)
 
 
-def unity_eval(
-    allocr: ctypes.c_void_p, model: NativeObj, tensor: ggml_tensor_p, n_threads: int
-) -> ggml_cgraph_p:
-    return lib.unity_eval(allocr, model.ptr, tensor, n_threads)
+# def unity_eval(
+#     allocr: ctypes.c_void_p, model: NativeObj, tensor: ggml_tensor_p, n_threads: int
+# ) -> ggml_cgraph_p:
+#     return lib.unity_eval(allocr, model.ptr, tensor, n_threads)
 
 
 _FORWARD_CACHE: Dict[str, Callable[..., ggml_tensor_p]] = {}
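
With unity_model_load removed and unity_audio_encoder_graph/unity_eval commented out, load_unity_ggml_file is the surviving Python entry point in this file. The sketch below shows how it is expected to be called after this commit; it is not part of the diff, the import assumes ggml/ggml.py is importable as a module named ggml, and the checkpoint filename is a hypothetical example.

    # Usage sketch only (not part of this commit).
    from pathlib import Path

    import ggml  # the ctypes bindings in ggml/ggml.py edited above

    # load_unity_ggml_file wraps lib.load_unity_ggml_file(model.ptr, fname)
    # and returns a NativeObj wrapping the loaded fairseq2_model.
    model = ggml.load_unity_ggml_file(Path("unity.ggml"))  # hypothetical checkpoint path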