
Enable testing watermarked vocoder with local cards (#249)

* Update test and expand test suite params

* Address PJ's comments

* Update

* Remove test code in model card

---------

Co-authored-by: Tuan Tran <tuantran@devfair0436.h2.fair>
Co-authored-by: Can Balioglu <cbalioglu@users.noreply.github.com>
Tuan Tran, 1 year ago
Parent commit: 4afcf8e68a

+ 21 - 4
scripts/watermarking/watermarking.py

@@ -10,7 +10,7 @@
 import math
 from argparse import ArgumentParser, ArgumentTypeError
 from pathlib import Path
-from typing import Any, Dict, Optional, Union, cast
+from typing import Any, Dict, Union

 import audiocraft
 import omegaconf
@@ -115,6 +115,7 @@ class Watermarker(nn.Module):

 def model_from_checkpoint(
     config_file: Union[Path, str] = "seamlesswatermark.yaml",
+    checkpoint: str = "",
     device: Union[torch.device, str] = "cpu",
     dtype: DataType = torch.float32,
 ) -> Watermarker:
@@ -151,7 +152,11 @@
     """
     config_path = Path(__file__).parent / config_file
     cfg = omegaconf.OmegaConf.load(config_path)
-    state: Dict[str, Any] = torch.load(cfg["checkpoint"], map_location=device)
+    if checkpoint and Path(checkpoint).is_file():
+        ckpt = checkpoint
+    else:
+        ckpt = cfg["checkpoint"]
+    state: Dict[str, Any] = torch.load(ckpt, map_location=device)
     if "model" in state and "xp.cfg" in state:
         cfg = omegaconf.OmegaConf.create(state["xp.cfg"])
         omegaconf.OmegaConf.resolve(cfg)
@@ -188,7 +193,13 @@ def get_detector(cfg: omegaconf.DictConfig):
     kwargs.pop("decoder")
     kwargs.pop("encoder")
     encoder_kwargs = {**kwargs, **encoder_override_kwargs}
-    output_hidden_dim = 8
+
+    # Some newer watermarking checkpoints were trained on newer code in which
+    # `output_hidden_dim` was renamed to `output_dim`.
+    if "output_dim" in encoder_kwargs:
+        output_hidden_dim = encoder_kwargs.pop("output_dim")
+    else:
+        output_hidden_dim = 8
     encoder = SEANetEncoderKeepDimension(output_hidden_dim, **encoder_kwargs)

     last_layer = torch.nn.Conv1d(output_hidden_dim, 2, 1)
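A side note on the fallback above: the same behavior can be expressed as a single `dict.pop` with a default. This is only an illustration; the keyword values below are hypothetical stand-ins for the kwargs built from the omegaconf config.

    # Newer configs carry "output_dim"; older ones do not, in which case the
    # historical default of 8 is kept.
    encoder_kwargs = {"output_dim": 32, "channels": 1}  # hypothetical values
    output_hidden_dim = encoder_kwargs.pop("output_dim", 8)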
@@ -220,6 +231,12 @@ if __name__ == "__main__":
         type=str,
         help="path to a config or checkpoint file (default: %(default)s)",
     )
+    parser.add_argument(
+        "--checkpoint",
+        default="",
+        type=str,
+        help="checkpoint path that overrides the `checkpoint` value specified in `model-file`",
+    )
     sub_parser = parser.add_subparsers(title="actions", dest="sub_cmd")
     detect_parser = sub_parser.add_parser("detect")
     wm_parser = sub_parser.add_parser("wm")
@@ -228,7 +245,7 @@ if __name__ == "__main__":
     args = parser.parse_args()

     if args.sub_cmd == "detect":
-        model = model_from_checkpoint(args.model_file, device=args.device)
+        model = model_from_checkpoint(args.model_file, checkpoint=args.checkpoint, device=args.device)
         wav, _ = torchaudio.load(args.file)
         wav = wav.unsqueeze(0)
         wav = wav.to(args.device)
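With this change, a locally downloaded checkpoint can be used without editing `seamlesswatermark.yaml`. A minimal sketch of the intended call, assuming the module is importable (the test suite actually loads it from its file path via importlib) and using a hypothetical local path:

    from watermarking import model_from_checkpoint  # import assumed for illustration only

    # Hypothetical local checkpoint; when the path is empty or does not point
    # to an existing file, model_from_checkpoint() falls back to the
    # cfg["checkpoint"] entry of seamlesswatermark.yaml.
    local_ckpt = "/checkpoints/seamless_wm_local.pt"

    model = model_from_checkpoint(
        config_file="seamlesswatermark.yaml",
        checkpoint=local_ckpt,
        device="cpu",
    )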

+ 0 - 2
src/seamless_communication/models/generator/loader.py

@@ -5,8 +5,6 @@
 # MIT_LICENSE file in the root directory of this source tree.
 # MIT_LICENSE file in the root directory of this source tree.
 
 
 
 
-from typing import Any, Mapping
-
 from fairseq2.assets import asset_store, download_manager
 from fairseq2.assets import asset_store, download_manager
 from fairseq2.models.utils import ConfigLoader, ModelLoader
 from fairseq2.models.utils import ConfigLoader, ModelLoader
 
 

+ 1 - 1
tests/common.py

@@ -56,7 +56,7 @@ def assert_unit_close(
     if percent_unit_tol > 0.0:
         num_unit_tol = int(percent_unit_tol * len(a))

-    num_unit_diff = (a != b).sum()
+    num_unit_diff = (a != b).sum()  # type: ignore
     assert (
         num_unit_diff <= num_unit_tol
     ), f"The difference is beyond tolerance, {num_unit_diff} units are different, tolerance is {num_unit_tol}"

+ 4 - 0
tests/conftest.py

@@ -37,6 +37,10 @@ def pytest_addoption(parser: pytest.Parser) -> None:
 def pytest_sessionstart(session: pytest.Session) -> None:
     tests.common.device = cast(Device, session.config.getoption("device"))

+    from fairseq2.assets import asset_store
+
+    asset_store.env_resolvers.append(lambda: "integ_test")
+

 @pytest.fixture(scope="module")
 def example_rate16k_audio() -> AudioDecoderOutput:
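Registering the `integ_test` environment for the whole test session means that any asset card published under that environment takes precedence over the default card of the same name. A rough sketch of the effect, with a hypothetical card name:

    from fairseq2.assets import asset_store

    # Added in conftest.py: every card lookup now also resolves the
    # "integ_test" environment.
    asset_store.env_resolvers.append(lambda: "integ_test")

    # Hypothetical lookup: if a local card variant for "integ_test" exists
    # (for example one pointing at a locally stored checkpoint), it overrides
    # the stock card; otherwise the default card is returned unchanged.
    card = asset_store.retrieve_card("vocoder_v2")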

+ 7 - 2
tests/integration/models/test_watermarked_vocoder.py

@@ -8,6 +8,7 @@ import sys
 from argparse import Namespace
 from pathlib import Path
 from typing import Final, List, Optional, cast
+import os
 import pytest

 import torch
@@ -51,7 +52,9 @@ def load_watermarking_model() -> Optional[Module]:
     assert wm_spec.loader, f"Module cannot be loaded from {wm_py_file}"
     wm_spec.loader.exec_module(wm_py_module)

-    return cast(Module, wm_py_module.model_from_checkpoint(device=device, dtype=dtype))
+    ckpt = os.getenv("SEAMLESS_WM_CKPT", "")
+
+    return cast(Module, wm_py_module.model_from_checkpoint(device=device, checkpoint=ckpt, dtype=dtype))


 @pytest.mark.parametrize("sr", [16_000, 24_000])
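The `SEAMLESS_WM_CKPT` environment variable lets the integration test point at a locally stored watermarking checkpoint; when it is unset, the path from `seamlesswatermark.yaml` is used as before. A hedged example of driving this from a small wrapper script (the checkpoint path is a placeholder):

    import os
    import subprocess

    # Placeholder path to a locally downloaded watermarking checkpoint.
    os.environ["SEAMLESS_WM_CKPT"] = "/checkpoints/seamless_wm_local.pt"

    # Run only the watermarked-vocoder integration tests with that checkpoint.
    subprocess.run(
        ["pytest", "tests/integration/models/test_watermarked_vocoder.py"],
        check=True,
    )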
@@ -133,7 +136,9 @@ def test_pretssel_vocoder_watermarking(

     # Test that the watermark is detectable
     detection = watermarker.detect_watermark(wav_wm)  # type: ignore
-    assert torch.all(detection[:, 1, :] > 0.5)
+
+    # 0.9 is the current lower bound of the watermark detection rate across all attacks
+    assert torch.count_nonzero(torch.gt(detection[:, 1, :], 0.5)) / detection.shape[-1] > 0.9

     # Remove the batch and compare parity on the overlapping frames
     wav_wm = wav_wm.squeeze(0)
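For clarity, the relaxed assertion checks the fraction of frames whose watermark probability exceeds 0.5 instead of requiring every frame to pass. A small self-contained illustration, assuming the detector output shape `(batch, 2, num_frames)` used in the test and a batch size of 1; the values are fabricated so the example passes:

    import torch

    # Fabricated detector output: channel 1 holds the per-frame watermark
    # probability.
    detection = torch.full((1, 2, 100), 0.95)

    flagged = torch.count_nonzero(torch.gt(detection[:, 1, :], 0.5))
    detection_rate = flagged / detection.shape[-1]  # valid for batch size 1

    # Mirrors the test: at least 90% of frames must be flagged as watermarked.
    assert detection_rate > 0.9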