1127 update to latest

2025-11-27 15:44:17 +08:00
parent e16c84aab2
commit a34d39430e
153 changed files with 25705 additions and 53 deletions
--- a/dllm/scripts/tests/test_attention_mask.py
+++ b/dllm/scripts/tests/test_attention_mask.py
@ -0,0 +1,144 @@
+"""
+LLaDA / MoE / Dream / RND attention mask invariance tests (compact version)
+"""
+
+import gc
+
+import torch
+import transformers
+import dllm
+import pytest
+
+# Tolerance passed as both atol and rtol to torch.allclose when comparing logits.
+ERROR_THRESHOLD = 1e-3
+
+
+def _cuda_cleanup():
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        # Reclaim interprocess memory blocks (useful after large model del)
+        try:
+            torch.cuda.ipc_collect()
+        except Exception:
+            # Not all PyTorch builds expose ipc_collect on all platforms
+            pass
+
+
+def _forward_variants(model):
+    """
+    Run the 5 padding/mask variants and return tensors sliced to the 'real' tokens [1,2,3,4].
+    Returns dict: {'A','B','C','D','E'} each [1, 4, H]
+    """
+    device = model.device
+
+    # A: no padding
+    a_ids = torch.tensor([[1, 2, 3, 4]], device=device)
+    a_mask = torch.tensor([[1, 1, 1, 1]], device=device)
+
+    # B: left-pad a 0
+    b_ids = torch.tensor([[0, 1, 2, 3, 4]], device=device)
+    b_mask = torch.tensor([[0, 1, 1, 1, 1]], device=device)
+
+    # C: right-pad a 0
+    c_ids = torch.tensor([[1, 2, 3, 4, 0]], device=device)
+    c_mask = torch.tensor([[1, 1, 1, 1, 0]], device=device)
+
+    # D: same as A but attention_mask=None
+    d_ids = torch.tensor([[1, 2, 3, 4]], device=device)
+    d_mask = None
+
+    # E: same as A but omit attention_mask entirely
+    e_ids = torch.tensor([[1, 2, 3, 4]], device=device)
+
+    with torch.no_grad():
+        out_A = model(input_ids=a_ids, attention_mask=a_mask).logits  # [1,4,H]
+        out_B = model(input_ids=b_ids, attention_mask=b_mask).logits[:, 1:]  # [1,4,H]
+        out_C = model(input_ids=c_ids, attention_mask=c_mask).logits[:, :-1]  # [1,4,H]
+        out_D = model(input_ids=d_ids, attention_mask=d_mask).logits  # [1,4,H]
+        out_E = model(input_ids=e_ids).logits  # [1,4,H]
+
+    return {"A": out_A, "B": out_B, "C": out_C, "D": out_D, "E": out_E}
+
+
+def _assert_invariance(outs: dict, tag: str):
+    ref = outs["A"]
+    for k in ("B", "C", "D", "E"):
+        assert torch.allclose(
+            ref, outs[k], atol=ERROR_THRESHOLD, rtol=ERROR_THRESHOLD
+        ), f"[{tag}] Mismatch A vs {k}"
+
+
+@pytest.mark.parametrize(
+    "repo, attn_impl, human_name",
+    [
+        ("GSAI-ML/LLaDA-8B-Base", None, "LLaDA Base"),
+        ("inclusionAI/LLaDA-MoE-7B-A1B-Base", None, "LLaDA MoE"),
+        ("Dream-org/Dream-v0-Base-7B", None, "Dream Base"),
+        ("radicalnumerics/RND1-Base-0910", None, "RND Base (native)"),
+        ("radicalnumerics/RND1-Base-0910", "sdpa", "RND Base (SDPA)"),
+    ],
+)
+def test_attention_mask_invariance(repo, attn_impl, human_name):
+    """
+    For each model/backend:
+      1) Check padding/mask invariance across A..E on the 'real' tokens.
+      2) Print a ✅ message for debug visibility (pytest still enforces assertions).
+    """
+    model_path = dllm.utils.resolve_with_base_env(repo, "BASE_MODELS_DIR")
+
+    if attn_impl is None:
+        model = transformers.AutoModel.from_pretrained(
+            model_path, dtype=torch.float32, device_map="auto"
+        ).eval()
+    else:
+        config = transformers.AutoConfig.from_pretrained(
+            model_path, attn_implementation=attn_impl
+        )
+        model = transformers.AutoModel.from_pretrained(
+            model_path, config=config, dtype=torch.float32, device_map="auto"
+        ).eval()
+
+    outs = _forward_variants(model)
+    _assert_invariance(outs, human_name)
+
+    print(f"✅ {human_name} attention mask invariance passed within {ERROR_THRESHOLD}.")
+    del model
+    gc.collect()
+    _cuda_cleanup()
+
+
+def test_rnd_native_vs_sdpa_equivalence():
+    """
+    Verify RND (native attention) and RND (SDPA) produce equivalent logits on the
+    same real tokens across A..E variants.
+    """
+    repo = "radicalnumerics/RND1-Base-0910"
+    model_path = dllm.utils.resolve_with_base_env(repo, "BASE_MODELS_DIR")
+
+    # native
+    model_native = transformers.AutoModel.from_pretrained(
+        model_path, dtype=torch.float32, device_map="auto"
+    ).eval()
+
+    # sdpa
+    config_sdpa = transformers.AutoConfig.from_pretrained(
+        model_path, attn_implementation="sdpa"
+    )
+    model_sdpa = transformers.AutoModel.from_pretrained(
+        model_path, config=config_sdpa, dtype=torch.float32, device_map="auto"
+    ).eval()
+
+    outs_native = _forward_variants(model_native)  # expects helper from your file
+    outs_sdpa = _forward_variants(model_sdpa)
+
+    for k in ("A", "B", "C", "D", "E"):
+        assert torch.allclose(
+            outs_native[k], outs_sdpa[k], atol=ERROR_THRESHOLD, rtol=ERROR_THRESHOLD
+        ), f"[RND cross-backend] native vs SDPA mismatch on {k}"
+
+    print(f"✅ RND native vs SDPA equivalence passed within {ERROR_THRESHOLD}.")
+    # Explicitly drop model references
+    del model_native
+    del model_sdpa
+    # Collect Python garbage and release CUDA caches
+    gc.collect()
+    _cuda_cleanup()
--- a/dllm/scripts/tests/test_dream_generation.py
+++ b/dllm/scripts/tests/test_dream_generation.py