In [1]:
!pip install transformers==4.52.3
Requirement already satisfied: transformers==4.52.3 in /usr/local/lib/python3.12/dist-packages (4.52.3)
Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (3.20.0)
Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (0.36.0)
Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (2.0.2)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (25.0)
Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (6.0.3)
Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (2024.11.6)
Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (2.32.4)
Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (0.21.4)
Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (0.6.2)
Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (4.67.1)
Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers==4.52.3) (2025.3.0)
Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers==4.52.3) (4.15.0)
Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers==4.52.3) (1.2.0)
Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.52.3) (3.4.4)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.52.3) (3.11)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.52.3) (2.5.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.52.3) (2025.10.5)
In [2]:
import os

# Set before any tokenizer is created.
# NOTE(review): presumably disables Hugging Face tokenizers' internal parallelism
# (and its fork warning) — confirm against the tokenizers documentation.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
In [3]:
import numpy as np
import torch
In [4]:
!pip install lz4
!pip install graphviz
Requirement already satisfied: lz4 in /usr/local/lib/python3.12/dist-packages (4.4.5)
Requirement already satisfied: graphviz in /usr/local/lib/python3.12/dist-packages (0.21)
In [5]:
import torch
import torch.nn as nn
from torch.export import Dim
from transformers import AutoTokenizer, AutoModelForCausalLM
from threadpoolctl import threadpool_limits
from dl_backtrace.pytorch_backtrace import DLBacktrace

import warnings
warnings.filterwarnings("ignore")

# --- Environment Setup ---
# NOTE(review): torch is already imported at this point. If TORCH_LOGS is only read
# while torch is being imported, this assignment has no effect in this kernel — confirm,
# and if so move it ahead of the first `import torch`.
os.environ["TORCH_LOGS"] = "+dynamic"

# Give PyTorch as many intra-op threads as the OS reports cores.
# os.cpu_count() may return None, hence the guard.
num_cores = os.cpu_count()
if not num_cores:
    print("Could not determine number of CPU cores for PyTorch.")
else:
    torch.set_num_threads(num_cores)
    print(f"PyTorch num_threads set to: {torch.get_num_threads()}")  # Verify
Using /root/.cache/torch_extensions/py312_cu126 as PyTorch extensions root...
Creating extension directory /root/.cache/torch_extensions/py312_cu126/linear_layer_cuda_v3...
Detected CUDA files, patching ldflags
Emitting ninja build file /root/.cache/torch_extensions/py312_cu126/linear_layer_cuda_v3/build.ninja...
Building extension module linear_layer_cuda_v3...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module linear_layer_cuda_v3...
Using /root/.cache/torch_extensions/py312_cu126 as PyTorch extensions root...
Creating extension directory /root/.cache/torch_extensions/py312_cu126/custom_embedding_layer_cuda_v2...
Detected CUDA files, patching ldflags
Emitting ninja build file /root/.cache/torch_extensions/py312_cu126/custom_embedding_layer_cuda_v2/build.ninja...
Building extension module custom_embedding_layer_cuda_v2...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module custom_embedding_layer_cuda_v2...
Using /root/.cache/torch_extensions/py312_cu126 as PyTorch extensions root...
Creating extension directory /root/.cache/torch_extensions/py312_cu126/fused_softmax...
Detected CUDA files, patching ldflags
Emitting ninja build file /root/.cache/torch_extensions/py312_cu126/fused_softmax/build.ninja...
Building extension module fused_softmax...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module fused_softmax...
Using /root/.cache/torch_extensions/py312_cu126 as PyTorch extensions root...
Creating extension directory /root/.cache/torch_extensions/py312_cu126/fused_stabilize_normalize...
Detected CUDA files, patching ldflags
Emitting ninja build file /root/.cache/torch_extensions/py312_cu126/fused_stabilize_normalize/build.ninja...
Building extension module fused_stabilize_normalize...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module fused_stabilize_normalize...
Using /root/.cache/torch_extensions/py312_cu126 as PyTorch extensions root...
Creating extension directory /root/.cache/torch_extensions/py312_cu126/fused_conservation...
Detected CUDA files, patching ldflags
Emitting ninja build file /root/.cache/torch_extensions/py312_cu126/fused_conservation/build.ninja...
Building extension module fused_conservation...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
PyTorch num_threads set to: 12
Loading extension module fused_conservation...
In [6]:
# --- Model and Tokenizer Setup ---
model_id = "Qwen/Qwen3-0.6B"    #"Qwen/Qwen3-4B"

# Access tokens must never be committed to a notebook: read the Hugging Face token
# from the environment instead. Any token that was ever saved in this cell should be
# treated as leaked and revoked.
# NOTE(review): the checkpoint above is expected to be public, so an unset token
# (None) should still work — confirm.
auth_token = os.environ.get("HF_TOKEN")


class QwenWrapper(nn.Module):
    """Thin ``nn.Module`` around a causal LM whose forward returns only the logits.

    The underlying model is loaded in float32 and put in eval mode.

    Args:
        model_id: Hugging Face model identifier passed to ``from_pretrained``.
        token: Optional Hugging Face access token; ``None`` passes no explicit token.
    """

    def __init__(self, model_id, token=None):
        super().__init__()
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float32,
            token=token
        ).eval()

    def forward(self, input_ids, attention_mask):
        """Run the wrapped model and return its ``.logits`` tensor."""
        return self.model(input_ids=input_ids, attention_mask=attention_mask).logits


model = QwenWrapper(model_id, auth_token)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=auth_token)
# Reuse EOS as the padding token; prompts are tokenized with padding enabled.
tokenizer.pad_token = tokenizer.eos_token
In [7]:
# --- Input Sentences ---
# One prompt per list entry; append more strings here to trace a batch.
sentences = ["What is the capital of France?"]

tokens = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True)
input_ids, attention_mask = tokens["input_ids"], tokens["attention_mask"]

# --- Dynamic Shapes ---
# The batch axis is only symbolic when more than one prompt is given;
# a single prompt keeps it as the static size 1.
batch_dim = Dim("batch", min=1, max=len(sentences)) if len(sentences) > 1 else 1

# NOTE(review): the sequence axis is capped at the tokenized prompt length — confirm
# this bound is wide enough once more than one new token is generated.
seq_dim = Dim("seq", min=1, max=input_ids.shape[1])

# Both inputs share the same (batch, seq) specification.
dynamic_shapes = {
    name: {0: batch_dim, 1: seq_dim}
    for name in ("input_ids", "attention_mask")
}
In [7]:
 
In [8]:
# --- Create an instance of DLBacktrace ---
# Built from the wrapped model, one example (input_ids, attention_mask) pair and the
# dynamic-shape specification defined above.
ir = DLBacktrace(
    model,
    (input_ids, attention_mask),
    dynamic_shapes=dynamic_shapes,
    # Use the GPU when one is present; otherwise fall back to CPU instead of failing.
    device="cuda" if torch.cuda.is_available() else "cpu",
    verbose=False,
)
Setting up DL-Bactrace
✅ Model exported deterministically
In [8]:
 

Single Token

In [9]:
# Generate a single new token and collect scores, relevance and layer-wise outputs for it.
out = ir.sample_auto(
    tokenizer,
    input_ids,
    attention_mask,
    # generation length
    max_new_tokens=1,
    min_new_tokens=None,
    # sampling knobs, all left unset
    temperature=None,
    top_k=None,
    top_p=None,
    # stopping
    eos_token_id=tokenizer.eos_token_id,
    early_stopping=True,
    # extra traces returned alongside the generated ids
    return_scores=True,
    return_relevance=True,
    return_layerwise_output=True,
)
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
2025-11-04 11:49:41 - dlbacktrace.execution_engine - INFO - ExecutionEngineNoCache initialized with debug=False, log_level=INFO
2025-11-04 11:49:41 - dlbacktrace.execution_engine - INFO - Starting execution with 2 inputs
2025-11-04 11:49:41 - dlbacktrace.execution_engine - INFO - Executing `run_execution_nocache` ...!
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_13` produced extreme values → max: 4.20e+07
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_16` produced extreme values → max: 4.20e+07
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_17` produced extreme values → max: 4.20e+07
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_20` produced extreme values → max: 4.20e+07
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_21` produced extreme values → max: 4.20e+07
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_24` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_25` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_28` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_29` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_32` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_33` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_36` produced extreme values → max: 4.19e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_37` produced extreme values → max: 4.19e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_40` produced extreme values → max: 4.19e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_41` produced extreme values → max: 4.19e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_44` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_45` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_48` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_49` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_52` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_53` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_56` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_57` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_60` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_61` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_64` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_65` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_68` produced extreme values → max: 4.16e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_69` produced extreme values → max: 4.16e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_72` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_73` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_76` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_77` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_80` produced extreme values → max: 4.18e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_81` produced extreme values → max: 4.18e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_84` produced extreme values → max: 4.19e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_85` produced extreme values → max: 4.19e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_88` produced extreme values → max: 4.20e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_89` produced extreme values → max: 4.20e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_92` produced extreme values → max: 4.21e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_93` produced extreme values → max: 4.21e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_96` produced extreme values → max: 4.22e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_97` produced extreme values → max: 4.22e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_100` produced extreme values → max: 4.22e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_101` produced extreme values → max: 4.22e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_104` produced extreme values → max: 4.23e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_105` produced extreme values → max: 4.23e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_108` produced extreme values → max: 4.21e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_109` produced extreme values → max: 3.92e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_112` produced extreme values → max: 3.92e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_113` produced extreme values → max: 1.72e+06
======arg.shape===== (1, 7, 151936)
next_token_logit: (1, 151936)
predicted_token (from target_indices): [[576]]
batch: 0, token: [576]
target_relevance --- value: 1.0000, shape: (1, 7, 151936)
Backtracing:   0%|          | 0/2877 [00:00<?, ?it/s]
Backtracing:   0%|          | 2/2877 [00:00<03:43, 12.85it/s]
Backtracing:   1%|          | 33/2877 [00:00<00:30, 93.16it/s]
Backtracing:   9%|▉         | 255/2877 [00:00<00:03, 710.36it/s]
Backtracing:  15%|█▌        | 435/2877 [00:00<00:02, 1025.82it/s]
Backtracing:  21%|██▏       | 615/2877 [00:00<00:01, 1252.19it/s]
Backtracing:  28%|██▊       | 815/2877 [00:00<00:01, 1473.01it/s]
Backtracing:  35%|███▌      | 1013/2877 [00:00<00:01, 1620.48it/s]
Backtracing:  42%|████▏     | 1195/2877 [00:01<00:01, 1655.18it/s]
Backtracing:  48%|████▊     | 1377/2877 [00:01<00:00, 1700.16it/s]
Backtracing:  56%|█████▌    | 1600/2877 [00:01<00:00, 1843.61it/s]
Backtracing:  62%|██████▏   | 1790/2877 [00:01<00:00, 1808.14it/s]
Backtracing:  69%|██████▉   | 1991/2877 [00:01<00:00, 1866.86it/s]
Backtracing:  76%|███████▌  | 2189/2877 [00:01<00:00, 1891.46it/s]
Backtracing:  88%|████████▊ | 2519/2877 [00:01<00:00, 2304.35it/s]
Backtracing:  96%|█████████▌| 2752/2877 [00:01<00:00, 1660.52it/s]
Backtracing: 100%|██████████| 2877/2877 [00:01<00:00, 1553.75it/s]

In [10]:
# sample_auto returned a pair: the token ids after generation and a dict of traces.
generated, info = out

# Decode both the original prompt and the extended sequence for a side-by-side check.
print(f"Input prompt: {tokenizer.batch_decode(input_ids, skip_special_tokens=True)}")
print(f"Prompt after Generated Tokens: {tokenizer.batch_decode(generated, skip_special_tokens=True)}")
Input prompt: ['What is the capital of France?']
Prompt after Generated Tokens: ['What is the capital of France? The']
In [11]:
# Relevance trace collected during generation.
# NOTE(review): presumably one entry per generated token (1 in the recorded run) — confirm.
timewise_relevance_out = info['relevance_trace']
len(timewise_relevance_out)
Out[11]:
1
In [12]:
# Layer-wise outputs collected during generation.
# NOTE(review): presumably one entry per generated token (1 in the recorded run) — confirm.
timewise_layerwise_output = info['layerwise_output_trace']
len(timewise_layerwise_output)
Out[12]:
1
In [13]:
# Token-wise Relevance Map
# Rendered from the relevance trace together with the prompt ids and the generated ids.
ir.visualize_tokenwise_relevance_map(timewise_relevance_out, input_ids, tokenizer, generated_ids=generated)
No description has been provided for this image
In [14]:
# Input Heatmap for n-th Token
# NOTE(review): n=0 presumably selects the first (here the only) generated token — confirm.
ir.visualize_input_heatmap_for_token(timewise_relevance_out, n=0, input_ids=input_ids, tokenizer=tokenizer, generated_ids=generated)
No description has been provided for this image
In [15]:
print("\n===== Visualizing DL-Backtrace =====")
# NOTE(review): engine_auto_threshold is presumably the node count above which the graph
# is collapsed (the recorded run reported 2877 nodes and collapsed the graph) — confirm.
# NOTE(review): the recorded run saved `backtrace_collapsed_fast.svg` rather than a file
# named after output_path — confirm that output_path is honoured for collapsed graphs.
ir.visualize_dlbacktrace(output_path="qwen3_single_token", engine_auto_threshold=2500)
===== Visualizing DL-Backtrace =====
num_nodes: 2877
big graph → collapsing it ...
Calculate relevance using `visualize_relevance_fast(...)`
No description has been provided for this image
✅ Fast graph saved → backtrace_collapsed_fast.svg (nodes=1313, engine=dot)
In [15]:
 
In [15]:
 

Multi-Token

1. Greedy Approach
In [16]:
# Greedy (no knobs) + stop on EOS as soon as allowed by min_new_tokens
out1 = ir.sample_auto(
    tokenizer, input_ids, attention_mask,
    temperature=None, top_k=None, top_p=None,
    max_new_tokens=1, min_new_tokens=None,
    eos_token_id=tokenizer.eos_token_id,
    early_stopping=True,
    return_scores=True,
    return_relevance=True,
    return_layerwise_output=True,
)
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
2025-11-04 11:53:33 - dlbacktrace.execution_engine - INFO - ExecutionEngineNoCache initialized with debug=False, log_level=INFO
2025-11-04 11:53:33 - dlbacktrace.execution_engine - INFO - Starting execution with 2 inputs
2025-11-04 11:53:33 - dlbacktrace.execution_engine - INFO - Executing `run_execution_nocache` ...!
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_13` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_16` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_17` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_20` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_21` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_24` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_25` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_28` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_29` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_32` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_33` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_36` produced extreme values → max: 4.19e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_37` produced extreme values → max: 4.19e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_40` produced extreme values → max: 4.19e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_41` produced extreme values → max: 4.19e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_44` produced extreme values → max: 4.18e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_45` produced extreme values → max: 4.18e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_48` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_49` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_52` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_53` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_56` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_57` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_60` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_61` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_64` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_65` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_68` produced extreme values → max: 4.16e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_69` produced extreme values → max: 4.16e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_72` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_73` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_76` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_77` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_80` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_81` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_84` produced extreme values → max: 4.19e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_85` produced extreme values → max: 4.19e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_88` produced extreme values → max: 4.20e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_89` produced extreme values → max: 4.20e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_92` produced extreme values → max: 4.21e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_93` produced extreme values → max: 4.21e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_96` produced extreme values → max: 4.22e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_97` produced extreme values → max: 4.22e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_100` produced extreme values → max: 4.22e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_101` produced extreme values → max: 4.22e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_104` produced extreme values → max: 4.23e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_105` produced extreme values → max: 4.23e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_108` produced extreme values → max: 4.21e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_109` produced extreme values → max: 3.92e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_112` produced extreme values → max: 3.92e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_113` produced extreme values → max: 1.72e+06
======arg.shape===== (1, 7, 151936)
next_token_logit: (1, 151936)
predicted_token (from target_indices): [[576]]
batch: 0, token: [576]
target_relevance --- value: 1.0000, shape: (1, 7, 151936)
Backtracing:   0%|          | 0/2877 [00:00<?, ?it/s]
Backtracing:   0%|          | 2/2877 [00:00<03:41, 12.98it/s]
Backtracing:   6%|▋         | 186/2877 [00:00<00:03, 876.03it/s]
Backtracing:  13%|█▎        | 363/2877 [00:00<00:02, 1235.59it/s]
Backtracing:  19%|█▉        | 552/2877 [00:00<00:01, 1472.82it/s]
Backtracing:  26%|██▋       | 760/2877 [00:00<00:01, 1675.20it/s]
Backtracing:  33%|███▎      | 942/2877 [00:00<00:01, 1702.93it/s]
Backtracing:  39%|███▉      | 1125/2877 [00:00<00:01, 1735.76it/s]
Backtracing:  47%|████▋     | 1348/2877 [00:00<00:00, 1884.57it/s]
Backtracing:  54%|█████▎    | 1540/2877 [00:00<00:00, 1847.35it/s]
Backtracing:  61%|██████    | 1757/2877 [00:01<00:00, 1943.48it/s]
Backtracing:  68%|██████▊   | 1954/2877 [00:01<00:00, 1870.29it/s]
Backtracing:  75%|███████▍  | 2148/2877 [00:01<00:00, 1890.02it/s]
Backtracing:  85%|████████▍ | 2433/2877 [00:01<00:00, 2171.36it/s]
Backtracing:  92%|█████████▏| 2652/2877 [00:01<00:00, 1514.90it/s]
Backtracing: 100%|██████████| 2877/2877 [00:01<00:00, 1756.58it/s]

In [17]:
# sample_auto returned a pair: the token ids after generation and a dict of traces.
generated1, info1 = out1

# Decode both the original prompt and the extended sequence for a side-by-side check.
print(f"Input prompt: {tokenizer.batch_decode(input_ids, skip_special_tokens=True)}")
print(f"Prompt after Generated Tokens: {tokenizer.batch_decode(generated1, skip_special_tokens=True)}")
Input prompt: ['What is the capital of France?']
Prompt after Generated Tokens: ['What is the capital of France? The']
In [18]:
# List which traces the greedy run returned.
info1.keys()
Out[18]:
dict_keys(['scores_trace', 'relevance_trace', 'layerwise_output_trace'])
In [19]:
# Relevance trace collected during the greedy run.
# NOTE(review): presumably one entry per generated token (1 in the recorded run) — confirm.
timewise_relevance_out1 = info1['relevance_trace']
len(timewise_relevance_out1)
Out[19]:
1
In [20]:
# Layer-wise outputs collected during the greedy run.
# NOTE(review): presumably one entry per generated token (1 in the recorded run) — confirm.
timewise_layerwise_output1 = info1['layerwise_output_trace']
len(timewise_layerwise_output1)
Out[20]:
1
In [21]:
# Token-wise Relevance Map
# Rendered from the greedy run's relevance trace together with the prompt ids and the generated ids.
ir.visualize_tokenwise_relevance_map(timewise_relevance_out1, input_ids, tokenizer, generated_ids=generated1)
No description has been provided for this image
In [22]:
# Input Heatmap for n-th Token
# NOTE(review): n=0 presumably selects the first (here the only) generated token — confirm.
ir.visualize_input_heatmap_for_token(timewise_relevance_out1, n=0, input_ids=input_ids, tokenizer=tokenizer, generated_ids=generated1)
No description has been provided for this image
In [23]:
print("\n===== Visualizing DL-Backtrace =====")
# NOTE(review): engine_auto_threshold is presumably the node count above which the graph
# is collapsed (the recorded run reported 2877 nodes and collapsed the graph) — confirm.
ir.visualize_dlbacktrace(output_path="qwen3_greedy", engine_auto_threshold=2500)
===== Visualizing DL-Backtrace =====
num_nodes: 2877
big graph → collapsing it ...
Calculate relevance using `visualize_relevance_fast(...)`