In [1]:
# Use the %pip magic (not !pip) so the package is installed into the environment
# of the running kernel rather than whichever `pip` is first on the shell PATH.
%pip install transformers==4.52.3
Requirement already satisfied: transformers==4.52.3 in /usr/local/lib/python3.12/dist-packages (4.52.3) Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (3.20.0) Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (0.36.0) Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (2.0.2) Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (25.0) Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (6.0.3) Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (2024.11.6) Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (2.32.4) Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (0.21.4) Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (0.6.2) Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers==4.52.3) (4.67.1) Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers==4.52.3) (2025.3.0) Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers==4.52.3) (4.15.0) Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers==4.52.3) (1.2.0) Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.52.3) 
(3.4.4) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.52.3) (3.11) Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.52.3) (2.5.0) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.52.3) (2025.10.5)
In [2]:
import os
# Turn off the tokenizers library's own parallelism for this process.
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})
In [3]:
import numpy as np
import torch
In [4]:
# Pinned to the versions resolved in the recorded run so a fresh kernel reproduces
# the same environment; %pip installs into the running kernel's environment.
%pip install lz4==4.4.5 graphviz==0.21
Requirement already satisfied: lz4 in /usr/local/lib/python3.12/dist-packages (4.4.5)
Requirement already satisfied: graphviz in /usr/local/lib/python3.12/dist-packages (0.21)
In [5]:
import torch
import torch.nn as nn
from torch.export import Dim
from transformers import AutoTokenizer, AutoModelForCausalLM
from threadpoolctl import threadpool_limits
from dl_backtrace.pytorch_backtrace import DLBacktrace
import warnings
warnings.filterwarnings("ignore")
# --- Environment Setup ---
# NOTE(review): torch has already been imported by the time this runs; confirm
# that TORCH_LOGS set here is still picked up by the logging configuration.
os.environ["TORCH_LOGS"] = "+dynamic"

# Let PyTorch use every CPU core the OS reports. os.cpu_count() may return None,
# in which case the thread count is left at its current value.
num_cores = os.cpu_count()
if not num_cores:
    print("Could not determine number of CPU cores for PyTorch.")
else:
    torch.set_num_threads(num_cores)
    # Read the value back so the printed number is what PyTorch actually uses.
    print(f"PyTorch num_threads set to: {torch.get_num_threads()}")
Using /root/.cache/torch_extensions/py312_cu126 as PyTorch extensions root... Creating extension directory /root/.cache/torch_extensions/py312_cu126/linear_layer_cuda_v3... Detected CUDA files, patching ldflags Emitting ninja build file /root/.cache/torch_extensions/py312_cu126/linear_layer_cuda_v3/build.ninja... Building extension module linear_layer_cuda_v3... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module linear_layer_cuda_v3... Using /root/.cache/torch_extensions/py312_cu126 as PyTorch extensions root... Creating extension directory /root/.cache/torch_extensions/py312_cu126/custom_embedding_layer_cuda_v2... Detected CUDA files, patching ldflags Emitting ninja build file /root/.cache/torch_extensions/py312_cu126/custom_embedding_layer_cuda_v2/build.ninja... Building extension module custom_embedding_layer_cuda_v2... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module custom_embedding_layer_cuda_v2... Using /root/.cache/torch_extensions/py312_cu126 as PyTorch extensions root... Creating extension directory /root/.cache/torch_extensions/py312_cu126/fused_softmax... Detected CUDA files, patching ldflags Emitting ninja build file /root/.cache/torch_extensions/py312_cu126/fused_softmax/build.ninja... Building extension module fused_softmax... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module fused_softmax... Using /root/.cache/torch_extensions/py312_cu126 as PyTorch extensions root... Creating extension directory /root/.cache/torch_extensions/py312_cu126/fused_stabilize_normalize... Detected CUDA files, patching ldflags Emitting ninja build file /root/.cache/torch_extensions/py312_cu126/fused_stabilize_normalize/build.ninja... Building extension module fused_stabilize_normalize... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module fused_stabilize_normalize... Using /root/.cache/torch_extensions/py312_cu126 as PyTorch extensions root... Creating extension directory /root/.cache/torch_extensions/py312_cu126/fused_conservation... Detected CUDA files, patching ldflags Emitting ninja build file /root/.cache/torch_extensions/py312_cu126/fused_conservation/build.ninja... Building extension module fused_conservation... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
PyTorch num_threads set to: 12
Loading extension module fused_conservation...
In [6]:
# --- Model and Tokenizer Setup ---
model_id = "Qwen/Qwen3-0.6B"  # alternative: "Qwen/Qwen3-4B"

# SECURITY: access tokens must never be committed to a notebook. The token is read
# from the HF_TOKEN environment variable; when it is unset, None is passed and the
# download falls back to whatever credentials (if any) the Hub client finds itself.
# Any token that was previously pasted into this cell must be treated as leaked
# and revoked in the Hugging Face account settings.
auth_token = os.environ.get("HF_TOKEN")


class QwenWrapper(nn.Module):
    """Wrap a causal LM so that forward() returns only the logits tensor.

    Args:
        model_id: Hub identifier (or local path) passed to ``from_pretrained``.
        token: Hugging Face access token, or None.
    """

    def __init__(self, model_id, token):
        super().__init__()
        # Load in float32 and switch to eval mode right away.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float32,
            token=token,
        ).eval()

    def forward(self, input_ids, attention_mask):
        """Run the wrapped model and return its ``.logits`` attribute."""
        return self.model(input_ids=input_ids, attention_mask=attention_mask).logits


model = QwenWrapper(model_id, auth_token)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=auth_token)
# Pad with the EOS token so batches of unequal length can be padded.
tokenizer.pad_token = tokenizer.eos_token
In [7]:
# --- Input Sentences ---
# One prompt per entry; add more strings (e.g. "Hello", "What's 2+2 ?") to run a batch.
sentences = ["What is the capital of France?"]

tokens = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True)
input_ids, attention_mask = tokens["input_ids"], tokens["attention_mask"]

# --- Dynamic Shapes ---
# With a single prompt the batch axis is kept static (plain 1); with several
# prompts it becomes a symbolic dimension bounded by the number of prompts.
batch_dim = Dim("batch", min=1, max=len(sentences)) if len(sentences) > 1 else 1
# The sequence axis is symbolic, bounded above by the tokenized prompt length.
seq_dim = Dim("seq", min=1, max=input_ids.shape[1])
# Both model inputs share the same (batch, seq) axis specification.
dynamic_shapes = {
    name: {0: batch_dim, 1: seq_dim}
    for name in ("input_ids", "attention_mask")
}
In [7]:
In [8]:
# --- Create an instance of DLBacktrace ---
# The example inputs are passed as one tuple, in the same order as
# QwenWrapper.forward(input_ids, attention_mask).
example_inputs = (input_ids, attention_mask)
ir = DLBacktrace(
    model,
    example_inputs,
    device="cuda",  # use "cpu" when no GPU is available
    dynamic_shapes=dynamic_shapes,
    verbose=False,
)
Setting up DL-Bactrace
✅ Model exported deterministically
In [8]:
Single Token¶
In [9]:
# Single-token run: sampling knobs are all None and exactly one new token is
# requested; scores, relevance and layer-wise outputs are asked for as well.
single_token_kwargs = dict(
    temperature=None,
    top_k=None,
    top_p=None,
    max_new_tokens=1,
    min_new_tokens=None,
    eos_token_id=tokenizer.eos_token_id,
    early_stopping=True,
    return_scores=True,
    return_relevance=True,
    return_layerwise_output=True,
)
out = ir.sample_auto(tokenizer, input_ids, attention_mask, **single_token_kwargs)
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
2025-11-04 11:49:41 - dlbacktrace.execution_engine - INFO - ExecutionEngineNoCache initialized with debug=False, log_level=INFO
2025-11-04 11:49:41 - dlbacktrace.execution_engine - INFO - Starting execution with 2 inputs
2025-11-04 11:49:41 - dlbacktrace.execution_engine - INFO - Executing `run_execution_nocache` ...!
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_13` produced extreme values → max: 4.20e+07
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_16` produced extreme values → max: 4.20e+07
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_17` produced extreme values → max: 4.20e+07
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_20` produced extreme values → max: 4.20e+07
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_21` produced extreme values → max: 4.20e+07
2025-11-04 11:49:43 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_24` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_25` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_28` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_29` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_32` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_33` produced extreme values → max: 4.20e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_36` produced extreme values → max: 4.19e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_37` produced extreme values → max: 4.19e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_40` produced extreme values → max: 4.19e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_41` produced extreme values → max: 4.19e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_44` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_45` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_48` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_49` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_52` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_53` produced extreme values → max: 4.18e+07
2025-11-04 11:49:44 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_56` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_57` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_60` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_61` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_64` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_65` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_68` produced extreme values → max: 4.16e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_69` produced extreme values → max: 4.16e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_72` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_73` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_76` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_77` produced extreme values → max: 4.17e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_80` produced extreme values → max: 4.18e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_81` produced extreme values → max: 4.18e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_84` produced extreme values → max: 4.19e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_85` produced extreme values → max: 4.19e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_88` produced extreme values → max: 4.20e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_89` produced extreme values → max: 4.20e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_92` produced extreme values → max: 4.21e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_93` produced extreme values → max: 4.21e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_96` produced extreme values → max: 4.22e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_97` produced extreme values → max: 4.22e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_100` produced extreme values → max: 4.22e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_101` produced extreme values → max: 4.22e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_104` produced extreme values → max: 4.23e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_105` produced extreme values → max: 4.23e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_108` produced extreme values → max: 4.21e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_109` produced extreme values → max: 3.92e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_112` produced extreme values → max: 3.92e+07
2025-11-04 11:49:45 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_113` produced extreme values → max: 1.72e+06
======arg.shape===== (1, 7, 151936) next_token_logit: (1, 151936) predicted_token (from target_indices): [[576]] batch: 0, token: [576] target_relevance --- value: 1.0000, shape: (1, 7, 151936)
Backtracing: 0%| | 0/2877 [00:00<?, ?it/s]
Backtracing: 0%| | 2/2877 [00:00<03:43, 12.85it/s]
Backtracing: 1%| | 33/2877 [00:00<00:30, 93.16it/s]
Backtracing: 9%|▉ | 255/2877 [00:00<00:03, 710.36it/s]
Backtracing: 15%|█▌ | 435/2877 [00:00<00:02, 1025.82it/s]
Backtracing: 21%|██▏ | 615/2877 [00:00<00:01, 1252.19it/s]
Backtracing: 28%|██▊ | 815/2877 [00:00<00:01, 1473.01it/s]
Backtracing: 35%|███▌ | 1013/2877 [00:00<00:01, 1620.48it/s]
Backtracing: 42%|████▏ | 1195/2877 [00:01<00:01, 1655.18it/s]
Backtracing: 48%|████▊ | 1377/2877 [00:01<00:00, 1700.16it/s]
Backtracing: 56%|█████▌ | 1600/2877 [00:01<00:00, 1843.61it/s]
Backtracing: 62%|██████▏ | 1790/2877 [00:01<00:00, 1808.14it/s]
Backtracing: 69%|██████▉ | 1991/2877 [00:01<00:00, 1866.86it/s]
Backtracing: 76%|███████▌ | 2189/2877 [00:01<00:00, 1891.46it/s]
Backtracing: 88%|████████▊ | 2519/2877 [00:01<00:00, 2304.35it/s]
Backtracing: 96%|█████████▌| 2752/2877 [00:01<00:00, 1660.52it/s]
Backtracing: 100%|██████████| 2877/2877 [00:01<00:00, 1553.75it/s]
In [10]:
# sample_auto returned a (generated ids, info dict) pair.
generated, info = out

# Decode the prompt, then the prompt plus the generated continuation.
prompt_text = tokenizer.batch_decode(input_ids, skip_special_tokens=True)
print(f"Input prompt: {prompt_text}")
completed_text = tokenizer.batch_decode(generated, skip_special_tokens=True)
print(f"Prompt after Generated Tokens: {completed_text}")
Input prompt: ['What is the capital of France?'] Prompt after Generated Tokens: ['What is the capital of France? The']
In [11]:
# Pull the relevance trace out of the info dict returned by sample_auto.
# Presumably one entry per generated token (length 1 for this one-token run) — TODO confirm.
timewise_relevance_out = info['relevance_trace']
len(timewise_relevance_out)
Out[11]:
1
In [12]:
# Pull the layer-wise output trace out of the info dict returned by sample_auto.
# Presumably one entry per generated token (length 1 for this one-token run) — TODO confirm.
timewise_layerwise_output = info['layerwise_output_trace']
len(timewise_layerwise_output)
Out[12]:
1
In [13]:
# Token-wise relevance map for the single-token run: built from the relevance
# trace, the prompt ids, the tokenizer and the generated ids.
ir.visualize_tokenwise_relevance_map(timewise_relevance_out, input_ids, tokenizer, generated_ids=generated)
In [14]:
# Input heatmap for the n-th generated token of the single-token run.
# n=0 presumably selects the first (here: only) generated token — TODO confirm indexing.
ir.visualize_input_heatmap_for_token(timewise_relevance_out, n=0, input_ids=input_ids, tokenizer=tokenizer, generated_ids=generated)
In [15]:
# Render the DL-Backtrace graph for the single-token run.
# NOTE(review): the recorded run reported 2877 nodes and collapsed the graph,
# presumably because that exceeds engine_auto_threshold=2500 — confirm.
print("\n===== Visualizing DL-Backtrace =====")
ir.visualize_dlbacktrace(output_path="qwen3_single_token", engine_auto_threshold=2500)
===== Visualizing DL-Backtrace ===== num_nodes: 2877 big graph → collapsing it ... Calculate relevance using `visualize_relevance_fast(...)`
✅ Fast graph saved → backtrace_collapsed_fast.svg (nodes=1313, engine=dot)
In [15]:
In [15]:
Multi-Token¶
1. Greedy Approach¶
In [16]:
# Greedy (no knobs): temperature / top_k / top_p are all None, and generation
# stops on EOS as soon as min_new_tokens allows.
# NOTE(review): this cell sits under the "Multi-Token" heading but still requests
# max_new_tokens=1, so it yields one token just like the single-token run above —
# confirm whether a larger token budget was intended.
greedy_kwargs = {
    "temperature": None,
    "top_k": None,
    "top_p": None,
    "max_new_tokens": 1,
    "min_new_tokens": None,
    "eos_token_id": tokenizer.eos_token_id,
    "early_stopping": True,
    "return_scores": True,
    "return_relevance": True,
    "return_layerwise_output": True,
}
out1 = ir.sample_auto(tokenizer, input_ids, attention_mask, **greedy_kwargs)
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
2025-11-04 11:53:33 - dlbacktrace.execution_engine - INFO - ExecutionEngineNoCache initialized with debug=False, log_level=INFO
2025-11-04 11:53:33 - dlbacktrace.execution_engine - INFO - Starting execution with 2 inputs
2025-11-04 11:53:33 - dlbacktrace.execution_engine - INFO - Executing `run_execution_nocache` ...!
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_13` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_16` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_17` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_20` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_21` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_24` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_25` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_28` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_29` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_32` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_33` produced extreme values → max: 4.20e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_36` produced extreme values → max: 4.19e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_37` produced extreme values → max: 4.19e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_40` produced extreme values → max: 4.19e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_41` produced extreme values → max: 4.19e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_44` produced extreme values → max: 4.18e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_45` produced extreme values → max: 4.18e+07
2025-11-04 11:53:35 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_48` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_49` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_52` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_53` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_56` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_57` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_60` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_61` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_64` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_65` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_68` produced extreme values → max: 4.16e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_69` produced extreme values → max: 4.16e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_72` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_73` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_76` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_77` produced extreme values → max: 4.17e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_80` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_81` produced extreme values → max: 4.18e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_84` produced extreme values → max: 4.19e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_85` produced extreme values → max: 4.19e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_88` produced extreme values → max: 4.20e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_89` produced extreme values → max: 4.20e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_92` produced extreme values → max: 4.21e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_93` produced extreme values → max: 4.21e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_96` produced extreme values → max: 4.22e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_97` produced extreme values → max: 4.22e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_100` produced extreme values → max: 4.22e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_101` produced extreme values → max: 4.22e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_104` produced extreme values → max: 4.23e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_105` produced extreme values → max: 4.23e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_108` produced extreme values → max: 4.21e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_109` produced extreme values → max: 3.92e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_112` produced extreme values → max: 3.92e+07
2025-11-04 11:53:36 - dlbacktrace.execution_engine - WARNING - [WARNING:EXTREME] Node `pow_113` produced extreme values → max: 1.72e+06
======arg.shape===== (1, 7, 151936) next_token_logit: (1, 151936) predicted_token (from target_indices): [[576]] batch: 0, token: [576] target_relevance --- value: 1.0000, shape: (1, 7, 151936)
Backtracing: 0%| | 0/2877 [00:00<?, ?it/s]
Backtracing: 0%| | 2/2877 [00:00<03:41, 12.98it/s]
Backtracing: 6%|▋ | 186/2877 [00:00<00:03, 876.03it/s]
Backtracing: 13%|█▎ | 363/2877 [00:00<00:02, 1235.59it/s]
Backtracing: 19%|█▉ | 552/2877 [00:00<00:01, 1472.82it/s]
Backtracing: 26%|██▋ | 760/2877 [00:00<00:01, 1675.20it/s]
Backtracing: 33%|███▎ | 942/2877 [00:00<00:01, 1702.93it/s]
Backtracing: 39%|███▉ | 1125/2877 [00:00<00:01, 1735.76it/s]
Backtracing: 47%|████▋ | 1348/2877 [00:00<00:00, 1884.57it/s]
Backtracing: 54%|█████▎ | 1540/2877 [00:00<00:00, 1847.35it/s]
Backtracing: 61%|██████ | 1757/2877 [00:01<00:00, 1943.48it/s]
Backtracing: 68%|██████▊ | 1954/2877 [00:01<00:00, 1870.29it/s]
Backtracing: 75%|███████▍ | 2148/2877 [00:01<00:00, 1890.02it/s]
Backtracing: 85%|████████▍ | 2433/2877 [00:01<00:00, 2171.36it/s]
Backtracing: 92%|█████████▏| 2652/2877 [00:01<00:00, 1514.90it/s]
Backtracing: 100%|██████████| 2877/2877 [00:01<00:00, 1756.58it/s]
In [17]:
# Unpack the greedy run's (generated ids, info dict) pair.
generated1, info1 = out1

# Show the decoded prompt, then the decoded prompt plus continuation.
greedy_prompt_text = tokenizer.batch_decode(input_ids, skip_special_tokens=True)
print(f"Input prompt: {greedy_prompt_text}")
greedy_completed_text = tokenizer.batch_decode(generated1, skip_special_tokens=True)
print(f"Prompt after Generated Tokens: {greedy_completed_text}")
Input prompt: ['What is the capital of France?'] Prompt after Generated Tokens: ['What is the capital of France? The']
In [18]:
# List which traces sample_auto returned for the greedy run.
info1.keys()
Out[18]:
dict_keys(['scores_trace', 'relevance_trace', 'layerwise_output_trace'])
In [19]:
# Relevance trace of the greedy run, taken from its info dict.
# Presumably one entry per generated token — TODO confirm.
timewise_relevance_out1 = info1['relevance_trace']
len(timewise_relevance_out1)
Out[19]:
1
In [20]:
# Layer-wise output trace of the greedy run, taken from its info dict.
# Presumably one entry per generated token — TODO confirm.
timewise_layerwise_output1 = info1['layerwise_output_trace']
len(timewise_layerwise_output1)
Out[20]:
1
In [21]:
# Token-wise relevance map for the greedy run: built from its relevance trace,
# the prompt ids, the tokenizer and the greedy run's generated ids.
ir.visualize_tokenwise_relevance_map(timewise_relevance_out1, input_ids, tokenizer, generated_ids=generated1)
In [22]:
# Input heatmap for the n-th generated token of the greedy run.
# n=0 presumably selects the first generated token — TODO confirm indexing.
ir.visualize_input_heatmap_for_token(timewise_relevance_out1, n=0, input_ids=input_ids, tokenizer=tokenizer, generated_ids=generated1)
In [23]:
# Render the DL-Backtrace graph for the greedy run, using the same
# engine_auto_threshold as the single-token visualization.
print("\n===== Visualizing DL-Backtrace =====")
ir.visualize_dlbacktrace(output_path="qwen3_greedy", engine_auto_threshold=2500)
===== Visualizing DL-Backtrace ===== num_nodes: 2877 big graph → collapsing it ... Calculate relevance using `visualize_relevance_fast(...)`