# Test environment detection.
# Fix: the cell's printed output ("Running in Solveit: False") had been
# fused onto the assert line, making the cell invalid Python.
is_solveit = in_solveit()
print(f"Running in Solveit: {is_solveit}")
# in_solveit must always return a bool, never None or a truthy sentinel.
assert isinstance(is_solveit, bool)
This module implements the RLM (Recursive Language Model) protocol from rlmpaper using claudette as the LLM backend.
Design notes: results are passed through an explicit `ns` dict parameter (no frame walking); compatibility helpers live in `_rlmpaper_compat.py`; `rlm_run` returns `(answer, iterations, ns)` for inspection.

in_solveit: Check if running in the Solveit environment.
Solveit injects __msg_id into the call stack. This function tests for that.
These functions provide the REPL environment’s llm_query and llm_query_batched capabilities. They require an explicit namespace dict and store results there.
Query a sub-LLM and store the result in namespace.
Args:
    prompt: The prompt to send to the LLM.
    ns: Namespace dict where the result will be stored.
    name: Variable name for storing the result.
    model: Claude model to use.
Returns: The LLM’s response (also stored in ns[name])
✓ llm_query signature test passed
Query LLM with multiple prompts concurrently.
NOTE: Currently executes sequentially due to claudette’s synchronous API. True concurrency pending claudette async API support. The async scaffolding is in place for when that becomes available.
Args:
    prompts: List of prompt strings.
    ns: Namespace dict where results will be stored.
    name: Variable name for storing the list of results.
    model: Claude model to use.
Returns: List of LLM responses (also stored in ns[name])
# Test llm_query_batched signature.
# Fix: the cell's printed output had been fused onto the final print line,
# making the cell invalid Python.
test_ns = {}
# Note: Commented out to avoid API calls during CI
# prompts = ["Say 'one'", "Say 'two'"]
# result = llm_query_batched(prompts, test_ns, name='batch')
# assert 'batch' in test_ns
# assert len(test_ns['batch']) == 2
print("✓ llm_query_batched signature test passed")
Execute Python code in a namespace and capture stdout/stderr.
Execute code in namespace and return result.
Captures stdout, stderr, and any exceptions. The namespace is mutated with any variables created during execution.
Args:
    code: Python code to execute.
    ns: Namespace dict for execution.
Returns: REPLResult with stdout, stderr, locals snapshot, execution_time
# Verify exec_code runs code against a caller-supplied namespace dict:
# created variables land in the dict, and printed output is captured.
scratch_ns = {}
run = exec_code("x = 2 + 2\nprint(x)", scratch_ns)
assert scratch_ns['x'] == 4          # namespace was mutated by the code
assert '4' in run.stdout             # stdout was captured
assert run.execution_time > 0        # timing was recorded
print("✓ exec_code works")
# Test error handling: exceptions raised by the executed code must be
# captured into result.stderr rather than propagating to the caller.
# Fix: two lines of cell output ("✓ exec_code works" / "✓ exec_code error
# handling works") had been fused into this cell, breaking its syntax.
test_ns = {}
result = exec_code("raise ValueError('test error')", test_ns)
assert 'ValueError: test error' in result.stderr
print("✓ exec_code error handling works")
The main RLM iteration loop. Follows the rlmpaper protocol:
1. Build system prompt with metadata
2. Add first-iteration safeguard
3. Execute REPL code blocks
4. Check for FINAL/FINAL_VAR
5. Repeat until answer or max iterations
Run RLM loop until FINAL or max iterations.
This implements the RLM protocol: the root LLM emits repl code blocks which are executed in a namespace with context, llm_query, llm_query_batched, and FINAL_VAR available. The loop continues until the model returns FINAL(…) or FINAL_VAR(…).
Args:
    query: User's question to answer.
    context: Context data (str, list of str, or dict).
    ns: Namespace dict (if None, creates fresh namespace).
    model: Claude model to use.
    max_iters: Maximum iterations before giving up.
    logger: RLMLogger instance for JSON-lines logging (optional).
    verbose: Enable Rich console output (default: False).

Returns:
    (answer, iterations, namespace) tuple where:
    - answer: Final answer string (or fallback if max_iters reached)
    - iterations: List of RLMIteration objects
    - namespace: The dict containing all REPL variables
✓ QueryMetadata works
✓ find_code_blocks works
# Test find_final_answer (from _rlmpaper_compat).
# Fix: the cell's printed output had been fused onto the final print line,
# making the cell invalid Python.
assert find_final_answer("FINAL(42)") == "42"
assert find_final_answer("FINAL(The answer is 42)") == "The answer is 42"
# FINAL_VAR resolves a variable name against the supplied namespace
test_ns = {'result': 'hello world'}
assert find_final_answer("FINAL_VAR(result)", ns=test_ns) == "hello world"
# No FINAL/FINAL_VAR marker present -> None
assert find_final_answer("Just some text") is None
print("✓ find_final_answer works")
# Test rlm_run with simple mock scenario.
# Note: This doesn't call LLM APIs, just tests the structure.
# Fix: two lines of cell output had been fused onto the final print line,
# making the cell invalid Python.
context = ["The capital of France is Paris."]
test_ns = {}
# We can't easily test without API calls, but we can verify the function
# signature and that it sets up the namespace correctly.
meta = QueryMetadata(context)
test_ns['context'] = context
test_ns['llm_query'] = partial(llm_query, ns=test_ns, model='claude-sonnet-4-5')
test_ns['llm_query_batched'] = partial(llm_query_batched, ns=test_ns, model='claude-sonnet-4-5')

# Mirrors the FINAL_VAR callable that rlm_run installs in the REPL namespace.
def _test_final_var(variable_name: str) -> str:
    """Return str(test_ns[variable_name]); the name may arrive quoted."""
    # Models sometimes emit FINAL_VAR("name") — strip whitespace and quotes.
    variable_name = variable_name.strip().strip('"').strip("'")
    if variable_name in test_ns:
        return str(test_ns[variable_name])
    return f"Error: Variable '{variable_name}' not found in namespace"

test_ns['FINAL_VAR'] = _test_final_var

# Namespace must expose the four REPL entry points, all callable (or data).
assert 'context' in test_ns
assert 'llm_query' in test_ns
assert 'llm_query_batched' in test_ns
assert 'FINAL_VAR' in test_ns
assert callable(test_ns['llm_query'])
assert callable(test_ns['llm_query_batched'])
assert callable(test_ns['FINAL_VAR'])
# Test FINAL_VAR function behavior: hit and miss.
test_ns['my_var'] = 'test value'
assert test_ns['FINAL_VAR']('my_var') == 'test value'
assert 'Error' in test_ns['FINAL_VAR']('nonexistent')
print("✓ rlm_run namespace setup works")
print("✓ FINAL_VAR executable function works")
# Simple usage (requires API key).
# Fix: the "# Persistent namespace across runs" heading had been fused onto
# the preceding print line, breaking the cell's syntax.
context = ["The speed of light is 299,792,458 m/s."]
answer, iterations, ns = rlm_run("What is the speed of light?", context)
print(f"Answer: {answer}")
print(f"Iterations: {len(iterations)}")
print(f"Variables in namespace: {[k for k in ns.keys() if not k.startswith('_')]}")

# Persistent namespace across runs
ns = {}
# Define example contexts (tools lists elided with Ellipsis placeholders)
context1 = {'prompt': 'Query ontology for X', 'tools': [...]}
context2 = {'prompt': 'Query ontology for Y', 'tools': [...]}
# Run multiple queries, reusing the same namespace so variables persist
answer1, iters1, ns = rlm_run("What is X?", context1, ns=ns)
answer2, iters2, ns = rlm_run("What about Y?", context2, ns=ns)
# ns now contains variables from both runs