Testing RLM Memory Step-by-Step

Incremental testing of dataset memory with RLM, using the solveit approach.

Step 1: Import and Setup

First, let’s import what we need and verify the modules load.

# Entry points exercised by the steps below: dataset setup, the RLM runner,
# and ontology setup.
from rlm.dataset import setup_dataset_context
from rlm.core import rlm_run
from rlm.ontology import setup_ontology_context

# Reaching this line means all three imports above resolved.
print("✓ Imports successful")

Step 2: Basic Dataset Setup

Create a dataset and verify it initializes correctly.

# The namespace starts empty; setup_dataset_context is expected to fill it in
ns = {}

# Run the setup and show whatever it reports back
result = setup_dataset_context(ns)
print(result)

# Names the setup step must have bound, each paired with its failure message
expected_bindings = (
    ('ds', "Should have dataset"),
    ('ds_meta', "Should have metadata"),
    ('mem_add', "Should have mem_add function"),
    ('mem_query', "Should have mem_query function"),
)
for binding, failure_msg in expected_bindings:
    assert binding in ns, failure_msg

session_id = ns['ds_meta'].session_id
print(f"✓ Dataset created with session_id: {session_id}")

Step 3: Test Memory Operations Directly

Before using with RLM, verify memory operations work.

# Store a single triple about alice, tagged with a source and a reason
add_fact = ns['mem_add']
result = add_fact(
    'http://ex.org/alice',
    'http://ex.org/age',
    '30',
    source='test',
    reason='Testing mem_add',
)
print(result)

# The memory graph should now contain exactly that one triple
ds_meta = ns['ds_meta']
triple_count = len(ds_meta.mem)
assert triple_count == 1, "Should have 1 triple in memory"
print(f"✓ Memory has {triple_count} triple(s)")

# Adding a fact should also have left at least one provenance entry
prov_count = len(ds_meta.prov)
assert prov_count > 0, "Should have provenance events"
print(f"✓ Provenance has {prov_count} event(s)")

# Read the fact back through SPARQL
age_sparql = 'SELECT ?s ?age WHERE { ?s <http://ex.org/age> ?age }'
results = ns['mem_query'](age_sparql)
print(f"Query results: {results}")

assert len(results) == 1, "Should have 1 result"
first_row = results[0]
assert first_row['age'] == '30', "Age should be 30"
print("✓ Query works correctly")

# Describe alice and check she appears as the subject of one triple
desc = ns['mem_describe']('http://ex.org/alice')
print(f"Description: {desc}")

subject_triples = desc['as_subject']
assert len(subject_triples) == 1, "Should have 1 triple where alice is subject"
print("✓ Describe works correctly")

Step 4: Test RLM with Simple Memory Task

Now let’s see if RLM can use memory operations.

# Use a separate namespace dedicated to the RLM run
ns_rlm = {}
setup_dataset_context(ns_rlm)

starting_triples = len(ns_rlm['ds_meta'].mem)
print(f"Starting with {starting_triples} triples in memory")

# Give RLM a simple memory task
# `context` is the background text handed to rlm_run: it shows the model the
# call signatures of mem_add and mem_query. The literal is passed verbatim, so
# its whitespace is part of the prompt.
context = """
You have access to these memory operations:

```python
# Add a fact to memory
mem_add(subject_uri, predicate_uri, object_value, source='agent', reason='...')

# Query memory with SPARQL
results = mem_query('SELECT ?s ?p ?o WHERE { ?s ?p ?o }', limit=100)
```

Store facts as you discover them.
"""

# `query` is the actual task: store one triple about alice, then read it back
# and report what was found.
query = """
Please store this fact in memory:
- Subject: http://example.org/alice
- Predicate: http://example.org/age  
- Object: 30

Then query memory to verify it was stored and tell me what you found.
"""

divider = "=" * 60

print("Running RLM...")
print(divider)

# Run RLM (requires API key); the returned namespace replaces ns_rlm
rlm_outcome = rlm_run(query=query, context=context, ns=ns_rlm, max_iters=5)
answer, iterations, ns_rlm = rlm_outcome

print(divider)
print(f"Answer: {answer}")
print(divider)

# Report how much the run left behind in memory and provenance
rlm_meta = ns_rlm['ds_meta']
print(f"\nMemory now has: {len(rlm_meta.mem)} triples")
print(f"Provenance events: {len(rlm_meta.prov)}")
print(f"Iterations used: {len(iterations)}")

# Dump every stored triple, indenting predicate and object under the subject
print("\nMemory contents:")
for subj, pred, obj in rlm_meta.mem.triples((None, None, None)):
    print(f"  {subj}\n    {pred}\n      {obj}")

# Walk every code block RLM executed and flag the memory helpers it mentions
print("\nCode blocks executed:")
for iter_idx, iteration in enumerate(iterations):
    print(f"\n--- Iteration {iter_idx} ---")
    for block_idx, block in enumerate(iteration.code_blocks):
        source_text = block.code
        print(f"\nCode block {block_idx}:")
        print(source_text)

        # A plain substring match is enough to see which helpers were referenced
        for op_name in ('mem_add', 'mem_query'):
            if op_name in source_text:
                print(f"  ✓ Used {op_name}")

        # Show at most the first 200 characters of captured stdout
        captured = block.result.stdout
        if captured:
            print(f"  Output: {captured[:200]}")

Step 5: Test Memory Persistence Across Runs

Verify that memory persists when reusing the same namespace.

# Record the triple count before the second run so it can be compared afterwards
mem_before = len(ns_rlm['ds_meta'].mem)
print(f"Memory before second run: {mem_before} triples")

# Second run: same context and the same namespace object as the first run
query2 = "Query memory to find Alice's age. What did you find?"
answer2, iterations2, ns_rlm = rlm_run(query=query2, context=context, ns=ns_rlm, max_iters=5)

meta_after = ns_rlm['ds_meta']
mem_after = len(meta_after.mem)
print(f"\nAnswer: {answer2}")
print(f"Memory after second run: {mem_after} triples")
print(f"Session ID (same?): {meta_after.session_id}")

# Memory may grow during the run, but it must not shrink
assert mem_after >= mem_before, "Memory should persist"
print("✓ Memory persisted across runs")

Step 6: Test Provenance Inspection

Look at what provenance was recorded.

import re
from itertools import islice

from rdflib import RDF, Namespace

RLM_PROV = Namespace('urn:rlm:prov:')
ds_meta = ns_rlm['ds_meta']

print(f"Total provenance events: {len(ds_meta.prov)}")
print("\nProvenance details:")

# Take only the first 5 typed subjects instead of materializing every subject
# and slicing afterwards. The short list is built up front so no graph iterator
# is left open while the inner lookups run.
first_events = list(islice(ds_meta.prov.subjects(RDF.type, None), 5))

# Show each event
for event in first_events:
    print(f"\nEvent: {event}")

    # Get all properties of this event
    for p, o in ds_meta.prov.predicate_objects(event):
        pred_uri = str(p)
        # Shorten the predicate to its local name. Splitting on '/' alone leaves
        # URN-style predicates (such as the 'urn:rlm:prov:' namespace declared
        # above) and '#'-fragment URIs unshortened, so split on '/', '#' and ':'.
        # Fall back to the full URI when it ends in a delimiter.
        pred_name = re.split(r'[/#:]', pred_uri)[-1] or pred_uri
        print(f"  {pred_name}: {o}")

Step 7: Test with Ontology Integration

Now test RLM using both ontology and memory together.

# Locate the PROV ontology file; later cells skip themselves when it is absent
from pathlib import Path

ont_path = Path('ontology/prov.ttl')
if not ont_path.exists():
    print(f"⚠ Ontology not found at {ont_path}")
    print("Will skip ontology integration test")
else:
    print(f"✓ Found ontology at {ont_path}")

# Build one namespace that carries both the dataset and the 'prov' ontology
if ont_path.exists():
    ns_ont = {}
    setup_dataset_context(ns_ont)
    setup_ontology_context(str(ont_path), ns_ont, name='prov')

    dataset_meta = ns_ont['ds_meta']
    onto_meta = ns_ont['prov_meta']
    print(f"Dataset ready: {len(dataset_meta.mem)} triples in memory")
    print(f"Ontology loaded: {len(onto_meta.classes)} classes")
    print(f"Ontology properties: {len(onto_meta.properties)}")

# Only prepare the prompts when the ontology file is available.
if ont_path.exists():
    # Background text for the model: lists the ontology helpers and the memory
    # operations it may call. The literal is passed verbatim to rlm_run later.
    context_ont = """
You have access to:

1. PROV Ontology (in prov_meta):
   - prov_meta.classes - list of class URIs
   - prov_meta.labels - dict of URI -> label
   - search_by_label(text) - find entities by label
   - describe_entity(uri) - get entity details

2. Memory operations:
   - mem_add(subject, predicate, object, source='agent', reason='...')
   - mem_query(sparql)

Use the ontology to understand concepts, then store notes in memory.
"""
    
    # The task: look up the Activity class, store a note about it, verify.
    query_ont = """
Find the Activity class in the PROV ontology.
Get its label and comment.
Store a note about it in memory using:
  subject: the Activity URI
  predicate: http://example.org/myNote
  object: your summary
Then query memory to verify.
"""
    
    # Banner printed ahead of the run that happens in the following cell.
    print("Running RLM with ontology + memory...")
    print("="*60)

# Run the combined ontology + memory task, then dump what ended up in memory.
# Skipped entirely when the ontology file is missing.
if ont_path.exists():
    answer_ont, iters_ont, ns_ont = rlm_run(
        query=query_ont,
        context=context_ont,
        ns=ns_ont,
        max_iters=8
    )

    print("="*60)
    print(f"Answer: {answer_ont}")
    print("="*60)

    print(f"\nMemory: {len(ns_ont['ds_meta'].mem)} triples")
    print(f"Iterations: {len(iters_ont)}")

    # Show what was stored
    print("\nMemory contents:")
    max_object_chars = 100
    for s, p, o in ns_ont['ds_meta'].mem.triples((None, None, None)):
        object_text = str(o)
        # Append the ellipsis only when characters were actually cut off, so a
        # short object is not shown as if it had been truncated.
        if len(object_text) > max_object_chars:
            object_text = object_text[:max_object_chars] + "..."
        print(f"  Subject: {s}")
        print(f"  Predicate: {p}")
        print(f"  Object: {object_text}")

Step 8: Test Work Graphs

Test the scratch graph workflow.

# Separate namespace for the work-graph exercise
ns_work = {}
setup_dataset_context(ns_work)

task_name = 'test_task'

# work_create hands back the graph's URI together with the graph object
uri, graph = ns_work['work_create'](task_name)
print(f"Created work graph: {uri}")

# Put one scratch triple into the work graph
from rdflib import URIRef, Literal
scratch_triple = (
    URIRef('http://ex.org/temp'),
    URIRef('http://ex.org/value'),
    Literal('42'),
)
graph.add(scratch_triple)
print(f"Work graph has {len(graph)} triples")

# Promote the work graph into memory and report the new memory size
result = ns_work['work_to_mem'](task_name, reason='Test promotion')
print(result)
work_meta = ns_work['ds_meta']
print(f"Memory now has {len(work_meta.mem)} triples")

# Clean up the work graph and report how many work graphs are left
result = ns_work['work_cleanup'](task_id=task_name)
print(result)
print(f"Work graphs remaining: {len(work_meta.work_graphs)}")

print("✓ Work graph workflow works")

Step 9: Test Bounded Views

Test the inspection functions.

# Prefer the namespace left over from the RLM steps when it holds data;
# otherwise seed a small dataset so the views have something to show
has_prior_data = 'ns_rlm' in locals() and len(ns_rlm['ds_meta'].mem) > 0
if has_prior_data:
    test_ns = ns_rlm
else:
    test_ns = {}
    setup_dataset_context(test_ns)
    for person_uri, person_age in (
        ('http://ex.org/alice', '30'),
        ('http://ex.org/bob', '25'),
    ):
        test_ns['mem_add'](person_uri, 'http://ex.org/age', person_age)

# dataset_stats: print whatever summary it returns
stats = test_ns['dataset_stats']()
print("Dataset Stats:")
print(stats)
print()

# list_graphs: one (uri, triple count) pair per graph
graphs = test_ns['list_graphs']()
print(f"All graphs ({len(graphs)}):")
for graph_uri, triple_count in graphs:
    print(f"  {graph_uri}: {triple_count} triples")

# graph_sample: up to 5 triples from the memory graph, addressed by its URI
dataset_name = test_ns['ds_meta'].name
mem_uri = f"urn:rlm:{dataset_name}:mem"
sample = test_ns['graph_sample'](mem_uri, limit=5)
print(f"\nMemory sample ({len(sample)} triples):")
for subj, pred, obj in sample:
    print(f"  {subj} -> {pred} -> {obj}")

Step 10: Test Snapshots

Test save and restore functionality.

import os
import tempfile
from itertools import islice

from rlm.dataset import load_snapshot

# Create test data
ns_snap = {}
setup_dataset_context(ns_snap)
ns_snap['mem_add']('http://ex.org/alice', 'http://ex.org/age', '30')
ns_snap['mem_add']('http://ex.org/bob', 'http://ex.org/age', '25')

print(f"Before snapshot: {len(ns_snap['ds_meta'].mem)} triples")

# Write the snapshot inside a private temporary directory rather than using
# tempfile.mktemp(), which is deprecated because the name it returns can be
# claimed by another process before the file is created. The context manager
# also removes the snapshot when an assertion or call below fails, which the
# trailing os.unlink() in the earlier version did not.
with tempfile.TemporaryDirectory() as snapshot_dir:
    snapshot_path = os.path.join(snapshot_dir, 'snapshot.trig')

    # Save snapshot
    result = ns_snap['snapshot_dataset'](path=snapshot_path)
    print(f"\n{result}")
    print(f"File size: {os.path.getsize(snapshot_path)} bytes")

    # Show snapshot contents
    print("\nSnapshot preview (first 30 lines):")
    # NOTE(review): the .trig suffix suggests TriG, which is UTF-8 by
    # specification; confirm snapshot_dataset writes UTF-8. The explicit
    # encoding keeps the preview independent of the platform locale.
    with open(snapshot_path, 'r', encoding='utf-8') as f:
        for line in islice(f, 30):
            print(f"  {line.rstrip()}")

    # Restore snapshot into a separate namespace under the name 'restored'
    ns_restored = {}
    result = load_snapshot(snapshot_path, ns_restored, name='restored')
    print(f"\n{result}")

    restored_meta = ns_restored['restored_meta']
    print(f"Restored memory: {len(restored_meta.mem)} triples")
    print(f"Restored provenance: {len(restored_meta.prov)} events")

    # Verify contents match (by triple count)
    assert len(restored_meta.mem) == len(ns_snap['ds_meta'].mem), \
        "Restored memory should have the same number of triples as the original"
    print("\n✓ Snapshot restore successful")

Summary

This notebook tested:

✓ Step 1: Module imports
✓ Step 2: Dataset setup
✓ Step 3: Direct memory operations (add, query, describe)
✓ Step 4: RLM using memory operations
✓ Step 5: Memory persistence across RLM runs
✓ Step 6: Provenance inspection
✓ Step 7: Ontology + memory integration
✓ Step 8: Work graph workflow
✓ Step 9: Bounded view functions
✓ Step 10: Snapshot/restore

If every cell above ran without raising an assertion error, all components are working correctly.