Add Nostr Event Storage Specification

jay
2025-12-30 09:51:22 -05:00
parent 515165e447
commit 9d50fbccc2

@@ -0,0 +1,970 @@
# Nostr Event Storage Specification
## 1. Common Storage Rules
These rules apply to both client and relay implementations.
### Rule 1.1: Event Deduplication
Events with identical `id` fields are considered duplicates. Store only one copy.
```
on_event_received(event):
    if exists(event.id):
        return DUPLICATE
    else:
        store(event)
        return STORED
```
**Example:**
```
First: id="4376c65d..." → STORED
Second: id="4376c65d..." → DUPLICATE (rejected)
```
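The same rule as a runnable Python sketch; the dict-based `EventStore` and its return strings are illustrative, not part of the specification:
```python
# Minimal in-memory store demonstrating Rule 1.1 (illustrative only).
class EventStore:
    def __init__(self):
        self.events = {}  # event id -> event

    def store(self, event):
        if event["id"] in self.events:
            return "DUPLICATE"
        self.events[event["id"]] = event
        return "STORED"

store = EventStore()
assert store.store({"id": "4376c65d"}) == "STORED"
assert store.store({"id": "4376c65d"}) == "DUPLICATE"
```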
### Rule 1.2: Replaceable Event Semantics
For kinds 0, 3, and 10000-19999, keep only the newest event per (kind, pubkey) pair.
```
is_replaceable(kind):
    return kind in [0, 3] or (10000 <= kind < 20000)

on_replaceable_event(event):
    key = (event.kind, event.pubkey)
    existing = find_by_key(key)
    if existing and existing.created_at >= event.created_at:
        return REPLACED // incoming is older
    if existing:
        delete(existing)
    store(event)
    return STORED
**Example:**
```
Store: kind=0, pubkey="abc123", created_at=1000
Store: kind=0, pubkey="abc123", created_at=1500
Result: Only second event remains (timestamp 1500)
```
### Rule 1.3: Addressable Event Semantics
For kinds 30000-39999, keep only the newest event per (kind, pubkey, d-tag) tuple.
```
is_addressable(kind):
    return 30000 <= kind < 40000

extract_d_tag(event):
    for tag in event.tags:
        if tag[0] == "d" and len(tag) >= 2:
            return tag[1]
    return ""

on_addressable_event(event):
    d_value = extract_d_tag(event)
    key = (event.kind, event.pubkey, d_value)
    existing = find_by_key(key)
    if existing and existing.created_at >= event.created_at:
        return REPLACED
    if existing:
        delete(existing)
    store(event)
    return STORED
```
**Example:**
```json
{
"kind": 30023,
"pubkey": "abc123...",
"tags": [["d", "article-1"]],
"created_at": 1000
}
```
Address: `(30023, "abc123...", "article-1")`
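Rules 1.2 and 1.3 can share one keying function. A Python sketch, assuming events are dicts with NIP-01 field names (`replacement_key` is a hypothetical helper):
```python
# Returns the key under which an event replaces older versions,
# or None for regular events that are stored by id alone.
def replacement_key(event):
    kind = event["kind"]
    if kind in (0, 3) or 10000 <= kind < 20000:  # replaceable (Rule 1.2)
        return (kind, event["pubkey"])
    if 30000 <= kind < 40000:                    # addressable (Rule 1.3)
        d_tag = next((t[1] for t in event["tags"]
                      if len(t) >= 2 and t[0] == "d"), "")
        return (kind, event["pubkey"], d_tag)
    return None

ev = {"kind": 30023, "pubkey": "abc123", "tags": [["d", "article-1"]]}
assert replacement_key(ev) == (30023, "abc123", "article-1")
```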
### Rule 1.4: Ephemeral Event Handling
Events with kinds 20000-29999 must not be stored.
```
is_ephemeral(kind):
    return 20000 <= kind < 30000

on_event_received(event):
    if is_ephemeral(event.kind):
        return EPHEMERAL // forward only, never store
```
**Example:**
```
kind=25000 → Never store, only forward to subscribers
kind=15000 → Store normally
```
### Rule 1.5: Deletion Enforcement
Kind 5 events delete previously stored events by the same author.
```
on_deletion_event(event):
    if event.kind != 5:
        return
    // Extract event IDs from 'e' tags
    deleted_ids = []
    for tag in event.tags:
        if tag[0] == "e" and len(tag) >= 2:
            deleted_ids.append(tag[1])
    // Delete matching events
    for id in deleted_ids:
        existing = find_by_id(id)
        if existing and existing.pubkey == event.pubkey:
            delete(existing)
        mark_deleted(id, event.pubkey)
```
**Example deletion event:**
```json
{
"kind": 5,
"pubkey": "abc123...",
"tags": [
["e", "4376c65d..."],
["e", "5c83da77..."]
]
}
```
Deletes events `4376c65d...` and `5c83da77...` if they were authored by `abc123...`.
### Rule 1.6: Deletion by Address
Kind 5 events can delete addressable events using 'a' tags.
```
on_deletion_event(event):
    // Extract addresses from 'a' tags
    deleted_addresses = []
    for tag in event.tags:
        if tag[0] == "a" and len(tag) >= 2:
            deleted_addresses.append(tag[1])
    for address in deleted_addresses:
        (kind, pubkey, d_value) = parse_address(address)
        if pubkey == event.pubkey:
            existing = find_by_address(kind, pubkey, d_value)
            if existing and existing.created_at < event.created_at:
                delete(existing)
            mark_deleted(address, event.pubkey, event.created_at)
```
**Example:**
```json
{
"kind": 5,
"tags": [
["a", "30023:abc123...:article-1"]
]
}
```
### Rule 1.7: Prevent Re-insertion After Deletion
Once an event is deleted, block future attempts to store it.
```
on_event_received(event):
    if is_deleted(event.id, event.pubkey):
        return BLOCKED
    if is_addressable(event.kind):
        address = make_address(event.kind, event.pubkey, extract_d_tag(event))
        deletion_timestamp = get_deletion_timestamp(address, event.pubkey)
        if deletion_timestamp and event.created_at < deletion_timestamp:
            return BLOCKED
```
### Rule 1.8: Filter Matching Logic
Events match a filter if they satisfy all specified conditions.
```
matches_filter(event, filter):
    // ids: prefix match
    if filter.ids:
        if not any(event.id.startswith(prefix) for prefix in filter.ids):
            return false
    // authors: prefix match
    if filter.authors:
        if not any(event.pubkey.startswith(prefix) for prefix in filter.authors):
            return false
    // kinds: exact match
    if filter.kinds:
        if event.kind not in filter.kinds:
            return false
    // since/until: timestamp range
    if filter.since and event.created_at < filter.since:
        return false
    if filter.until and event.created_at > filter.until:
        return false
    // tag filters: must have matching tag
    for (tag_name, values) in filter.tag_filters:
        found = false
        for tag in event.tags:
            if len(tag) >= 2 and tag[0] == tag_name and tag[1] in values:
                found = true
                break
        if not found:
            return false
    return true
```
**Example filter:**
```json
{
"kinds": [1],
"authors": ["abc123"],
"#e": ["4376c65d..."]
}
```
Matches: kind=1, pubkey starts with "abc123", has tag ["e", "4376c65d..."]
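A direct Python translation of the matching logic above, assuming filters are plain dicts with NIP-01 field names and `#x` keys for tag filters:
```python
def matches_filter(event, f):
    if "ids" in f and not any(event["id"].startswith(p) for p in f["ids"]):
        return False
    if "authors" in f and not any(event["pubkey"].startswith(p)
                                  for p in f["authors"]):
        return False
    if "kinds" in f and event["kind"] not in f["kinds"]:
        return False
    if "since" in f and event["created_at"] < f["since"]:
        return False
    if "until" in f and event["created_at"] > f["until"]:
        return False
    for key, values in f.items():
        if key.startswith("#"):  # tag filter, e.g. "#e"
            name = key[1:]
            if not any(len(t) >= 2 and t[0] == name and t[1] in values
                       for t in event["tags"]):
                return False
    return True

ev = {"id": "4376c65d", "pubkey": "abc123ff", "kind": 1,
      "created_at": 1000, "tags": [["e", "4376c65d"]]}
assert matches_filter(ev, {"kinds": [1], "authors": ["abc123"],
                           "#e": ["4376c65d"]})
```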
### Rule 1.9: Multi-Filter OR Logic
Multiple filters in a query combine with OR logic.
```
matches_any_filter(event, filters):
    for filter in filters:
        if matches_filter(event, filter):
            return true
    return false
```
**Example:**
```json
[
{"kinds": [1]},
{"kinds": [6], "authors": ["abc123"]}
]
```
Matches: All kind 1 events OR (kind 6 events from "abc123")
### Rule 1.10: Result Ordering
Return events in descending timestamp order, using ID as tiebreaker.
```
sort_events(events):
    return sorted(events,
                  key=lambda e: (-e.created_at, e.id))
```
**Example:**
```
Event A: created_at=1500, id="aaa..."
Event B: created_at=1500, id="bbb..."
Event C: created_at=1200, id="ccc..."
Order: [A, B, C]
```
### Rule 1.11: Limit Application
When a filter specifies `limit`, return at most that many results.
```
apply_limit(events, filter):
    if filter.limit:
        return events[:filter.limit]
    return events
```
### Rule 1.12: Tag Index Extraction
Index single-letter tags for efficient querying.
```
extract_indexable_tags(event):
    indexed = []
    for tag in event.tags:
        if len(tag) >= 2 and len(tag[0]) == 1:
            indexed.append((tag[0], tag[1]))
    return indexed
```
**Example:**
```json
"tags": [
["e", "4376c65d...", "wss://relay.com"],
["p", "abc123..."],
["expiration", "1673433737"]
]
```
Indexed: `[("e", "4376c65d..."), ("p", "abc123...")]`
---
## 2. Client Storage Rules
These rules apply only to client implementations.
### Rule 2.1: Memory-Bounded Storage
Clients must limit memory consumption through eviction.
```
max_events = 10000 // configurable

on_event_stored():
    if count_events() > max_events:
        evict_oldest_unclaimed()
```
### Rule 2.2: Claiming System
Track which subscriptions reference each event to prevent premature eviction.
```
claims = Map<event_id, Set<subscription_id>>

claim_event(event_id, subscription_id):
    claims[event_id].add(subscription_id)

release_claim(event_id, subscription_id):
    claims[event_id].remove(subscription_id)

is_claimed(event_id):
    return claims[event_id] is not empty

evict_oldest_unclaimed():
    for event in lru_order():
        if not is_claimed(event.id):
            delete(event)
            return
```
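A sketch of the claiming system in Python, with an `OrderedDict` standing in for LRU order (data shapes are assumptions, not a full client cache):
```python
from collections import OrderedDict, defaultdict

claims = defaultdict(set)  # event id -> set of subscription ids
lru = OrderedDict()        # event id -> event, oldest entries first

def claim_event(event_id, subscription_id):
    claims[event_id].add(subscription_id)

def release_claim(event_id, subscription_id):
    claims[event_id].discard(subscription_id)

def evict_oldest_unclaimed():
    for event_id in list(lru):  # iterate oldest to newest
        if not claims[event_id]:
            del lru[event_id]
            return
```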
### Rule 2.3: Subscription Deduplication
Identical subscriptions share the same underlying query.
```
active_subscriptions = Map<filter_hash, Observable>

subscribe(filters):
    hash = hash_filters(filters)
    if active_subscriptions.contains(hash):
        return active_subscriptions[hash]
    observable = create_query_observable(filters)
    active_subscriptions[hash] = observable
    return observable
```
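One way to hash filters in Python: canonical, key-sorted JSON. Note that array order inside a filter still matters with this scheme, so normalize arrays first if your client treats them as sets:
```python
import hashlib
import json

def hash_filters(filters):
    # Key-sorted, whitespace-free JSON gives a stable canonical form.
    canonical = json.dumps(filters, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(canonical.encode()).hexdigest()

assert hash_filters([{"kinds": [1], "limit": 5}]) == \
       hash_filters([{"limit": 5, "kinds": [1]}])
```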
### Rule 2.4: Reactive Updates
When new events arrive, notify all matching subscriptions immediately.
```
on_event_stored(event):
    for (subscription_id, filters) in active_subscriptions:
        if matches_any_filter(event, filters):
            emit_to_subscription(subscription_id, event)
```
### Rule 2.5: Optional Validation
Clients may skip signature verification for events from trusted sources.
```
on_event_received(event, source):
    if is_trusted_source(source):
        store(event) // skip validation
    else:
        if validate_signature(event):
            store(event)
        else:
            reject(event)
```
### Rule 2.6: Loader Integration
When queried events are missing, invoke loaders to fetch from network.
```
get_event(event_id):
    event = find_by_id(event_id)
    if event:
        return event
    if event_loader:
        fetched = event_loader(event_id)
        if fetched:
            store(fetched)
            return fetched
    return null
```
### Rule 2.7: Metadata Decoration
Clients may annotate events with runtime metadata without persisting it.
```
// Store metadata in separate map, not in event object
metadata = WeakMap<Event, Metadata>

set_metadata(event, key, value):
    if not metadata.has(event):
        metadata.set(event, {})
    metadata.get(event)[key] = value

// Example metadata: relay hints, cache flags
```
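In Python, `weakref.WeakKeyDictionary` plays the role of the WeakMap. Note that events must be objects rather than plain dicts, since dicts cannot be weakly referenced (the `Event` class here is illustrative):
```python
import weakref

class Event:
    def __init__(self, id):
        self.id = id

metadata = weakref.WeakKeyDictionary()  # Event -> annotation dict

def set_metadata(event, key, value):
    metadata.setdefault(event, {})[key] = value

ev = Event("4376c65d")
set_metadata(ev, "seen_on", ["wss://relay.example.com"])
del ev  # the metadata entry is dropped with the event
```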
---
## 3. Relay Storage Rules
These rules apply only to relay implementations.
### Rule 3.1: Full Validation Pipeline
Relays must validate every event before storage.
```
on_event_received(event):
    // Step 1: Structure validation
    if not validate_structure(event):
        return ["OK", event.id, false, "invalid: malformed structure"]
    // Step 2: ID validation
    computed_id = compute_event_id(event)
    if computed_id != event.id:
        return ["OK", event.id, false, "invalid: incorrect id"]
    // Step 3: Signature validation
    if not verify_signature(event):
        return ["OK", event.id, false, "invalid: signature verification failed"]
    // Step 4: Store
    result = store(event)
    return ["OK", event.id, true, ""]
```
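The ID check in Step 2 hashes the canonical NIP-01 serialization. A Python sketch follows; it is close to canonical for typical content, but a strict implementation must follow NIP-01's escaping rules exactly, and signature verification additionally requires a BIP-340 Schnorr library, which is not shown:
```python
import hashlib
import json

def compute_event_id(event):
    # NIP-01: sha256 over [0, pubkey, created_at, kind, tags, content],
    # serialized as UTF-8 JSON with no extra whitespace.
    payload = [0, event["pubkey"], event["created_at"],
               event["kind"], event["tags"], event["content"]]
    serialized = json.dumps(payload, separators=(",", ":"),
                            ensure_ascii=False)
    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
```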
### Rule 3.2: Durable Storage
All stored events must survive process restart.
```
// Use persistent storage backend
// - Relational: SQLite, PostgreSQL, MySQL
// - Key-Value: LMDB, Badger
// - Ensure write-ahead logging or equivalent durability
```
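A minimal durable backend sketch using SQLite with write-ahead logging (schema and file name are illustrative):
```python
import sqlite3

db = sqlite3.connect("events.db")
db.execute("PRAGMA journal_mode=WAL")  # durable; readers don't block the writer
db.execute("""
    CREATE TABLE IF NOT EXISTS events (
        id         TEXT PRIMARY KEY,
        pubkey     TEXT NOT NULL,
        created_at INTEGER NOT NULL,
        kind       INTEGER NOT NULL,
        tags       TEXT NOT NULL,   -- JSON-encoded tag array
        content    TEXT NOT NULL,
        sig        TEXT NOT NULL
    )
""")
db.commit()
```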
### Rule 3.3: EOSE Semantics
Send EOSE after delivering all stored events matching a subscription.
```
on_subscription(subscription_id, filters):
    stored_events = query_stored(filters)
    for event in stored_events:
        send(["EVENT", subscription_id, event])
    send(["EOSE", subscription_id])
    // Continue sending new matching events
```
**Example:**
```
Client: ["REQ", "sub1", {"kinds": [1], "limit": 5}]
Relay: ["EVENT", "sub1", {...}] // stored event 1
Relay: ["EVENT", "sub1", {...}] // stored event 2
Relay: ["EOSE", "sub1"]
Relay: ["EVENT", "sub1", {...}] // new real-time event
```
### Rule 3.4: Concurrent Client Support
Handle multiple simultaneous connections without data corruption.
```
// Use appropriate concurrency primitives
// - Relational: Database transactions (SERIALIZABLE isolation)
// - Key-Value: Explicit mutexes or lock-free data structures
// - Read operations should not block writes
```
### Rule 3.5: Per-Filter Limit Enforcement
When multiple filters have limits, apply each limit independently before combining.
```
query_multi_filter(filters):
    results = Set()
    for filter in filters:
        batch = query_single_filter(filter)
        if filter.limit:
            batch = batch[:filter.limit]
        results.update(batch) // in-place union; dedupes shared events
    return sort_events(results)
```
**Example:**
```json
[
{"kinds": [1], "limit": 10},
{"kinds": [6], "limit": 5}
]
```
Returns: Up to 10 kind-1 events + up to 5 kind-6 events
### Rule 3.6: Write Confirmation
Send an OK message in response to each EVENT command.
```
on_event_command(["EVENT", event]):
    result = process_event(event)
    if result == STORED:
        send(["OK", event.id, true, ""])
    elif result == DUPLICATE:
        send(["OK", event.id, true, "duplicate: already stored"])
    elif result == BLOCKED:
        send(["OK", event.id, false, "blocked: event deleted"])
    elif result == INVALID:
        send(["OK", event.id, false, "invalid: " + reason])
```
### Rule 3.7: Subscription Cleanup
Support the CLOSE command to end subscriptions.
```
on_close_command(["CLOSE", subscription_id]):
    remove_subscription(subscription_id)
    // Optionally send confirmation
    send(["CLOSED", subscription_id, "subscription ended"])
```
---
## 4. Optional Features
### Optional Rule 4.1: Expiration Support (NIP-40)
Store and honor expiration timestamps.
```
extract_expiration(event):
    for tag in event.tags:
        if tag[0] == "expiration" and len(tag) >= 2:
            return parse_int(tag[1])
    return null

on_event_received(event):
    expiration = extract_expiration(event)
    if expiration and current_timestamp() > expiration:
        return REJECTED // already expired
    store(event)
    if expiration:
        schedule_deletion(event.id, expiration)

schedule_deletion(event_id, timestamp):
    at_time(timestamp):
        delete(event_id)
```
**Example:**
```json
{
"tags": [["expiration", "1673433737"]],
"created_at": 1673347337
}
```
Event expires 24 hours after creation.
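A sketch of the deletion scheduling in Python using a min-heap and a periodic sweep instead of per-event timers (`delete` is the store's deletion hook, assumed to exist):
```python
import heapq
import time

expiry_heap = []  # (expires_at, event_id), smallest timestamp first

def schedule_deletion(event_id, expires_at):
    heapq.heappush(expiry_heap, (expires_at, event_id))

def sweep_expired(delete, now=None):
    # Call periodically, e.g. once per minute.
    now = time.time() if now is None else now
    while expiry_heap and expiry_heap[0][0] <= now:
        _, event_id = heapq.heappop(expiry_heap)
        delete(event_id)
```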
### Optional Rule 4.2: Full-Text Search
Index content field for text queries.
```
on_event_stored(event):
    if is_searchable(event):
        add_to_search_index(event.id, event.content)

query_with_search(filter):
    if filter.search:
        matching_ids = search_index.query(filter.search)
        events = [find_by_id(id) for id in matching_ids]
        events = [e for e in events if matches_filter(e, filter)]
        return events
    else:
        return normal_query(filter)
```
**Example filter:**
```json
{
"kinds": [1],
"search": "bitcoin protocol"
}
```
### Optional Rule 4.3: Event Counting (NIP-45)
Support COUNT command without returning full events.
```
on_count_command(["COUNT", subscription_id, ...filters]):
    count = 0
    for filter in filters:
        count += count_matching(filter)
    // Note: an event matching several filters is counted once per filter;
    // deduplicate across filters if exact counts are required.
    send(["COUNT", subscription_id, {"count": count}])
```
### Optional Rule 4.4: Proof of Work Validation (NIP-13)
Verify proof-of-work difficulty claims.
```
validate_pow(event):
    for tag in event.tags:
        if tag[0] == "nonce" and len(tag) >= 3:
            target_difficulty = parse_int(tag[2])
            actual_difficulty = count_leading_zero_bits(event.id)
            return actual_difficulty >= target_difficulty
    return true // no PoW requirement
```
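Counting leading zero bits is simple on the integer value of the hex ID; a Python sketch:
```python
def count_leading_zero_bits(event_id_hex):
    # The id is 64 hex chars = 256 bits; leading zero bits are
    # 256 minus the bit length of its integer value.
    value = int(event_id_hex, 16)
    return 256 if value == 0 else 256 - value.bit_length()

assert count_leading_zero_bits("0" * 63 + "1") == 255
assert count_leading_zero_bits("000f" + "0" * 60) == 12
```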
### Optional Rule 4.5: Compression (Relay Only)
Compress stored event data to reduce disk usage.
```
store_event(event):
    json = serialize(event)
    compressed = compress(json, algorithm=zstd)
    write_to_storage(event.id, compressed)

retrieve_event(event_id):
    compressed = read_from_storage(event_id)
    json = decompress(compressed)
    return deserialize(json)
```
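An illustrative Python version; the pseudocode suggests zstd, but stdlib `zlib` is used here to keep the sketch dependency-free:
```python
import json
import zlib

def pack_event(event):
    return zlib.compress(json.dumps(event, separators=(",", ":")).encode())

def unpack_event(blob):
    return json.loads(zlib.decompress(blob).decode())

ev = {"id": "4376c65d", "kind": 1, "content": "hello " * 100}
assert unpack_event(pack_event(ev)) == ev
```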
### Optional Rule 4.6: Read Replicas (Relay Only)
Distribute read load across multiple database instances.
```
// Write to master
store_event(event):
    master_db.insert(event)

// Read from replica (round-robin or random)
query_events(filter):
    replica = select_replica()
    return replica.query(filter)
```
### Optional Rule 4.7: Negentropy Set Reconciliation (Relay Only)
Support efficient synchronization protocol.
```
// Maintain pre-computed BTree fingerprints
on_event_stored(event):
    for cached_filter in negentropy_cache:
        if matches_filter(event, cached_filter):
            cached_filter.btree.insert(event.id, event.created_at)

on_negentropy_request(filter, client_btree):
    server_btree = get_or_build_btree(filter)
    differences = compute_differences(client_btree, server_btree)
    send_differences(differences)
```
---
## 5. Special Cases
### Special Case 5.1: Timestamp Ties
When sorting events with identical timestamps, use event ID as tiebreaker.
```
// Required for deterministic ordering
sort_key(event):
    return (-event.created_at, event.id) // descending time, ascending ID
```
### Special Case 5.2: Empty Filters
A filter with no fields matches all events.
```
filter = {} // matches everything
```
Implementations may apply a default limit to avoid returning an overwhelming number of results.
### Special Case 5.3: Zero-Length d-tag
Addressable events without a d-tag use empty string as identifier.
```
extract_d_tag(event):
    for tag in event.tags:
        if tag[0] == "d":
            return tag[1] if len(tag) >= 2 else ""
    return "" // no d-tag found
```
**Example:**
```json
{"kind": 30023, "tags": [["d", ""]]}
{"kind": 30023, "tags": []}
```
Both have address: `30023:<pubkey>:`
### Special Case 5.4: Kind 5 Self-Deletion
A deletion event can reference its own ID in e-tags.
```
on_deletion_event(event):
    // Process deletions normally
    process_e_tags(event)
    // Then store the deletion event itself
    // (it may delete itself, which is valid)
    store(event)
```
### Special Case 5.5: Replacement Timestamp Ties
When replaceable events have identical timestamps, keep lexicographically lower ID.
```
on_replaceable_event(event):
    existing = find_replaceable(event.kind, event.pubkey)
    if existing:
        if existing.created_at > event.created_at:
            return REPLACED // existing is newer
        elif existing.created_at == event.created_at:
            if existing.id < event.id:
                return REPLACED // existing ID wins tie
        delete(existing)
    store(event)
```
### Special Case 5.6: Tag Value Limits
Implementations should handle large tag values gracefully.
```
// Truncate or reject events with excessively large tags
max_tag_value_length = 1024 // configurable
validate_tags(event):
for tag in event.tags:
for value in tag:
if len(value) > max_tag_value_length:
return false // or truncate
return true
```
### Special Case 5.7: Multiple d-tags
If an event has multiple d-tags, use the first one.
```
extract_d_tag(event):
    for tag in event.tags:
        if tag[0] == "d" and len(tag) >= 2:
            return tag[1] // return first match
    return ""
```
---
## 6. Implementation Considerations
### Consideration 6.1: Index Selection
Choose appropriate indexes based on query patterns.
**Essential indexes:**
- Primary: `id` (unique)
- Kind: `(kind, created_at DESC)`
- Author: `(pubkey, created_at DESC)`
- Time: `(created_at DESC)`
- Tags: `(tag_name, tag_value, created_at DESC)`
**Compound indexes for common patterns:**
- Author + Kind: `(pubkey, kind, created_at DESC)`
- Replaceable: `(kind, pubkey)` where kind is replaceable
- Addressable: `(kind, pubkey, d_tag)` where kind is addressable
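As DDL for the SQLite schema sketched in Rule 3.2 (tag rows are assumed to live in a separate `tags` table; names are illustrative):
```python
import sqlite3

db = sqlite3.connect("events.db")
db.executescript("""
    CREATE INDEX IF NOT EXISTS idx_kind_time   ON events(kind, created_at DESC);
    CREATE INDEX IF NOT EXISTS idx_author_time ON events(pubkey, created_at DESC);
    CREATE INDEX IF NOT EXISTS idx_time        ON events(created_at DESC);
    CREATE INDEX IF NOT EXISTS idx_author_kind ON events(pubkey, kind, created_at DESC);
    CREATE TABLE IF NOT EXISTS tags (
        event_id TEXT NOT NULL REFERENCES events(id),
        name     TEXT NOT NULL,
        value    TEXT NOT NULL
    );
    CREATE INDEX IF NOT EXISTS idx_tag_lookup ON tags(name, value);
""")
```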
### Consideration 6.2: Batch Processing
Group writes into transactions to reduce overhead.
```
batch_size = 100
pending_events = []

on_event_received(event):
    pending_events.append(event)
    if len(pending_events) >= batch_size:
        transaction:
            for e in pending_events:
                store(e)
        pending_events.clear()
```
### Consideration 6.3: Lazy Tag Indexing
For memory-constrained clients, build tag indexes on-demand.
```
tag_indexes = LRU_Cache<(tag_name, tag_value), Set<event_id>>

query_tag(tag_name, tag_value):
    key = (tag_name, tag_value)
    if not tag_indexes.contains(key):
        // Build index on first access
        matching = []
        for event in all_events():
            for tag in event.tags:
                if len(tag) >= 2 and tag[0] == tag_name and tag[1] == tag_value:
                    matching.append(event.id)
        tag_indexes[key] = matching
    return tag_indexes[key]
```
### Consideration 6.4: Binary Storage
For relay implementations, consider binary encoding to reduce storage size.
```
// Pack event into binary format
packed_event = pack(
    id_bytes,       // 32 bytes
    pubkey_bytes,   // 32 bytes
    created_at,     // 8 bytes (uint64)
    kind,           // 4 bytes (uint32)
    tags_encoded,   // variable
    content_length, // 4 bytes (uint32)
    // content and sig stored separately
)
```
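The fixed-width header portion maps directly onto `struct` in Python (big-endian layout assumed; tag and content encoding elided):
```python
import struct

HEADER = ">32s32sQI"  # id, pubkey, created_at (uint64), kind (uint32)

def pack_header(id_hex, pubkey_hex, created_at, kind):
    return struct.pack(HEADER, bytes.fromhex(id_hex),
                       bytes.fromhex(pubkey_hex), created_at, kind)

def unpack_header(buf):
    id_b, pk_b, created_at, kind = struct.unpack(HEADER, buf)
    return id_b.hex(), pk_b.hex(), created_at, kind
```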
### Consideration 6.5: Connection Pooling
Relays should manage database connections efficiently.
```
pool_config:
    min_connections: 5
    max_connections: 50
    idle_timeout: 60s
    connection_lifetime: 3600s

query(sql):
    conn = pool.acquire()
    try:
        result = conn.execute(sql)
        return result
    finally:
        pool.release(conn)
```
### Consideration 6.6: Rate Limiting
Protect relay resources from abuse.
```
limits = Map<client_ip, TokenBucket>

on_client_request(client_ip, request):
    bucket = limits[client_ip]
    if not bucket.consume(1):
        send(["NOTICE", "rate limit exceeded"])
        disconnect(client_ip)
        return
    process_request(request)
```
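A classic token bucket in Python for the sketch above (rate and capacity values are deployment choices):
```python
import time

class TokenBucket:
    def __init__(self, rate, capacity):
        self.rate = rate            # tokens added per second
        self.capacity = capacity    # maximum burst size
        self.tokens = capacity
        self.last = time.monotonic()

    def consume(self, n=1):
        now = time.monotonic()
        self.tokens = min(self.capacity,
                          self.tokens + (now - self.last) * self.rate)
        self.last = now
        if self.tokens >= n:
            self.tokens -= n
            return True
        return False
```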
### Consideration 6.7: Storage Migration
Plan for schema changes and data migration.
```
// Version stored schema
schema_version = 3

on_startup():
    stored_version = read_schema_version()
    if stored_version < schema_version:
        migrate(from=stored_version, to=schema_version)
        update_schema_version(schema_version)
```