Overview
Automerge’s synchronization protocol enables multiple peers to exchange changes and converge on the same document state. The protocol is efficient, transferring only the changes each peer is missing, and works over any transport (files, HTTP, WebSockets, etc.).
High-Level Sync (Recommended)
For most use cases, use the high-level sync helpers that handle the protocol automatically.
Bidirectional Sync
The simplest way to synchronize two documents:
# Create two peers with different data
peer1 <- am_create()
peer1[["edited_by"]] <- "peer1"
peer1[["data1"]] <- 100
am_commit(peer1, "Peer1 changes")
peer2 <- am_create()
peer2[["edited_by"]] <- "peer2"
peer2[["data2"]] <- 200
am_commit(peer2, "Peer2 changes")
# Automatic bidirectional sync (documents modified in place)
rounds <- am_sync(peer1, peer2)
rounds
#> [1] 4
# Both documents now have all data
peer1[["data1"]]
#> [1] 100
peer1[["data2"]]
#> [1] 200
peer2[["data1"]]
#> [1] 100
peer2[["data2"]]
#> [1] 200
When to use: Simple peer-to-peer synchronization where both sides need all changes.
One-Way Merge
For pulling changes from one document to another without sending changes back:
# Create source document
source <- am_create()
source[["version"]] <- "2.0"
source[["features"]] <- list("auth", "api")
am_commit(source)
# Create target document
target <- am_create()
target[["version"]] <- "1.0"
target[["development"]] <- TRUE
am_commit(target)
# Pull changes from source to target
am_merge(target, source)
# Target now has source's changes
target[["version"]]
#> [1] "2.0"
# Source is unchanged
names(source)
#> [1] "features" "version"
When to use: Client pulling updates from server, importing changes from another peer.
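For example, a client can poll a server copy on a schedule and pull whatever is new. A minimal sketch, assuming a hypothetical fetch_server_doc() that returns the server’s current Automerge document:
# Minimal pull loop; fetch_server_doc() is a hypothetical function
# that returns the server's current Automerge document.
pull_updates <- function(local_doc) {
  server_doc <- fetch_server_doc()
  am_merge(local_doc, server_doc) # pull server changes; send nothing back
  invisible(local_doc)
}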
Low-Level Sync Protocol
For network protocols or fine-grained control, use the low-level API.
Basic Protocol Flow
# Create two peers
peer3 <- am_create()
peer3[["source"]] <- "peer3"
am_commit(peer3)
peer4 <- am_create()
peer4[["source"]] <- "peer4"
am_commit(peer4)
# Each peer maintains its own sync state
sync3 <- am_sync_state_new()
sync4 <- am_sync_state_new()
# Exchange messages until converged
round <- 0
repeat {
  round <- round + 1

  # Peer 3 generates a message
  msg3 <- am_sync_encode(peer3, sync3)
  if (is.null(msg3)) break # no more messages

  # Peer 4 receives and processes
  am_sync_decode(peer4, sync4, msg3)

  # Peer 4 generates a response
  msg4 <- am_sync_encode(peer4, sync4)
  if (is.null(msg4)) break # no more messages

  # Peer 3 receives and processes
  am_sync_decode(peer3, sync3, msg4)

  if (round > 100) {
    warning("Sync did not converge in 100 rounds")
    break
  }
}
round
#> [1] 3
peer3[["source"]]
#> [1] "peer4"
peer4[["source"]]
#> [1] "peer4"
Protocol Components
Sync State (am_sync_state_new()):
- Tracks what each peer knows
- Document-independent (can be reused)
- Should be preserved across sessions for efficiency
Sync Encode (am_sync_encode(doc, sync_state)):
- Generates sync message to send to peer
- Returns NULL when no more changes to send
- Message is binary (raw vector)
Sync Decode (am_sync_decode(doc, sync_state, message)):
- Receives sync message from peer
- Updates document and sync state
- Returns document invisibly for chaining
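Together, these three pieces map directly onto a message-based transport. Below is a sketch of one side of a sync exchange, assuming hypothetical send_to_peer() and receive_from_peer() transport functions, where receive_from_peer() returns NULL once the peer has nothing left to send:
# One side of a sync exchange over an arbitrary transport.
# send_to_peer() and receive_from_peer() are hypothetical stand-ins
# for the transport layer (WebSocket, HTTP, message queue, etc.).
sync_with_peer <- function(doc, sync_state) {
  repeat {
    msg <- am_sync_encode(doc, sync_state)
    if (!is.null(msg)) send_to_peer(msg)
    reply <- receive_from_peer() # assumed NULL when peer has nothing to send
    if (is.null(msg) && is.null(reply)) break # both sides converged
    if (!is.null(reply)) am_sync_decode(doc, sync_state, reply)
  }
  invisible(doc)
}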
Change-Based Synchronization
For scenarios where you want explicit control over individual changes:
Exporting and Importing Changes
# Create base document that will be shared
base_doc <- am_create()
base_doc[["v1"]] <- "first"
am_commit(base_doc, "Version 1")
# Fork to create two peers with shared history
peer_a <- am_fork(base_doc)
peer_b <- am_fork(base_doc)
# Remember the fork point
fork_heads <- am_get_heads(peer_a)
# Peer A makes changes
peer_a[["v2"]] <- "second"
am_commit(peer_a, "Version 2")
peer_a[["v3"]] <- "third"
am_commit(peer_a, "Version 3")
# Export changes since fork point
changes <- am_get_changes(peer_a, fork_heads)
length(changes)
#> [1] 2
# Peer B applies the changes
am_apply_changes(peer_b, changes)
# Verify both peers are in sync
peer_b[["v2"]]
#> [1] "second"
peer_b[["v3"]]
#> [1] "third"
Saving Changes to Files
# Save individual changes to files
temp_dir <- tempdir()
for (i in seq_along(changes)) {
  change_file <- file.path(temp_dir, sprintf("change_%03d.bin", i))
  writeBin(changes[[i]], change_file)
}
# Later: load and apply changes (e.g., in a fresh session)
change_files <- list.files(temp_dir, pattern = "^change_.*\\.bin$", full.names = TRUE)
loaded_changes <- lapply(change_files, function(f) {
  readBin(f, "raw", n = file.size(f))
})
# Apply loaded changes to a peer
peer_c <- am_fork(base_doc)
am_apply_changes(peer_c, loaded_changes)
# Verify
peer_c[["v3"]]
#> [1] "third"
When to use: Git-like workflows, change logs, selective sync.
Document Heads and History
Understanding Heads
Heads are the most recent commits in a document’s history. They represent the current state’s frontier in the change graph.
# Create document and make changes
doc_main <- am_create()
doc_main[["version"]] <- "1.0"
am_commit(doc_main, "Initial version")
# Get heads (fingerprint of current state)
heads_v1 <- am_get_heads(doc_main)
length(heads_v1)
#> [1] 1
# Make more changes
doc_main[["version"]] <- "2.0"
doc_main[["feature"]] <- "new"
am_commit(doc_main, "Version 2")
heads_v2 <- am_get_heads(doc_main)
identical(heads_v1, heads_v2)
#> [1] FALSE
Heads are useful for:
- Tracking which changes you’ve already seen
- Computing incremental updates
- Detecting whether documents have diverged
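Because heads act as a compact fingerprint of document state, checking whether two documents are in sync reduces to a comparison. A small hypothetical helper built only on am_get_heads():
# Hypothetical helper: two documents are in sync when their heads match.
docs_in_sync <- function(doc_a, doc_b) {
  identical(am_get_heads(doc_a), am_get_heads(doc_b))
}
docs_in_sync(doc_main, doc_main)
#> [1] TRUE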
Working with History
# Get full change history
history <- am_get_history(doc_main)
length(history)
#> [1] 2
# History includes all commits ever made
# Each entry contains metadata about a commit
# Get changes between two points in history
changes_since_v1 <- am_get_changes(doc_main, heads_v1)
str(changes_since_v1)
#> List of 1
#> $ : raw [1:126] 85 6f 4a 83 ...
Incremental Sync Pattern
A common pattern is to track the last sync point and send only the new changes:
# Server document that accumulates changes
server <- am_create()
server[["users"]] <- 0L
am_commit(server, "Initialize")
# Client syncs and remembers server state
client <- am_fork(server)
last_sync_heads <- am_get_heads(server)
# Client makes local changes
client[["users"]] <- 1L
client[["local_cache"]] <- TRUE
am_commit(client, "Client updates")
# Server receives changes from other clients
server[["users"]] <- 5L
server[["server_config"]] <- "production"
am_commit(server, "Server updates")
# Client pulls only new server changes since last sync
new_changes <- am_get_changes(server, last_sync_heads)
length(new_changes)
#> [1] 1
am_apply_changes(client, new_changes)
# Update sync point
last_sync_heads <- am_get_heads(server)
# Client now has server's changes
client[["server_config"]]
#> [1] "production"
# Server can also pull client's changes
client_changes <- am_get_changes(client, am_get_heads(server))
am_apply_changes(server, client_changes)
server[["local_cache"]]
#> [1] TRUE
Detecting Divergence
# Create two peers that will diverge
peer_x <- am_create()
peer_x[["id"]] <- "x"
am_commit(peer_x)
peer_y <- am_fork(peer_x)
# Remember common state
common_heads <- am_get_heads(peer_x)
# Both make different changes
peer_x[["data_x"]] <- "from X"
am_commit(peer_x, "X's change")
peer_y[["data_y"]] <- "from Y"
am_commit(peer_y, "Y's change")
# Check if they've diverged
x_heads <- am_get_heads(peer_x)
y_heads <- am_get_heads(peer_y)
identical(x_heads, y_heads)
#> [1] FALSE
# Get each peer's changes since common state
x_changes <- am_get_changes(peer_x, common_heads)
y_changes <- am_get_changes(peer_y, common_heads)
str(x_changes)
#> List of 1
#> $ : raw [1:109] 85 6f 4a 83 ...
str(y_changes)
#> List of 1
#> $ : raw [1:109] 85 6f 4a 83 ...
# Sync to merge divergent histories
rounds <- am_sync(peer_x, peer_y)
rounds
#> [1] 4
# After sync, heads are identical again
identical(am_get_heads(peer_x), am_get_heads(peer_y))
#> [1] TRUE
Concurrent Edits
Understanding how concurrent edits merge:
# Create shared starting point
base <- am_create()
base[["counter"]] <- am_counter(0)
base[["status"]] <- "draft"
am_commit(base)
# Fork to two editors
editor1 <- am_fork(base)
editor2 <- am_fork(base)
# Concurrent edits
# Editor 1: increment counter, change status
am_counter_increment(editor1, AM_ROOT, "counter", 5)
editor1[["status"]] <- "review"
am_commit(editor1, "Editor 1 changes")
# Editor 2: increment counter differently, change status differently
am_counter_increment(editor2, AM_ROOT, "counter", 3)
editor2[["status"]] <- "published"
am_commit(editor2, "Editor 2 changes")
# Sync editors
rounds <- am_sync(editor1, editor2)
# Counter: Both increments sum (CRDT)
editor1[["counter"]]
#> <Automerge Counter: 8 >
# Status: Deterministic conflict resolution (one value wins)
editor1[["status"]]
#> [1] "review"
Sync Performance
The Sync Frequency Tradeoff
There’s a fundamental tradeoff between collaboration latency and efficiency:
# Strategy A: Frequent sync (lower latency, more overhead)
doc_frequent <- am_create()
peer_frequent <- am_create()
sync_count_frequent <- 0
for (i in 1:10) {
  doc_frequent[[paste0("k", i)]] <- i
  am_commit(doc_frequent, paste("Change", i))
  rounds <- am_sync(doc_frequent, peer_frequent)
  sync_count_frequent <- sync_count_frequent + 1
}
# 10 separate commits, 10 sync operations
sync_count_frequent
#> [1] 10
# Strategy B: Batched commits (higher latency, less overhead)
doc_batched <- am_create()
peer_batched <- am_create()
# Batch all changes into one commit
for (i in 1:10) {
  doc_batched[[paste0("k", i)]] <- i
}
am_commit(doc_batched, "Batch of 10 changes")
rounds <- am_sync(doc_batched, peer_batched)
# 1 commit, 1 sync operation
rounds
#> [1] 4
# Compare document sizes
length(am_save(doc_frequent))
#> [1] 265
length(am_save(doc_batched))
#> [1] 186
Batching creates smaller documents because:
- Fewer commit metadata entries
- Better compression of related changes
- Less overhead from commit boundaries
Choosing a Strategy
Use frequent syncs when:
- Multiple users editing simultaneously
- Low-latency collaboration is critical (real-time editing)
- Users need to see each other’s changes quickly
- Working over reliable, fast networks
Use batched commits when:
- Single user making many changes
- Optimizing for storage/bandwidth
- Syncing over slow or metered connections
- Changes are logically related (e.g., form submission)
Hybrid approach (recommended for most cases):
# Batch related changes, sync periodically
doc_hybrid <- am_create()
# User fills out a form (batch these)
doc_hybrid[["name"]] <- "Alice"
doc_hybrid[["email"]] <- "alice@example.com"
doc_hybrid[["preferences"]] <- list("theme" = "dark")
am_commit(doc_hybrid, "Update user profile")
# Sync after logical unit of work
# result <- sync_with_server(doc_hybrid)
# Later: another logical unit
doc_hybrid[["last_login"]] <- Sys.time()
am_commit(doc_hybrid, "Record login")
# Sync again
# result <- sync_with_server(doc_hybrid)
Reusing Sync State
Sync state tracks what each peer has seen, enabling efficient incremental sync:
# Without reusing sync state (inefficient)
doc_no_reuse <- am_create()
doc_no_reuse[["v1"]] <- 1
am_commit(doc_no_reuse)
peer_no_reuse <- am_create()
# First sync
rounds1 <- am_sync(doc_no_reuse, peer_no_reuse)
rounds1
#> [1] 4
# Add more changes
doc_no_reuse[["v2"]] <- 2
am_commit(doc_no_reuse)
# Second sync (creates new sync state, may resend some data)
rounds2 <- am_sync(doc_no_reuse, peer_no_reuse)
rounds2
#> [1] 4
# With reusing sync state (efficient)
doc_reuse <- am_create()
doc_reuse[["v1"]] <- 1
am_commit(doc_reuse)
peer_reuse <- am_create()
sync_state_local <- am_sync_state_new()
sync_state_peer <- am_sync_state_new()
# Manual sync with persistent state
msg1 <- am_sync_encode(doc_reuse, sync_state_local)
am_sync_decode(peer_reuse, sync_state_peer, msg1)
msg2 <- am_sync_encode(peer_reuse, sync_state_peer)
if (!is.null(msg2)) {
  am_sync_decode(doc_reuse, sync_state_local, msg2)
}
# Add more changes
doc_reuse[["v2"]] <- 2
am_commit(doc_reuse)
# Second sync reuses state (only sends new changes)
msg3 <- am_sync_encode(doc_reuse, sync_state_local)
am_sync_decode(peer_reuse, sync_state_peer, msg3)
# Sync state remembers what was already exchanged
When to persist sync state:
- Long-lived connections (WebSocket servers)
- Periodic sync jobs (cron, scheduled tasks)
- Client apps that reconnect frequently
In all of these cases, reusing sync state significantly reduces redundant data transfer.
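For a long-lived connection, this simply means allocating one sync state per peer and keeping it for the lifetime of the connection. A server-side sketch, assuming hypothetical receive_message() and send_message() transport functions, with receive_message() returning NULL on disconnect:
# Server-side handler sketch: one sync state per connected client.
# receive_message() and send_message() are hypothetical transport functions.
handle_client <- function(doc) {
  state <- am_sync_state_new() # lives as long as the connection
  repeat {
    incoming <- receive_message() # NULL on disconnect
    if (is.null(incoming)) break
    am_sync_decode(doc, state, incoming)
    outgoing <- am_sync_encode(doc, state)
    if (!is.null(outgoing)) send_message(outgoing)
  }
}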
Measuring Performance
# Compare efficiency of different approaches
measure_sync <- function(n_changes, batch_size) {
  doc <- am_create()
  peer <- am_create()
  changes_made <- 0
  syncs_performed <- 0

  for (i in seq_len(n_changes)) {
    doc[[paste0("k", i)]] <- i
    changes_made <- changes_made + 1

    # Commit every batch_size changes
    if (changes_made %% batch_size == 0) {
      am_commit(doc, sprintf("Batch at %d", changes_made))
      rounds <- am_sync(doc, peer)
      syncs_performed <- syncs_performed + 1
    }
  }

  # Final commit if needed
  if (changes_made %% batch_size != 0) {
    am_commit(doc, "Final batch")
    rounds <- am_sync(doc, peer)
    syncs_performed <- syncs_performed + 1
  }

  list(
    changes = changes_made,
    syncs = syncs_performed,
    size = length(am_save(doc))
  )
}
# No batching: commit and sync after every change
result_no_batch <- measure_sync(20, 1)
# Medium batching: commit every 5 changes
result_medium <- measure_sync(20, 5)
# Full batching: single commit at end
result_full <- measure_sync(20, 20)
# Compare results
result_no_batch$syncs
#> [1] 20
result_medium$syncs
#> [1] 4
result_full$syncs
#> [1] 1
# Document sizes
result_no_batch$size
#> [1] 461
result_medium$size
#> [1] 276
result_full$size
#> [1] 233
Next Steps
- Learn about CRDT Concepts to understand merge behavior
- Check the Quick Reference for all sync functions