Goal
Compare yaml12 and yaml for reading and
writing YAML over a range of document sizes.
We intentionally avoid testing semantic differences between YAML 1.1 and 1.2, as well as less-used YAML features like tags and streams. Instead, we focus on the simple default usage of both.
Operations:
- read: YAML file → R object
- write: R object → YAML (written to nullfile())
Setup
Generate inputs
To benchmark, we create YAML files that each contain a sequence of copies
of a small, fixed “mixed” node, repeated 2^k times for k = 1, …, 15 (that is, 2 to 32,768 copies).
This node is designed to exercise the full YAML 1.2 core schema (every
core type is represented). We generate these files with
yaml12::write_yaml().
Each document is written to a temporary file, and we then measure how long
it takes to read back. Note that on macOS and Linux, tempfile() paths often
live on a tmpfs in RAM, so this typically won’t trigger a
write to disk.
# Benchmark fixture: a single node that touches each core schema type
# (seq, map, str, bool, int, float, null).
mixed_node <- local({
  # Filler prose, stored as a character vector of four strings.
  lorem_lines <- c(
    "Lorem ipsum dolor sit amet, vel accumsan vitae faucibus ultrices leo",
    "neque? Et cursus lacinia, ut, sit donec facilisi eu interdum. Dui",
    "ipsum, vitae ligula commodo convallis ac sed nunc. Ipsum at nec lacus",
    "eros suscipit vitae."
  )
  # Text with embedded newlines and padding spaces around the line breaks.
  padded_text <- "lorem \n ipsum \n dolor\n"

  list(
    str = lorem_lines,
    block_str = padded_text,
    bools = c(TRUE, FALSE),
    ints = c(123L, -123L),
    floats = c(123.456, -123.456),
    null = NULL # list() keeps an explicit NULL entry
  )
})
# Write a list of `size` copies of `mixed_node` to a fresh temporary ".yaml"
# file with yaml12::write_yaml() and return the path of that file.
make_yaml_doc <- function(size = 1) {
  nodes <- rep(list(mixed_node), size)
  yaml_path <- tempfile(fileext = ".yaml")
  yaml12::write_yaml(nodes, yaml_path)
  yaml_path
}
docs <- sapply(2^(1:15), make_yaml_doc)
Read performance
# Benchmark both readers on every generated file. Each element is the
# bench::mark() result for one file, with that file's size in bytes attached
# as a `file_size` column.
read_results <- lapply(docs, function(doc) {
  timings <- bench::mark(
    yaml12::read_yaml(doc),
    yaml::read_yaml(doc),
    # Skip bench::mark()'s comparison of the two expressions' results.
    check = FALSE
  )
  timings$file_size <- file.size(doc)
  timings
})
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
Read results summary
# Stack the per-file benchmark results into one data frame and replace each
# benchmarked expression with its deparsed call text (as a factor), so it can
# be mapped to colour in the summary plot.
# vapply() pins the result to a character vector; sapply() would silently
# return a list for empty input.
read_results_df <- bind_rows(read_results) |>
  mutate(expression = as.factor(vapply(expression, deparse1, character(1))))
# Median read time against file size, one colour per benchmarked expression.
# The x axis is log10-scaled with byte labels; the y axis uses bench's time
# transformation.
ggplot(
  read_results_df,
  aes(x = file_size, y = median, color = expression)
) +
  geom_point() +
  geom_smooth(linewidth = 0.5) +
  scale_x_log10(labels = scales::label_bytes()) +
  scale_y_continuous(trans = bench::bench_time_trans()) +
  labs(
    title = "Read Performance",
    x = "File Size",
    y = "Median Run Time"
  )
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Read results (detailed)
# For each file size, print the benchmark plot (titled with the file size)
# followed by the summary table. filter_gc = FALSE keeps iterations that
# triggered a garbage collection in the summary.
invisible(lapply(read_results, \(bm) {
  size_label <- scales::label_bytes()(bm$file_size[1])
  print(plot(bm) + ggplot2::ggtitle(size_label))
  print(summary(bm, filter_gc = FALSE))
}))
#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <dbl> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::r… 856 24µs 30.2µs 22778. 3.39KB 2.28 10000
#> 2 yaml::rea… 856 58.3µs 72.2µs 13001. 34.81KB 4.00 6501
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <dbl> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::r… 1704 35.6µs 42.2µs 22483. 0B 2.25 10000
#> 2 yaml::rea… 1704 79.3µs 93.9µs 10187. 10.9KB 6.00 5094
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 yaml12::read_… 3400 59.2µs 71.1µs 13457. 0B 2.00
#> 2 yaml::read_ya… 3400 121.2µs 142.1µs 6648. 13.7KB 6.00
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <dbl> <bch> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::re… 6792 108µs 132µs 7329. 0B 0 3664
#> 2 yaml::read… 6792 206µs 238µs 3994. 19.4KB 8.00 1997
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <dbl> <bch> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::re… 13576 201µs 243µs 4072. 304B 0 2037
#> 2 yaml::read… 13576 378µs 434µs 2129. 31.1KB 5.99 1067
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <dbl> <bch> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::re… 27144 402µs 475µs 2024. 560B 2.00 1012
#> 2 yaml::read… 27144 724µs 836µs 1165. 69.8KB 7.99 583
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 yaml12::rea… 54280 768.91µs 929.02µs 1068. 1.05KB 0
#> 2 yaml::read_… 54280 1.44ms 1.65ms 593. 147.06KB 7.99
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <dbl> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::r… 108552 1.67ms 1.9ms 521. 2.05KB 0 261
#> 2 yaml::rea… 108552 3.05ms 3.49ms 273. 301.6KB 5.97 137
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <dbl> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::r… 217096 3.29ms 3.76ms 266. 4.05KB 0 133
#> 2 yaml::rea… 217096 6.95ms 7.76ms 115. 610.65KB 5.96 58
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 yaml12::read_… 434184 6.86ms 7.62ms 130. 8.05KB 1.97
#> 2 yaml::read_ya… 434184 18.23ms 21.23ms 42.6 1.2MB 3.87
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <dbl> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::r… 868360 14.9ms 15.9ms 62.7 16.05KB 0 32
#> 2 yaml::rea… 868360 69ms 73.4ms 12.9 2.41MB 3.70 7
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 yaml12::read_… 1736712 30.4ms 31.9ms 31.3 32.05KB 1.96
#> 2 yaml::read_ya… 1736712 315.9ms 329.9ms 3.03 4.82MB 1.52
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 yaml12::read_… 3473416 61.51ms 64.59ms 14.7 64.05KB 0
#> 2 yaml::read_ya… 3473416 1.56s 1.56s 0.640 9.65MB 0.640
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 yaml12::rea… 6946824 128.09ms 131.56ms 7.47 128KB 1.87
#> 2 yaml::read_… 6946824 6.43s 6.43s 0.155 19.3MB 0.311
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression file_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <dbl> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 yaml12::read_… 13893640 257.7ms 262.7ms 3.81 256KB 1.90
#> 2 yaml::read_ya… 13893640 26.6s 26.6s 0.0376 38.6MB 0.263
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>
Write performance
# One input object per benchmark size: a list holding 2^k copies of
# `mixed_node`, for k = 1, ..., 15.
objs <- lapply(2^(1:15), \(n_copies) rep(list(mixed_node), n_copies))
# Benchmark both writers on every input object, sending output to nullfile().
# Each element is the bench::mark() result for one object, with that object's
# object.size() attached as an `obj_size` column.
write_results <- lapply(objs, function(obj) {
  timings <- bench::mark(
    yaml12::write_yaml(obj, nullfile()),
    yaml::write_yaml(obj, nullfile()),
    # Skip bench::mark()'s comparison of the two expressions' results.
    check = FALSE
  )
  timings$obj_size <- object.size(obj)
  timings
})
#> Warning: Some expressions had a GC in every iteration; so filtering is
#> disabled.
Write results summary:
# Stack the per-object benchmark results into one data frame and replace each
# benchmarked expression with its deparsed call text (as a factor), so it can
# be mapped to colour in the summary plot.
# vapply() pins the result to a character vector; sapply() would silently
# return a list for empty input.
write_results_df <- bind_rows(write_results) |>
  mutate(expression = as.factor(vapply(expression, deparse1, character(1))))
# Median write time against object size, one colour per benchmarked
# expression. The x axis is log10-scaled with byte labels; the y axis uses
# bench's time transformation.
ggplot(
  write_results_df,
  aes(x = obj_size, y = median, color = expression)
) +
  geom_point() +
  geom_smooth(linewidth = 0.5) +
  scale_x_log10(labels = scales::label_bytes()) +
  scale_y_continuous(trans = bench::bench_time_trans()) +
  labs(
    title = "Write Performance",
    x = "Object Size",
    y = "Median Run Time"
  )
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Write results (detailed)
# For each object size, print the benchmark plot (titled with the object
# size) followed by the summary table. filter_gc = FALSE keeps iterations
# that triggered a garbage collection in the summary.
invisible(lapply(write_results, \(bm) {
  # obj_size holds object.size() values; convert to plain numeric for the
  # byte labeller.
  size_label <- scales::label_bytes()(as.numeric(bm$obj_size[1]))
  print(plot(bm) + ggplot2::ggtitle(size_label))
  print(summary(bm, filter_gc = FALSE))
}))
#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <objct_> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::wr… 3360 by… 14.2µs 17.8µs 52249. 3KB 0 10000
#> 2 yaml::writ… 3360 by… 34.7µs 43.3µs 21601. 33.7KB 4.32 10000
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <objct_> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::wr… 6672 by… 18µs 22.4µs 41831. 0B 0 10000
#> 2 yaml::writ… 6672 by… 46.7µs 56.5µs 16466. 1.56KB 4.00 8228
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <objct_> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::wr… 13296 b… 26.1µs 31.1µs 30600. 0B 0 10000
#> 2 yaml::writ… 13296 b… 68.4µs 82.2µs 11559. 3.07KB 4.00 5777
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <objct_> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 yaml12::write_… 26544 b… 42.4µs 51.1µs 18691. 0B 0
#> 2 yaml::write_ya… 26544 b… 115.5µs 137.2µs 6945. 6.09KB 4.00
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <objct_> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 yaml12::write_… 53040 b… 73.3µs 87.7µs 11055. 0B 0
#> 2 yaml::write_ya… 53040 b… 208.4µs 245.2µs 3896. 12.1KB 6.00
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <objct_> <bch> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::wri… 106032 … 139µs 163µs 5931. 0B 0 2966
#> 2 yaml::write… 106032 … 397µs 455µs 2125. 24.2KB 4.00 1063
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <objct_> <bch> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::wri… 212016 … 273µs 307µs 3140. 0B 0 1570
#> 2 yaml::write… 212016 … 778µs 884µs 1107. 48.3KB 4.00 554
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <objct_> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 yaml12::writ… 423984 … 543.95µs 609.67µs 1598. 0B 0
#> 2 yaml::write_… 423984 … 1.62ms 1.76ms 555. 96.6KB 3.99
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <objct_> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::wr… 847920 … 1.1ms 1.2ms 819. 0B 0 410
#> 2 yaml::writ… 847920 … 3.27ms 3.53ms 269. 193KB 3.98 135
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <objct_> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::wr… 1695792… 2.22ms 2.42ms 410. 0B 0 205
#> 2 yaml::writ… 1695792… 6.48ms 7.03ms 140. 386KB 3.99 70
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <objct_> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 yaml12::write_… 3391536… 4.51ms 4.94ms 201. 0B 0
#> 2 yaml::write_ya… 3391536… 13.33ms 13.93ms 70.6 772KB 3.92
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <objct_> <bch:t> <bch:t> <dbl> <bch:byt> <dbl>
#> 1 yaml12::write_… 6783024… 9.42ms 9.94ms 99.9 0B 0
#> 2 yaml::write_ya… 6783024… 26.81ms 27.91ms 35.4 1.51MB 3.93
#> # ℹ 7 more variables: n_itr <int>, n_gc <dbl>, total_time <bch:tm>,
#> # result <list>, memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <objct_> <bch:> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::wr… 1356600… 19.3ms 20ms 49.4 0B 0 25
#> 2 yaml::writ… 1356600… 55.7ms 56.4ms 17.4 3.02MB 5.81 9
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <objct_> <bch:t> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::w… 2713195… 39.8ms 41ms 24.4 0B 0 13
#> 2 yaml::wri… 2713195… 110.5ms 113ms 8.76 6.03MB 3.50 5
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>

#> # A tibble: 2 × 14
#> expression obj_size min median `itr/sec` mem_alloc `gc/sec` n_itr
#> <bch:expr> <objct_> <bch:t> <bch:> <dbl> <bch:byt> <dbl> <int>
#> 1 yaml12::w… 5426385… 79.5ms 81ms 12.4 0B 0 7
#> 2 yaml::wri… 5426385… 225ms 227ms 4.41 12.1MB 4.41 3
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> # memory <list>, time <list>, gc <list>
