
When you create many data frames and reports within a single project, questions may arise such as “Which data frame is occupying the most capacity?” or “Which data frame does that dashboard reference, and what is its size?”
In this note, we will explain how to use R scripts to retrieve this information as a data frame.
The two R scripts used in this note are as follows:
get_dataframe_sizes() is a function that calculates the
disk usage of all data frames in the project and returns a data frame
sorted by total size in descending order.
get_report_dataframe() is a function that returns a list
of which data frames are referenced by the dashboards and notes within
the project. You can check the size information of the data frames
associated with each report type.
Both functions can be called without any arguments.
First, register the R script file code as a script in your project.
Select “Scripts” from the “Scripts” menu at the top of the project screen.

Once the script editor opens, set an arbitrary script name (e.g.,
size_check).
Next, paste the following content into the editor:
library(jsonlite)
library(dplyr)
.load_json <- function(path) {
tryCatch(
jsonlite::fromJSON(path, simplifyVector = FALSE),
error = function(e) NULL
)
}
.load_dataframes_meta <- function(project_dir) {
df_dir <- file.path(project_dir, "dataframes")
files <- list.files(df_dir, pattern = "\\.json$", full.names = TRUE)
meta <- list()
for (f in files) {
data <- .load_json(f)
if (is.null(data) || is.null(data[["name"]])) next
belongs_to <- data[["belongsTo"]][["id"]]
display_name <- data[["displayName"]]
if (is.null(display_name) || !nzchar(display_name)) display_name <- data[["name"]]
meta[[data[["name"]]]] <- list(
display_name = display_name,
belongs_to = belongs_to
)
}
meta
}
.build_step_to_root <- function(meta) {
cache <- new.env(hash = TRUE, parent = emptyenv())
resolve_root <- function(name, visited = character(0)) {
if (exists(name, envir = cache, inherits = FALSE)) {
return(get(name, envir = cache, inherits = FALSE))
}
if (name %in% visited) return(name)
df <- meta[[name]]
if (is.null(df) || is.null(df[["belongs_to"]])) {
assign(name, name, envir = cache)
return(name)
}
visited <- c(visited, name)
root <- resolve_root(df[["belongs_to"]], visited)
assign(name, root, envir = cache)
root
}
for (name in names(meta)) resolve_root(name)
as.list(cache)
}
.calculate_sizes <- function(project_dir, step_to_root, meta) {
roots <- names(meta)[sapply(names(meta), function(n) is.null(meta[[n]][["belongs_to"]]))]
sizes <- data.frame(
dataframe_name = roots,
display_name = sapply(roots, function(n) meta[[n]][["display_name"]]),
rdata_bytes = 0L,
data_bytes = 0L,
stringsAsFactors = FALSE
)
rdata_dir <- file.path(project_dir, "rdata")
for (f in list.files(rdata_dir, pattern = "\\.parquet$", full.names = TRUE)) {
stem <- tools::file_path_sans_ext(basename(f))
root <- step_to_root[[stem]]
if (!is.null(root) && root %in% roots) {
idx <- which(sizes$dataframe_name == root)
sizes$rdata_bytes[idx] <- sizes$rdata_bytes[idx] + file.info(f)$size
}
}
data_dir <- file.path(project_dir, "data")
if (dir.exists(data_dir)) {
all_files <- list.files(data_dir, full.names = TRUE)
all_files <- all_files[!file.info(all_files)$isdir]
for (f in all_files) {
stem <- tools::file_path_sans_ext(basename(f))
root <- step_to_root[[stem]]
if (!is.null(root) && root %in% roots) {
idx <- which(sizes$dataframe_name == root)
sizes$data_bytes[idx] <- sizes$data_bytes[idx] + file.info(f)$size
}
}
}
sizes$rdata_size_mb <- round(sizes$rdata_bytes / 1024^2, 2)
sizes$data_size_mb <- round(sizes$data_bytes / 1024^2, 2)
sizes$total_size_mb <- round(sizes$rdata_size_mb + sizes$data_size_mb, 2)
sizes
}
get_dataframe_sizes <- function(project_dir = dirname(getwd())) {
project_dir <- normalizePath(project_dir, mustWork = TRUE)
meta <- .load_dataframes_meta(project_dir)
step_to_root <- .build_step_to_root(meta)
sizes <- .calculate_sizes(project_dir, step_to_root, meta)
sizes |>
select(dataframe_name, display_name, rdata_size_mb, data_size_mb, total_size_mb) |>
arrange(desc(total_size_mb))
}
get_report_dataframe <- function(project_dir = dirname(getwd())) {
project_dir <- normalizePath(project_dir, mustWork = TRUE)
meta <- .load_dataframes_meta(project_dir)
step_to_root <- .build_step_to_root(meta)
sizes <- .calculate_sizes(project_dir, step_to_root, meta)
markdowns_dir <- file.path(project_dir, "markdowns")
layout_dir <- file.path(project_dir, "layout")
rows <- list()
for (md_file in sort(list.files(markdowns_dir, pattern = "\\.json$", full.names = TRUE))) {
md <- .load_json(md_file)
if (is.null(md)) next
report_name <- if (!is.null(md[["name"]])) md[["name"]] else tools::file_path_sans_ext(basename(md_file))
report_display <- md[["displayName"]]
if (is.null(report_display) || !nzchar(report_display)) report_display <- report_name
report_type <- if (isTRUE(md[["_isDashboard"]])) "dashboard" else "note"
viz_ids <- character(0)
if (!is.null(md[["dependencies"]])) {
for (dep in md[["dependencies"]]) {
if (!is.null(dep[["type"]]) && dep[["type"]] == "viz" && !is.null(dep[["id"]])) {
viz_ids <- c(viz_ids, dep[["id"]])
}
}
}
df_names <- character(0)
for (viz_id in viz_ids) {
viz_file <- file.path(layout_dir, paste0(viz_id, ".json"))
if (!file.exists(viz_file)) next
viz <- .load_json(viz_file)
if (is.null(viz) || is.null(viz[["dependencies"]])) next
for (dep in viz[["dependencies"]]) {
if (is.character(dep)) df_names <- c(df_names, dep)
}
}
df_names <- sort(unique(df_names))
for (df_name in df_names) {
size_row <- sizes[sizes$dataframe_name == df_name, ]
rows <- c(rows, list(data.frame(
report_name = report_name,
report_display_name = report_display,
report_type = report_type,
dataframe_name = df_name,
display_name = if (nrow(size_row) > 0) size_row$display_name else df_name,
rdata_size_mb = if (nrow(size_row) > 0) size_row$rdata_size_mb else 0.0,
data_size_mb = if (nrow(size_row) > 0) size_row$data_size_mb else 0.0,
total_size_mb = if (nrow(size_row) > 0) size_row$total_size_mb else 0.0,
stringsAsFactors = FALSE
)))
}
}
if (length(rows) == 0) {
return(data.frame(
report_name = character(0), report_display_name = character(0),
report_type = character(0),
dataframe_name = character(0), display_name = character(0),
rdata_size_mb = numeric(0), data_size_mb = numeric(0), total_size_mb = numeric(0)
))
}
bind_rows(rows)
}
After pasting the code, click the “Run” button. If it completes without errors, the script registration is finished.

Click the “Add Data Frame” button and select “R Script” as the data source.

When the R script editor opens, write the following to execute the script you just registered:
get_dataframe_sizes()

Clicking the “Run” button will return a data frame with the following columns:
The results are sorted by total_size_mb in descending order.
By clicking the Run button, you can check the information for all data frames in the project within a data frame format.

Similar to the get_dataframe_sizes function, you can use the get_report_dataframe function to see a list of which data frames each dashboard or note references, along with their sizes.
In the “R Script” data source editor, write the following:
get_report_dataframe()

Clicking the “Run” button will return a data frame with the following columns:
report_name: Internal name of the report
report_display_name: Name of the report as displayed in Exploratory
report_type: Type of report (“dashboard” or “note”)
dataframe_name: Internal name of the data frame the report depends on
display_name: Display name of that data frame
rdata_size_mb: Size of the Parquet file saved in the project’s rdata folder (MB)
data_size_mb: Size of the original data saved in the data folder (MB)
total_size_mb: The total size of both.
The results are sorted by total_size_mb in descending order.
