suppressPackageStartupMessages({library(dplyr)library(tidyr)library(stringr)library(readr)library(readxl)library(purrr)library(stringdist)library(tibble)})# Absolute paths — qmd-location-independent (Quarto sets cwd to the qmd folder by default)PROJECT_ROOT <-"/mnt/d/OneDrive - Yayasan Transformasi Kesejahteraan Bangsa/1. Project Center/ARC8. Global Surgery Institute/8.1 Workforce"DATA_DIR <-file.path(PROJECT_ROOT, "data")OUTPUT_DIR <-file.path(PROJECT_ROOT, "output")if (!dir.exists(OUTPUT_DIR)) dir.create(OUTPUT_DIR, recursive =TRUE)
This document builds the canonical workforce dataset for Indonesia surgical care. HC and SIP are carried as parallel columns from load to final output — never collapsed.
1 Load raw data
1.1 SIRS
Code
hosp_list <-read.csv(file.path(DATA_DIR, "hospitals_national.csv"))bed <-read.csv(file.path(DATA_DIR, "hospitals_beds.csv"))services <-read.csv(file.path(DATA_DIR, "hospitals_services.csv"))staff <-read.csv(file.path(DATA_DIR, "hospitals_staff.csv"))# Hospitals that are in the SIRS hospital registry but submitted no bed/service/staff# data. These are retained for registry completeness but flagged as ineligible for the# SAO analysis because workforce counts cannot be computed.sirs_errors_path <-file.path(DATA_DIR, "hospitals_errors.csv")if (file.exists(sirs_errors_path)) { sirs_empty <-read.csv(sirs_errors_path, colClasses ="character") %>%mutate(hospital_id =as.integer(hospital_id)) %>%select(hospital_id, hospital_name_empty = hospital_name, sirs_error = error)} else { sirs_empty <- tibble::tibble(hospital_id =integer(0),hospital_name_empty =character(0),sirs_error =character(0))}cat("SIRS rows — hospitals:", nrow(hosp_list)," | beds:", nrow(bed)," | services:", nrow(services)," | staff:", nrow(staff)," | empty (no SIRS data):", nrow(sirs_empty), "\n")
Loaded live from bpjs_data.reguler.member via Postgres. Weighted by PSTV15 (bobot), filtered to member_status IN ('Active','Inactive') (excludes deceased), member_year = '2024'. Kab codes reconciled against the BPS shapefile.
35 BPJS kab codes use pre-2022 legacy values (mostly Papua splits) that don’t map to current BPS names without an authoritative Kemendagri lookup. Those ~7M pop are documented as gaps rather than guessed.
Code
suppressPackageStartupMessages({library(DBI); library(RPostgres); library(sf)})con <-dbConnect(Postgres(),host ="localhost",port =5432,dbname ="bpjs_data",user ="julianbs",password ="123123")denom_raw <-dbGetQuery(con, " SELECT\"PSTV10\"::int AS kab_code, COUNT(*) AS n_sample, SUM(\"PSTV15\") AS pop FROM reguler.member WHERE member_year = '2024' AND member_status IN ('Active','Inactive') AND \"PSTV10\" IS NOT NULL GROUP BY 1;")cat("BPJS kab rows:", nrow(denom_raw)," | weighted pop (M):", round(sum(denom_raw$pop) /1e6, 1), "\n")
BPJS kab rows: 515 | weighted pop (M): 291.6
2 Prepare SIRS
2.1 Bed aggregation
Bellwether bed classes: Kelas I, Kelas II, Kelas III, KRIS JKN.
# --- Generic name cleaning ---------------------------------------------------norm_region <-function(x) { x |>as.character() |>str_to_upper() |>str_replace_all("[^A-Z0-9]", " ") |>str_replace_all("\\s+", " ") |>str_trim()}# --- Facility name normalization ---------------------------------------------norm_faskes <-function(x) { x |>str_to_upper() |>str_replace_all("[^A-Z0-9 ]", " ") |>str_squish() |>str_replace("^RUMAH SAKIT UMUM DAERAH ", "RSUD ") |>str_replace("^RS UMUM DAERAH ", "RSUD ") |>str_replace("^RUMAH SAKIT UMUM PUSAT ", "RSUP ") |>str_replace("^RS UMUM PUSAT ", "RSUP ") |>str_replace("^RUMAH SAKIT UMUM ", "RSU ") |>str_replace("^RUMAH SAKIT ", "RS ") |>str_replace("^RS U ", "RSU ") |>str_replace_all("\\bDRS\\b", " ") |>str_replace_all("\\bDR\\b", " ") |>str_replace_all("\\bH\\b", " ") |>str_replace_all("\\bHJ\\b", " ") |>str_replace_all("\\bHJH\\b", " ") |>str_replace_all("\\bPROF\\b", " ") |>str_replace_all("\\bPROFESOR\\b", " ") |>str_replace_all("\\bIR\\b", " ") |>str_squish()}# --- Province normalization --------------------------------------------------fix_province <-function(x) { x <-str_to_upper(str_squish(x)) x[x =="JAKARTA"] <-"DKI JAKARTA" x[x =="BANGKA BELITUNG"] <-"KEPULAUAN BANGKA BELITUNG" x[x =="KEP BANGKA BELITUNG"] <-"KEPULAUAN BANGKA BELITUNG" x[x =="KEP. BANGKA BELITUNG"] <-"KEPULAUAN BANGKA BELITUNG" x[x =="YOGYAKARTA"] <-"DI YOGYAKARTA" x[x =="D.I. YOGYAKARTA"] <-"DI YOGYAKARTA" x[x =="DAERAH ISTIMEWA YOGYAKARTA"] <-"DI YOGYAKARTA"# Collapse post-2022 Papua splits back to pre-split for SIRS/DREAMS compat x[x =="PAPUA BARAT DAYA"] <-"PAPUA BARAT" x[x =="PAPUA SELATAN"] <-"PAPUA" x[x =="PAPUA TENGAH"] <-"PAPUA" x[x =="PAPUA PEGUNUNGAN"] <-"PAPUA" x}# --- Kabupaten alias table ---------------------------------------------------# Canonical form = AUTHORITATIVE Kemendagri Permendagri 137/2017 name.# Variants come from SIRS and DREAMS free-text regency strings and must map# to the exact `kab_name` that appears in auth_kab_lookup.rds.## Only entries where SIRS/DREAMS differ from auth belong here. If a source# string already normalizes to the auth name, NO alias is needed.kab_aliases <- tibble::tribble(~variant, ~canonical,# DKI: SIRS uses 'Kota Jakarta X', auth is 'KOTA JAKARTA X' — already matches.# No alias needed for DKI.# Name variants where SIRS/DREAMS differ from auth form:"KOTA PADANG SIDEMPUAN", "KOTA PADANGSIDIMPUAN","KOTA DUMAI", "KOTA D U M A I","SIAK", "S I A K","KOTA BATAM", "KOTA B A T A M","BANYUASIN", "BANYU ASIN","MUKO MUKO", "MUKOMUKO","TULANG BAWANG", "TULANGBAWANG","KARANGASEM", "KARANG ASEM","KEPULAUAN SIAU TAGULANDANG BIARO", "SIAU TAGULANDANG BIARO","KEP SIAU TAGULANDANG BIARO", "SIAU TAGULANDANG BIARO","PANGKAJENE KEPULAUAN", "PANGKAJENE DAN KEPULAUAN","KOTA PARE PARE", "KOTA PAREPARE","KOTA BAU BAU", "KOTA BAUBAU","PASANGKAYU", "MAMUJU UTARA","KEPULAUAN TANIMBAR", "MALUKU TENGGARA BARAT",# Hyphen vs space variants"TOLI TOLI", "TOLI-TOLI","TOJO UNA UNA", "TOJO UNA-UNA",# Other common legacy variants from Sumut Lubuklinggau which isn't a# name issue but a frequent typo we've seen"KOTA LUBUK LINGGAU", "KOTA LUBUKLINGGAU")# --- Canonicalize any kab name via the alias table ---------------------------canonicalize_kab <-function(x, alias_tbl = kab_aliases) { x_norm <-norm_region(x) idx <-match(x_norm, norm_region(alias_tbl$variant))ifelse(is.na(idx), x_norm, norm_region(alias_tbl$canonical[idx]))}fix_kab <-function(x) canonicalize_kab(x)
4.2 4-way crosswalk: SIRS × DREAMS × SHP × BPJS
Authoritative kab names from Kemendagri Permendagri 137/2017 (via auth_kab_lookup.rds). 514 kabs, one row each, keyed on kab_name (canonical). Every data source joins via name (not code), because the BPS shapefile uses pre-revision codes for several kabs (notably Kaltim’s 2012 Kaltara split) which would otherwise cause silent population misassignment.
Code
# Load authoritative kab lookup (514 rows: bps_code, kab_name, prov_name, ...)auth_kab <-readRDS(file.path(OUTPUT_DIR, "auth_kab_lookup.rds"))auth_to_shp_alias <-readRDS(file.path(OUTPUT_DIR, "auth_to_shp_alias.rds"))# Attach shapefile kab_code to each authoritative row via name (direct or alias).suppressPackageStartupMessages(library(sf))SHP_DIR <-"/mnt/d/OneDrive - Yayasan Transformasi Kesejahteraan Bangsa/1. Project Center/File SHP"shp_raw <-st_read(file.path(SHP_DIR, "kab_kota", "kab.shp"), quiet =TRUE) |>st_drop_geometry()shp_lookup <- shp_raw |>transmute(shp_kab_code =as.integer(KODE_KAB_S),shp_prov_code =as.integer(KODE_PROP_),shp_kab_name =toupper(as.character(NAMA_KAB)),shp_prov_name =toupper(as.character(NAMA_PROP)) )# Canonical base: every row keyed on kab_name (authoritative, unique)canonical <- auth_kab |>left_join(auth_to_shp_alias |>select(auth_name, shp_name),by =c("kab_name"="auth_name")) |>mutate(shp_name =coalesce(shp_name, kab_name)) |>left_join(shp_lookup, by =c("shp_name"="shp_kab_name")) |>transmute(# Authoritative identitybps_code = bps_code,kab_name_canonical = kab_name,# prov_name_canonical collapses Papua post-2022 splits back to pre-split names# for pipeline-wide consistency with fix_province().# prov_name_new preserves the post-2022 split names (for analyses that need them).prov_name_canonical=fix_province(prov_name),prov_name_new = prov_name,prov_code_legacy = prov_code_legacy,prov_name_legacy =toupper(prov_name_legacy),# Shapefile linkage (for geometry joins downstream) shp_kab_code, shp_prov_code,shp_kab_name = shp_name, shp_prov_name,# Normalized key for matching hospital/DREAMS stringskey_norm =norm_region(kab_name) )stopifnot(nrow(canonical) ==514, sum(is.na(canonical$shp_kab_code)) ==0)cat("Canonical base rows:", nrow(canonical), "\n")
Canonical base rows: 514
Code
# Match SIRS regency strings (already canonicalized via fix_kab/fix_province)# to authoritative names by normalized form.sirs_map <- hosp_list |>distinct(sirs_province_raw = province, sirs_regency_raw = regency) |>mutate(key_norm =canonicalize_kab(sirs_regency_raw)) |>left_join(canonical |>select(bps_code, key_norm_canon = key_norm),by =c("key_norm"="key_norm_canon")) |>distinct(bps_code, sirs_province_raw, sirs_regency_raw) |>filter(!is.na(bps_code))cat("SIRS matched to canonical:", nrow(sirs_map), "\n")
# BPJS kab codes (PSTV10) are identical to authoritative Permendagri codes (verified 514/514).# So the BPJS→canonical join is a clean merge on bps_code. No code remapping needed.if (!exists("con") ||!DBI::dbIsValid(con)) { con <-dbConnect(Postgres(), host ="localhost", port =5432,dbname ="bpjs_data", user ="julianbs", password ="123123" )}bpjs_raw_codes <-dbGetQuery(con, " SELECT DISTINCT\"PSTV09\"::int AS pstv09,\"PSTV09_NEW\"::int AS pstv09_new,\"PSTV10\"::int AS pstv10 FROM reguler.member WHERE \"PSTV10\" IS NOT NULL AND \"PSTV10\" != 9999 AND member_year = '2024';")bpjs_map <- bpjs_raw_codes |>inner_join(canonical |>select(bps_code), by =c("pstv10"="bps_code")) |>rename(bpjs_pstv09 = pstv09, bpjs_pstv09_new = pstv09_new, bpjs_pstv10 = pstv10)cat("BPJS matched to canonical:", nrow(bpjs_map), "/", nrow(bpjs_raw_codes), "\n")
# A tibble: 0 × 2
# ℹ 2 variables: province_key <chr>, n <int>
Code
# Retro-attach island to the saved crosswalk (island_lookup only defined at this point)kab_crosswalk <- kab_crosswalk %>%left_join(island_lookup, by =c("prov_name_canonical"="province_key")) %>%relocate(island, .after = prov_name_canonical)saveRDS(kab_crosswalk, file.path(OUTPUT_DIR, "kab_crosswalk.rds"))cat("Re-saved kab_crosswalk.rds with island column\n")
Re-saved kab_crosswalk.rds with island column
6 Build district-level summary
6.1 Eligibility flag
Code
# Attach sirs_empty flag from hosp_sirs so it carries through the joined workforce datasethosp_workforce <- hosp_workforce %>%left_join(hosp_sirs %>%select(hospital_id, sirs_empty, sirs_error),by =c("matched_hospital_id"="hospital_id")) %>%mutate(sirs_empty =coalesce(sirs_empty, FALSE),hosp_eligible =case_when( sirs_empty ~0L, # no SIRS data submitted!is.na(class) & bed_total >0~1L, match_type =="sirs_only"&!is.na(class) & bed_total >0~1L,TRUE~0L ) )cat("Hospitals flagged sirs_empty (excluded from SAO):",sum(hosp_workforce$sirs_empty, na.rm =TRUE), "\n")
Hospitals flagged sirs_empty (excluded from SAO): 13