vignettes/drake_continuous.Rmd
drake_continuous.Rmd
First we define packages:
then ‘swissmunicipalities’ example data (with only 2 domains):
data(swissmunicipalities)
swissmun <- swissmunicipalities[swissmunicipalities$REG < 3,
c("REG","COM","Nom","HApoly",
"Surfacesbois","Surfacescult",
"Airbat","POPTOT")]
head(swissmun)
#> REG COM Nom HApoly Surfacesbois Surfacescult Airbat POPTOT
#> 2 1 6621 Geneve 1593 67 31 773 177964
#> 4 2 351 Bern 5162 1726 1041 1070 128634
#> 5 1 5586 Lausanne 4136 1635 714 856 124914
#> 9 2 371 Biel (BE) 2123 976 196 463 48655
#> 10 2 942 Thun 2158 425 694 523 40377
#> 11 2 355 Koniz 5099 1567 2621 515 37782
Finally, the plan:
plan <- drake_plan(
# Definition of the sampling frame
frame = buildFrameDF(
df = swissmun,
id = "COM",
domainvalue= "REG",
X = c("HApoly","POPTOT"),
Y =c("Surfacesbois", "Airbat")),
# Definition of precision constraints
cv = as.data.frame(list(
DOM = rep("DOM1",length(unique(swissmun$REG))),
CV1 = rep(0.10,length(unique(swissmun$REG))),
CV2 = rep(0.10,length(unique(swissmun$REG))),
domainvalue= c(1:length(unique(swissmun$REG))))),
# Solution with kmeans
kmean = KmeansSolution2(frame=frame,
errors=cv,
maxclusters=5),
# Determination of number of strata for each domain
nstrat = tapply(kmean$suggestions,
kmean$domainvalue,
FUN=function(x) length(unique(x))),
# Initial solutions
sugg = prepareSuggestion(kmean=kmean,
frame=frame,
nstrat=nstrat),
# Optimisation
solution = optimStrata(method="continuous",
framesamp=frame,
errors= cv,
suggestions = sugg,
nStrat=nstrat,
iter = 25,
pops= 10),
# Optimal strata
strata_structure = summaryStrata(solution$framenew,
solution$aggr_strata,
progress=FALSE),
# Expected CVs with optimal strata
expected_CV1 = expected_CV(solution$aggr_strata),
# Adjust allocation with affordable sample size
sample_size = 150,
adjustedStrata = adjustSize(size=sample_size,strata=solution$aggr_strata,cens=NULL),
# New expected CVs
expected_CV2 = expected_CV(adjustedStrata),
# Evaluation by simulation
eval = evalSolution(frame = solution$framenew,
outstrata = adjustedStrata,
nsampl = 500,
progress = FALSE),
# Final selection of the sample
sample = selectSample(solution$framenew,adjustedStrata)
)
We have just defined this plan:
print(plan)
#> # A tibble: 13 × 2
#> target command
#> <chr> <expr_lst>
#> 1 frame buildFrameDF(df = swissmun, id = "COM", domainvalue = "REG", X = c("HApoly", "POPTOT"), Y = c("Surfacesbois", "Airbat"))
#> 2 cv as.data.frame(list(DOM = rep("DOM1", length(unique(swissmun$REG))), CV1 = rep(0.1, length(unique(swissmun$REG))), CV2 = rep(0.1, length(unique(swissmun$REG))), domainvalue = c(1:length(unique(swissmun$REG)))))
#> 3 kmean KmeansSolution2(frame = frame, errors = cv, maxclusters = 5)
#> 4 nstrat tapply(kmean$suggestions, kmean$domainvalue, FUN = function(x) length(unique(x)))
#> 5 sugg prepareSuggestion(kmean = kmean, frame = frame, nstrat = nstrat)
#> 6 solution optimStrata(method = "continuous", framesamp = frame, errors = cv, suggestions = sugg, nStrat = nstrat, iter = 25, pops = 10)
#> 7 strata_structure summaryStrata(solution$framenew, solution$aggr_strata, progress = FALSE)
#> 8 expected_CV1 expected_CV(solution$aggr_strata)
#> 9 sample_size 150
#> 10 adjustedStrata adjustSize(size = sample_size, strata = solution$aggr_strata, cens = NULL)
#> 11 expected_CV2 expected_CV(adjustedStrata)
#> 12 eval evalSolution(frame = solution$framenew, outstrata = adjustedStrata, nsampl = 500, progress = FALSE)
#> 13 sample selectSample(solution$framenew, adjustedStrata)
visualised in this way:
vis_drake_graph(plan, ncol_legend = NULL,
main="Optimization with 'continuous' method")
and now we execute:
make(plan)
#> ✔ All targets are already up to date.
First of all, the total size of the optimized sample:
Then, the structure of the optimized strata:
loadd(strata_structure)
strata_structure
#> Domain Stratum Population Allocation SamplingRate Lower_X1 Upper_X1 Lower_X2 Upper_X2
#> 1 1 1 346 10 0.030101 32 935 27 2283
#> 2 1 2 125 13 0.102664 185 3031 65 3247
#> 3 1 3 51 5 0.099717 129 2984 3312 29559
#> 4 1 4 64 19 0.291158 3144 24269 78 22454
#> 5 1 5 3 2 0.666667 1593 28225 6536 177964
#> 6 2 1 468 7 0.015805 34 823 30 1322
#> 7 2 2 298 11 0.037152 168 1782 79 4916
#> 8 2 3 93 7 0.069973 152 4182 128 9609
#> 9 2 4 54 13 0.249651 421 20079 272 128634
The expected CVs given the optimized strata:
loadd(expected_CV1)
expected_CV1
#> cv(Y1) cv(Y2)
#> DOM1 0.099 0.099
#> DOM2 0.100 0.100
and after the adjustment of the allocation in the strata based on the affordable sample size:
loadd(expected_CV2)
expected_CV2
#> cv(Y1) cv(Y2)
#> DOM1 0.070 0.071
#> DOM2 0.074 0.072
The results of simulation with the adjusted allocation:
loadd(eval)
eval$coeff_var
#> CV1 CV2 dom
#> 1 0.0670 0.0743 DOM1
#> 2 0.0731 0.0733 DOM2
eval$rel_bias
#> y1 y2 dom
#> 1 -0.0041 0.0026 DOM1
#> 2 -0.0002 -0.0014 DOM2
Finally, the selection of the sample from the adjusted strata:
loadd(sample)
head(sample)
#> DOMAINVALUE STRATO ID X1 X2 Y1 Y2 LABEL WEIGHTS FPC
#> 1 1 1 5500 237 193 24 9 1 19.22222 0.05202312
#> 2 1 1 5533 500 543 184 17 1 19.22222 0.05202312
#> 3 1 1 6178 301 27 124 3 1 19.22222 0.05202312
#> 4 1 1 5935 86 50 13 2 1 19.22222 0.05202312
#> 5 1 1 5514 679 917 131 27 1 19.22222 0.05202312
#> 6 1 1 5905 688 299 176 12 1 19.22222 0.05202312