`length(x) * length(ref)` 回の検索を避ける別の考え方:
#' Test whether each element of `x` lies within `tol` of some element of `ref`
#'
#' `ref` is sorted once and `findInterval()` locates, for every element of
#' `x`, the pair of neighbouring reference values that bracket it. Only those
#' two candidates are compared, which avoids the
#' `length(x) * length(ref)` pairwise search.
#'
#' @param x Numeric vector of values to look up.
#' @param ref Numeric vector of reference values; need not be sorted. `NA`s
#'   are dropped by `sort()`.
#' @param tol Non-negative absolute tolerance.
#' @return A logical vector the same length as `x`: `TRUE` where the nearest
#'   reference value is at most `tol` away. `NA` in `x` yields `NA`. When
#'   `ref` has no usable values every element is `FALSE`.
ff <- function(x, ref, tol = 1e-10) {
  sref <- sort(ref)
  n <- length(sref)

  # Nothing to match against: answer FALSE for every element of x instead of
  # collapsing to a zero-length result.
  if (n == 0L) {
    return(rep(FALSE, length(x)))
  }

  # i is in 0..n; sref[i] <= x < sref[i + 1] for interior values.
  i <- findInterval(x, sref)

  # Clamp both neighbours into 1..n so that values outside the range of
  # `ref` (and a single-element `ref`) are compared against the nearest end
  # point rather than indexing sref[0] or sref[n + 1].
  below <- sref[pmax(i, 1L)]
  above <- sref[pmin(i + 1L, n)]

  # pmin() propagates NA, so missing values in x come back as NA instead of
  # failing in a logical-subscript assignment.
  pmin(abs(x - below), abs(x - above)) <= tol
}
# Example: values within 1e-10 of 1, 2 or 3 match; 1.01 and 2 + 1e-9 do not.
ff(
  x = c(1.0, 1 + 1e-13, 1.01, 2, 2 + 1e-9, 2 - 1e-11),
  ref = c(1, 2, 3)
)
#[1] TRUE TRUE FALSE TRUE FALSE TRUE
他の方法と比較すると:
# Benchmark setup. The seed fixes the three sample() draws below, so their
# order must not change if the data are to be reproduced.
set.seed(911)
# X: 5e5 integers drawn from 1..100, each shifted by a perturbation taken from
# {1e-8, 1e-9, 1e-10, 1e-12, 1e-13} with a random sign.
X = sample(1e2, 5e5, TRUE) + (sample(c(1e-8, 1e-9, 1e-10, 1e-12, 1e-13), 5e5, TRUE) * sample(c(-1, 1), 5e5, TRUE))
# REF: the reference values 1..100 stored as doubles.
REF = as.double(1:1e2)
# Sanity check that ff() agrees with select_in() (defined elsewhere) on X/REF.
all.equal(ff(X, REF), select_in(X, REF))
#[1] TRUE
tol = 1e-10 # global `tol` required by Pierre's function (not shown here)
# Time each approach 35 times. The last expression times only the two round()
# calls, as a baseline for any rounding-based method.
microbenchmark::microbenchmark(select_in(X, REF), fselect_in(X, REF), X %~% REF, ff(X, REF), { round(X, 10); round(REF, 10) }, times = 35)
#Unit: milliseconds
# expr min lq median uq max neval
# select_in(X, REF) 1259.95876 1324.52371 1380.10492 1428.78677 1495.61810 35
# fselect_in(X, REF) 121.47241 123.72678 125.28932 128.56770 142.15676 35
# X %~% REF 2023.78159 2088.97226 2161.66973 2219.46164 2547.89849 35
# ff(X, REF) 67.35003 69.39804 71.20871 73.22626 94.04477 35
# { round(X, 10) round(REF, 10) } 96.20344 96.88344 99.10093 102.66328 117.75189 35
フランクの `match` を使う方法は `findInterval` よりも速いはずで、実際、その時間のほとんどは `round` に費やされています。
@Frank nope :)返信として投稿してください – baptiste
@Frank 素晴らしいアイデアですね! –