RcppとopenMPを使用した切捨て正規分布からの高速サンプリング

更新：RcppとopenMPを使用した切捨て正規分布からの高速サンプリング

Dirkの提案を実装しようとしました。コメント？私は現在JSMで忙しいですが、ギャラリーのRmdを編む前にいくつかのフィードバックを得たいと思います。 Armadilloから通常のRcppに戻ってきました。何の価値も加えていないからです。 R：が付いたスカラーバージョンはかなりいいです。 mean/sdが希望の出力長のベクトルではなく、スカラとして入力された場合は、描画回数としてパラメータnを設定する必要があります。

トランケートされた正規分布のサンプルを必要とするMCMCアプリケーションがたくさんあります。私はTNの既存の実装を構築し、それに並列計算を追加しました。

問題：

誰もが、さらに潜在的な速度の向上を参照してくださいしていますか？ ~~ベンチマークの最後のケースでは、rtruncnormが高速になることがあります。~~ Rcppの実装は、既存のパッケージよりも常に高速ですが、さらに改善することはできますか？
私は共有できない複雑なモデルの中で実行し、私のRセッションはクラッシュしました。しかし、私は体系的にそれを再現することはできないので、コードの別の部分であった可能性があります。誰かがテネシー州で働いている場合は、テストして教えてください。アップデート：私は更新されたコードに問題はありませんでしたが、私に教えてください。私は物事をまとめる方法

：私の知る限りでは、最速の実装がCRAN上ではなく、ソースコードはOSU statをダウンロードすることができます。 msmとtruncormでの競合する実装は、私のベンチマークでは低速でした。そのトリックは、プロポーション分布を効率的に調整することです。ここで、指数関数は切り捨てられたNormalの尾部に対してうまく機能します。私はChrisのコードを取って、 "Rcpp'ed"して、それにいくつかのopenMPスパイスを追加しました。動的なスケジュールはここでは最適です。サンプリングには境界に応じてより多くの時間またはより短い時間がかかります。私が嫌なことを見つけたことの一つ：二重で作業したいとき、NumericVectorタイプに基づいた統計分布がたくさんあります。私はちょうどその周りに自分の道をコーディングした

HERESにRcppコード：

libs=c("truncnorm","msm","inline","Rcpp","RcppArmadillo","rbenchmark") 
if(sum(!(libs %in% .packages(all.available = TRUE)))>0){ install.packages(libs[!(libs %in% .packages(all.available = TRUE))])} 
for(i in 1:length(libs)) {library(libs[i],character.only = TRUE,quietly=TRUE)} 


#needed for openMP parallel 
Sys.setenv("PKG_CXXFLAGS"="-fopenmp") 
Sys.setenv("PKG_LIBS"="-fopenmp") 

#no of cores for openMP version 
cores = 4 

#surce code from same dir 
Rcpp::sourceCpp('truncnorm.cpp') 


#sample size 
nn=1000000 


bb= 100 
aa=-100 
benchmark(rtnm(rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn),cores), rtnm(rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn),1),rtnorm(nn,rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn)),rtruncnorm(nn, a=aa, b=100, mean = 0, sd = 1) , order="relative", replications=3 )[,1:4] 

aa=0 
benchmark(rtnm(rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn),cores), rtnm(rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn),1),rtnorm(nn,rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn)),rtruncnorm(nn, a=aa, b=100, mean = 0, sd = 1) , order="relative", replications=3 )[,1:4] 

aa=2 
benchmark(rtnm(rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn),cores), rtnm(rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn),1),rtnorm(nn,rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn)),rtruncnorm(nn, a=aa, b=100, mean = 0, sd = 1) , order="relative", replications=3 )[,1:4] 

aa=50 
benchmark(rtnm(rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn),cores), rtnm(rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn),1),rtnorm(nn,rep(0,nn),rep(1,nn),rep(aa,nn),rep(100,nn)),rtruncnorm(nn, a=aa, b=100, mean = 0, sd = 1) , order="relative", replications=3 )[,1:4]

速度が上限/下限に依存するため、いくつかのベンチマークの実行が必要です：

#include <Rcpp.h> 
#include <omp.h> 


// norm_rs(a, b) 
// generates a sample from a N(0,1) RV restricted to be in the interval 
// (a,b) via rejection sampling. 
// ====================================================================== 

// [[Rcpp::export]] 

double norm_rs(double a, double b) 
{ 
    double x; 
    x = Rf_rnorm(0.0, 1.0); 
    while((x < a) || (x > b)) x = norm_rand(); 
    return x; 
} 

// half_norm_rs(a, b) 
// generates a sample from a N(0,1) RV restricted to the interval 
// (a,b) (with a > 0) using half normal rejection sampling. 
// ====================================================================== 

// [[Rcpp::export]] 

double half_norm_rs(double a, double b) 
{ 
    double x; 
    x = fabs(norm_rand()); 
    while((x<a) || (x>b)) x = fabs(norm_rand()); 
    return x; 
} 

// unif_rs(a, b) 
// generates a sample from a N(0,1) RV restricted to the interval 
// (a,b) using uniform rejection sampling. 
// ====================================================================== 

// [[Rcpp::export]] 

double unif_rs(double a, double b) 
{ 
    double xstar, logphixstar, x, logu; 

    // Find the argmax (b is always >= 0) 
    // This works because we want to sample from N(0,1) 
    if(a <= 0.0) xstar = 0.0; 
    else xstar = a; 
    logphixstar = R::dnorm(xstar, 0.0, 1.0, 1.0); 

    x = R::runif(a, b); 
    logu = log(R::runif(0.0, 1.0)); 
    while(logu > (R::dnorm(x, 0.0, 1.0,1.0) - logphixstar)) 
    { 
     x = R::runif(a, b); 
     logu = log(R::runif(0.0, 1.0)); 
    } 
    return x; 
} 

// exp_rs(a, b) 
// generates a sample from a N(0,1) RV restricted to the interval 
// (a,b) using exponential rejection sampling. 
// ====================================================================== 

// [[Rcpp::export]] 

double exp_rs(double a, double b) 
{ 
    double z, u, rate; 

// Rprintf("in exp_rs"); 
    rate = 1/a; 
//1/a 

    // Generate a proposal on (0, b-a) 
    z = R::rexp(rate); 
    while(z > (b-a)) z = R::rexp(rate); 
    u = R::runif(0.0, 1.0); 

    while(log(u) > (-0.5*z*z)) 
    { 
     z = R::rexp(rate); 
     while(z > (b-a)) z = R::rexp(rate); 
     u = R::runif(0.0,1.0); 
    } 
    return(z+a); 
} 




// rnorm_trunc(mu, sigma, lower, upper) 
// 
// generates one random normal RVs with mean 'mu' and standard 
// deviation 'sigma', truncated to the interval (lower,upper), where 
// lower can be -Inf and upper can be Inf. 
//====================================================================== 

// [[Rcpp::export]] 
double rnorm_trunc (double mu, double sigma, double lower, double upper) 
{ 
int change; 
double a, b; 
double logt1 = log(0.150), logt2 = log(2.18), t3 = 0.725; 
double z, tmp, lograt; 

change = 0; 
a = (lower - mu)/sigma; 
b = (upper - mu)/sigma; 

// First scenario 
if((a == R_NegInf) || (b == R_PosInf)) 
    { 
    if(a == R_NegInf) 
     { 
    change = 1; 
    a = -b; 
    b = R_PosInf; 
     } 

    // The two possibilities for this scenario 
    if(a <= 0.45) z = norm_rs(a, b); 
    else z = exp_rs(a, b); 
    if(change) z = -z; 
    } 
// Second scenario 
else if((a * b) <= 0.0) 
    { 
    // The two possibilities for this scenario 
    if((R::dnorm(a, 0.0, 1.0,1.0) <= logt1) || (R::dnorm(b, 0.0, 1.0, 1.0) <= logt1)) 
     { 
    z = norm_rs(a, b); 
     } 
    else z = unif_rs(a,b); 
    } 
// Third scenario 
else 
    { 
    if(b < 0) 
     { 
    tmp = b; b = -a; a = -tmp; change = 1; 
     } 

    lograt = R::dnorm(a, 0.0, 1.0, 1.0) - R::dnorm(b, 0.0, 1.0, 1.0); 
    if(lograt <= logt2) z = unif_rs(a,b); 
    else if((lograt > logt1) && (a < t3)) z = half_norm_rs(a,b); 
    else z = exp_rs(a,b); 
    if(change) z = -z; 
    } 
    double output; 
    output = sigma*z + mu; 
return (output); 
} 


// rtnm(mu, sigma, lower, upper, cores) 
// 
// generates one random normal RVs with mean 'mu' and standard 
// deviation 'sigma', truncated to the interval (lower,upper), where 
// lower can be -Inf and upper can be Inf. 
// mu, sigma, lower, upper are vectors, and vectorized calls of this function 
// speed up computation 
// cores is an intege, representing the number of cores to be used in parallel 
//====================================================================== 


// [[Rcpp::export]] 

Rcpp::NumericVector rtnm(Rcpp::NumericVector mus, Rcpp::NumericVector sigmas, Rcpp::NumericVector lower, Rcpp::NumericVector upper, int cores){ 
    omp_set_num_threads(cores); 
    int nobs = mus.size(); 
    Rcpp::NumericVector out(nobs); 
    double logt1 = log(0.150), logt2 = log(2.18), t3 = 0.725; 
    double a,b, z, tmp, lograt; 

    int change; 

    #pragma omp parallel for schedule(dynamic) 
    for(int i=0;i<nobs;i++) { 

    a = (lower(i) - mus(i))/sigmas(i); 
    b = (upper(i) - mus(i))/sigmas(i); 
    change=0; 
    // First scenario 
    if((a == R_NegInf) || (b == R_PosInf)) 
     { 
     if(a == R_NegInf) 
      { 
       change = 1; 
       a = -b; 
       b = R_PosInf; 
      } 

     // The two possibilities for this scenario 
     if(a <= 0.45) z = norm_rs(a, b); 
     else z = exp_rs(a, b); 
     if(change) z = -z; 
     } 
    // Second scenario 
    else if((a * b) <= 0.0) 
     { 
     // The two possibilities for this scenario 
     if((R::dnorm(a, 0.0, 1.0,1.0) <= logt1) || (R::dnorm(b, 0.0, 1.0, 1.0) <= logt1)) 
      { 
       z = norm_rs(a, b); 
      } 
     else z = unif_rs(a,b); 
     } 

    // Third scenario 
    else 
     { 
     if(b < 0) 
      { 
       tmp = b; b = -a; a = -tmp; change = 1; 
      } 

     lograt = R::dnorm(a, 0.0, 1.0, 1.0) - R::dnorm(b, 0.0, 1.0, 1.0); 
     if(lograt <= logt2) z = unif_rs(a,b); 
     else if((lograt > logt1) && (a < t3)) z = half_norm_rs(a,b); 
     else z = exp_rs(a,b); 
     if(change) z = -z; 
     } 
    out(i)=sigmas(i)*z + mus(i);   
    } 

return(out); 
}

そして、ここでは、ベンチマークです。別の例の場合におけるアルゴリズムキックの、異なる部分

出典

2013-07-29 Inferrator

なぜN. Chopine高速切断正規分布を実装しようとしていないhttp://link.springer.com/article/10.1007%2Fs11222-009-9168-1？ – dickoa

inv_sqrt_2piの余分な数字は無用です。あなたは、rでそれほど正確な浮動小数点数を得ることはできません。 'print（inv_sqrt_2pi、digits = 18）' [1] 0.398942280401432703 –

@dickoa興味深いことに、その論文は私を逃した。 – Inferrator

本当に速いのコメント：

あなたがRcppArmadillo.hを含める場合は、Rcpp.hを含める必要はありません - 実際には、あなたはいけないと私たちもテスト。その
rep(oneDraw, n)はnコールを行います。私はあなたを返すn個描く一度呼び出される関数を記述します - あなた自身を保存すると、それは速くなりますn-1の関数呼び出しのオーバーヘッド

たくさんのコメントはNumericVectorタイプに基づいています

私がdoubleと一緒に作業したいとき、はいくつかの誤解を明らかにするかもしれません：NumericVectorは、内部Rタイプのための私たちの便利なプロキシクラスです：コピーはありません。あなたはstd::vector<double>または好きな形を自由に使うことができます。
私は切り詰めた法線についてはほとんど分かりませんので、アルゴリズムの詳細についてはコメントできません。
一度それを練ったら、Rcpp Galleryの投稿を考えてください。

出典

2013-07-29 02:59:30

RcppとopenMPを使用した切捨て正規分布からの高速サンプリング

答えて

関連する問題