2016-04-30 8 views
0

レポート用の要約データフレームを作成しています。手動ではデータフレームを作成できたので、現在は結果の作成を簡素化する関数を書いているところです。(タイトル:Rの関数内でリストからベクトルを抽出する)

手動プロセスで得られる、私が求めている結果は次のとおりです。

 filter question    answer  value 
    1  None  R     0 0.0000000 
    2  None  R     1 1.0000000 
    3  None  R     2 2.0000000 
    4  None  R     3 1.0000000 
    5  None  R     4 1.0000000 
    6  None  R     5 1.0000000 
    7  None  R     6 1.0000000 
    8  None  R     7 1.0000000 
    9  None  R     8 0.0000000 
    10 None  R     9 1.0000000 
    11 None  R     10 0.0000000 
    12 None  R    <NA> 1.0000000 
    13 None  R    sum 39.0000000 
    14 None  R    length 10.0000000 
    15 None  R    mean 4.3333333 
    16 None  R standard.deviation 2.6457513 
    17 None  R    var 7.0000000 
    18 None  R    median 4.0000000 
    19 None  R    min 1.0000000 
    20 None  R    max 9.0000000 
    21 None  R  quantile.0% 1.0000000 
    22 None  R  quantile.25% 2.0000000 
    23 None  R  quantile.50% 4.0000000 
    24 None  R  quantile.75% 6.0000000 
    25 None  R  quantile.100% 9.0000000 
    26 None  R    skew 0.3275692 
    27 None  R   kurtosis -1.5333333 
    28 None  S     0 0.0000000 
    29 None  S     1 0.0000000 
    30 None  S     2 0.0000000 
    31 None  S     3 1.0000000 
    32 None  S     4 0.0000000 
    33 None  S     5 1.0000000 
    34 None  S     6 1.0000000 
    35 None  S     7 3.0000000 
    36 None  S     8 1.0000000 
    37 None  S     9 1.0000000 
    38 None  S     10 0.0000000 
    39 None  S     11 0.0000000 
    40 None  S     12 0.0000000 
    41 None  S     13 0.0000000 
    42 None  S     14 0.0000000 
    43 None  S     15 0.0000000 
    44 None  S     16 0.0000000 
    45 None  S     17 0.0000000 
    46 None  S     18 0.0000000 
    47 None  S     19 0.0000000 
    48 None  S     20 0.0000000 
    49 None  S    <NA> 2.0000000 
    50 None  S    sum 52.0000000 
    51 None  S    length 10.0000000 
    52 None  S    mean 6.5000000 
    53 None  S standard.deviation 1.8516402 
    54 None  S    var 3.4285714 
    55 None  S    median 7.0000000 
    56 None  S    min 3.0000000 
    57 None  S    max 9.0000000 
    58 None  S  quantile.0% 3.0000000 
    59 None  S  quantile.25% 5.7500000 
    60 None  S  quantile.50% 7.0000000 
    61 None  S  quantile.75% 7.2500000 
    62 None  S  quantile.100% 9.0000000 
    63 None  S    skew -0.4252986 
    64 None  S   kurtosis -1.3028646 
    65 None  W     0 0.0000000 
    66 None  W     1 0.0000000 
    67 None  W     2 1.0000000 
    68 None  W     3 0.0000000 
    69 None  W     4 2.0000000 
    70 None  W     5 2.0000000 
    71 None  W     6 2.0000000 
    72 None  W     7 2.0000000 
    73 None  W     8 1.0000000 
    74 None  W     9 0.0000000 
    75 None  W     10 0.0000000 
    76 None  W     11 0.0000000 
    77 None  W     12 0.0000000 
    78 None  W     13 0.0000000 
    79 None  W     14 0.0000000 
    80 None  W     15 0.0000000 
    81 None  W     16 0.0000000 
    82 None  W     17 0.0000000 
    83 None  W     18 0.0000000 
    84 None  W     19 0.0000000 
    85 None  W     20 0.0000000 
    86 None  W     21 0.0000000 
    87 None  W     22 0.0000000 
    88 None  W     23 0.0000000 
    89 None  W     24 0.0000000 
    90 None  W     25 0.0000000 
    91 None  W     26 0.0000000 
    92 None  W     27 0.0000000 
    93 None  W     28 0.0000000 
    94 None  W     29 0.0000000 
    95 None  W     30 0.0000000 
    96 None  W    <NA> 0.0000000 
    97 None  W    sum 54.0000000 
    98 None  W    length 10.0000000 
    99 None  W    mean 5.4000000 
    100 None  W standard.deviation 1.7763883 
    101 None  W    var 3.1555556 
    102 None  W    median 5.5000000 
    103 None  W    min 2.0000000 
    104 None  W    max 8.0000000 
    105 None  W  quantile.0% 2.0000000 
    106 None  W  quantile.25% 4.2500000 
    107 None  W  quantile.50% 5.5000000 
    108 None  W  quantile.75% 6.7500000 
    109 None  W  quantile.100% 8.0000000 
    110 None  W    skew -0.3339582 
    111 None  W   kurtosis -0.9871315 

この結果を生成するコードは次のとおりです。

# create the summary function 
    # Summarise one vector of answers.
    #
    # x:      vector of answers; may contain NA.
    # levels: answer values to tabulate (handed to factor()).
    #
    # Returns a named numeric vector: one count per level plus an NA count,
    # followed by sum, length, mean, standard.deviation, var, median, min,
    # max, the default quantiles, skew and kurtosis.
    #
    # NOTE(review): length, skew and kurtosis use length(x), which also counts
    # the NA entries -- confirm this denominator is intended.
    summaryStatistics <- function(x, levels) {
      observed <- na.omit(x)

      counts <- table(factor(x, levels = levels), useNA = "always", exclude = NULL)
      total <- sum(observed)
      n_all <- length(x)
      avg <- mean(observed)
      variance <- var(observed)
      spread <- sqrt(variance)
      deviations <- observed - avg

      c(
        counts,
        sum = total,
        length = n_all,
        mean = avg,
        standard.deviation = spread,
        var = variance,
        median = median(observed),
        min = min(observed),
        max = max(observed),
        quantile = quantile(observed),
        skew = sum(deviations^3 / spread^3) / n_all,
        kurtosis = sum(deviations^4 / spread^4) / n_all - 3
      )
    }

    # Sample data: ten rows with an Id, two class columns, and three question
    # columns (R, S, W); some answers are NA.
    Id <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    ClassA <- c(1, NA, 3, 1, 1, 2, 1, 4, 5, 3)
    ClassB <- c(2, 1, 1, 3, 3, 2, 1, 1, 3, 3)
    R <- c(1, 2, 3, NA, 9, 2, 4, 5, 6, 7)
    S <- c(3, 7, NA, 9, 5, 8, 7, NA, 7, 6)
    W <- c(4, 5, 6, 7, 2, 4, 5, 6, 7, 8)

    df <- data.frame(Id, ClassA, ClassB, R, S, W)

    # Answer values to tabulate for each column.
    ClassAAnswers <- c(1:5, NA)
    ClassBAnswers <- c(1:5, NA)

    RAnswers <- c(0:10, NA)
    SAnswers <- c(0:20, NA)
    WAnswers <- c(0:30, NA)
    answers.list <- list(RAnswers, SAnswers, WAnswers)

    RSW.df <- df[c("R", "S", "W")]

    # Manual process: summarise R, then S, then W, stacking the rows each time.
    # Each step turns the named statistics vector into a one-column matrix,
    # then into a long (answer, question, value) data frame.
    result <- setNames(
      as.data.frame(as.table(simplify2array(
        lapply(df["R"], summaryStatistics, RAnswers)
      ))),
      c("answer", "question", "value")
    )

    result <- rbind(
      result,
      setNames(
        as.data.frame(as.table(simplify2array(
          lapply(df["S"], summaryStatistics, SAnswers)
        ))),
        c("answer", "question", "value")
      )
    )

    result <- rbind(
      result,
      setNames(
        as.data.frame(as.table(simplify2array(
          lapply(df["W"], summaryStatistics, WAnswers)
        ))),
        c("answer", "question", "value")
      )
    )

    # Put the columns in question, answer, value order.
    result <- result[, c("question", "answer", "value")]

    # Label every row with the filter name.
    result <- cbind(filter = "None", result)

    # Print the result.
    result

です。

データフレームと回答候補を順に処理する関数を作成しました。回答候補のベクトルをハードコードすると、上記と整合する結果が得られます。

# Build a long-format summary (answer, question, value) for every column of
# questions.dataframe and prepend a filter column.
#
# questions.dataframe: data frame with one column per question.
# answers.list:        accepted but not read by this version; every column is
#                      tabulated against the hard-coded levels c(0:10, NA).
# filter:              value placed in the filter column of every row.
extractSummaryDataframe <- function(questions.dataframe, answers.list, filter) {
  summary_rows <- data.frame(
    answer = factor(),
    question = factor(),
    value = double()
  )

  for (column_name in names(questions.dataframe)) {
    # Keep the column as a one-column data frame so its name survives as the
    # column name of the simplified matrix.
    column_stats <- lapply(
      questions.dataframe[column_name],
      summaryStatistics,
      c(0:10, NA)
    )
    column_rows <- as.data.frame(as.table(simplify2array(column_stats)))
    names(column_rows) <- c("answer", "question", "value")
    summary_rows <- rbind(summary_rows, column_rows)
  }

  cbind(filter = filter, summary_rows)
}

    # Call the function on RSW.df with answers.list and the filter label 'None'.
    extractSummaryDataframe(RSW.df, answers.list, 'None') 

これは次の結果を返します。

 filter    answer question  value 
    1 None     0  R 0.0000000 
    2 None     1  R 1.0000000 
    3 None     2  R 2.0000000 
    4 None     3  R 1.0000000 
    5 None     4  R 1.0000000 
    6 None     5  R 1.0000000 
    7 None     6  R 1.0000000 
    8 None     7  R 1.0000000 
    9 None     8  R 0.0000000 
    10 None     9  R 1.0000000 
    11 None     10  R 0.0000000 
    12 None    <NA>  R 1.0000000 
    13 None    sum  R 39.0000000 
    14 None    length  R 10.0000000 
    15 None    mean  R 4.3333333 
    16 None standard.deviation  R 2.6457513 
    17 None    var  R 7.0000000 
    18 None    median  R 4.0000000 
    19 None    min  R 1.0000000 
    20 None    max  R 9.0000000 
    21 None  quantile.0%  R 1.0000000 
    22 None  quantile.25%  R 2.0000000 
    23 None  quantile.50%  R 4.0000000 
    24 None  quantile.75%  R 6.0000000 
    25 None  quantile.100%  R 9.0000000 
    26 None    skew  R 0.3275692 
    27 None   kurtosis  R -1.5333333 
    28 None     0  S 0.0000000 
    29 None     1  S 0.0000000 
    30 None     2  S 0.0000000 
    31 None     3  S 1.0000000 
    32 None     4  S 0.0000000 
    33 None     5  S 1.0000000 
    34 None     6  S 1.0000000 
    35 None     7  S 3.0000000 
    36 None     8  S 1.0000000 
    37 None     9  S 1.0000000 
    38 None     10  S 0.0000000 
    39 None    <NA>  S 2.0000000 
    40 None    sum  S 52.0000000 
    41 None    length  S 10.0000000 
    42 None    mean  S 6.5000000 
    43 None standard.deviation  S 1.8516402 
    44 None    var  S 3.4285714 
    45 None    median  S 7.0000000 
    46 None    min  S 3.0000000 
    47 None    max  S 9.0000000 
    48 None  quantile.0%  S 3.0000000 
    49 None  quantile.25%  S 5.7500000 
    50 None  quantile.50%  S 7.0000000 
    51 None  quantile.75%  S 7.2500000 
    52 None  quantile.100%  S 9.0000000 
    53 None    skew  S -0.4252986 
    54 None   kurtosis  S -1.3028646 
    55 None     0  W 0.0000000 
    56 None     1  W 0.0000000 
    57 None     2  W 1.0000000 
    58 None     3  W 0.0000000 
    59 None     4  W 2.0000000 
    60 None     5  W 2.0000000 
    61 None     6  W 2.0000000 
    62 None     7  W 2.0000000 
    63 None     8  W 1.0000000 
    64 None     9  W 0.0000000 
    65 None     10  W 0.0000000 
    66 None    <NA>  W 0.0000000 
    67 None    sum  W 54.0000000 
    68 None    length  W 10.0000000 
    69 None    mean  W 5.4000000 
    70 None standard.deviation  W 1.7763883 
    71 None    var  W 3.1555556 
    72 None    median  W 5.5000000 
    73 None    min  W 2.0000000 
    74 None    max  W 8.0000000 
    75 None  quantile.0%  W 2.0000000 
    76 None  quantile.25%  W 4.2500000 
    77 None  quantile.50%  W 5.5000000 
    78 None  quantile.75%  W 6.7500000 
    79 None  quantile.100%  W 8.0000000 
    80 None    skew  W -0.3339582 
    81 None   kurtosis  W -0.9871315 

しかし、名前を順にたどりながらリストの各項目を次のように使おうとすると、以下の結果になります。

extractSummaryDataframe <- function(questions.dataframe, answers.list, filter) { 

 result <- data.frame(
      answer=factor(), 
      question=factor(), 
      value=double() 
     ) ; 
     listIndex <- 0 ; 
     for (name in names(questions.dataframe)){ 
      listIndex <- listIndex + 1 ; 
      result <- rbind(result, 
       setNames(
         nm=c('answer','question','value'), 
         as.data.frame(
          as.table(
           simplify2array(
            lapply(
             questions.dataframe[c(name)], 
             summaryStatistics, 
             answers.list[listIndex] 
            ) 
           ) 
          ) 
         ) 
       ) 
      )   
     } 

     result <- cbind(filter=filter,result) ; 
     result 
    } 

    # Call the function on RSW.df with answers.list and the filter label 'None'.
    extractSummaryDataframe(RSW.df, answers.list, 'None') 

       filter 
    1 None 
    2 None 
    3 None 
    4 None 
    5 None 
    6 None 
    7 None 
    8 None 
    9 None 
    10 None 
    11 None 
    12 None 
    13 None 
    14 None 
    15 None 
    16 None 
    17 None 
    18 None 
    19 None 
    20 None 
    21 None 
    22 None 
    23 None 
    24 None 
    25 None 
    26 None 
    27 None 
    28 None 
    29 None 
    30 None 
    31 None 
    32 None 
    33 None 
    34 None 
    35 None 
    36 None 
    37 None 
    38 None 
    39 None 
    40 None 
    41 None 
    42 None 
    43 None 
    44 None 
    45 None 
    46 None 
    47 None 
    48 None 
    49 None 
    50 None 
    51 None 
                                 answer 
    1                     c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, NA) 
    2                              <NA> 
    3                              sum 
    4                             length 
    5                              mean 
    6                          standard.deviation 
    7                              var 
    8                             median 
    9                              min 
    10                              max 
    11                            quantile.0% 
    12                           quantile.25% 
    13                           quantile.50% 
    14                           quantile.75% 
    15                           quantile.100% 
    16                             skew 
    17                            kurtosis 
    18           c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, NA) 
    19                             <NA> 
    20                              sum 
    21                             length 
    22                             mean 
    23                          standard.deviation 
    24                              var 
    25                             median 
    26                              min 
    27                              max 
    28                            quantile.0% 
    29                           quantile.25% 
    30                           quantile.50% 
    31                           quantile.75% 
    32                           quantile.100% 
    33                             skew 
    34                            kurtosis 
    35 c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, NA) 
    36                             <NA> 
    37                              sum 
    38                             length 
    39                             mean 
    40                          standard.deviation 
    41                              var 
    42                             median 
    43                              min 
    44                              max 
    45                            quantile.0% 
    46                           quantile.25% 
    47                           quantile.50% 
    48                           quantile.75% 
    49                           quantile.100% 
    50                             skew 
    51                            kurtosis 
     question  value 
    1   R 0.0000000 
    2   R 10.0000000 
    3   R 39.0000000 
    4   R 10.0000000 
    5   R 4.3333333 
    6   R 2.6457513 
    7   R 7.0000000 
    8   R 4.0000000 
    9   R 1.0000000 
    10  R 9.0000000 
    11  R 1.0000000 
    12  R 2.0000000 
    13  R 4.0000000 
    14  R 6.0000000 
    15  R 9.0000000 
    16  R 0.3275692 
    17  R -1.5333333 
    18  S 0.0000000 
    19  S 10.0000000 
    20  S 52.0000000 
    21  S 10.0000000 
    22  S 6.5000000 
    23  S 1.8516402 
    24  S 3.4285714 
    25  S 7.0000000 
    26  S 3.0000000 
    27  S 9.0000000 
    28  S 3.0000000 
    29  S 5.7500000 
    30  S 7.0000000 
    31  S 7.2500000 
    32  S 9.0000000 
    33  S -0.4252986 
    34  S -1.3028646 
    35  W 0.0000000 
    36  W 10.0000000 
    37  W 54.0000000 
    38  W 10.0000000 
    39  W 5.4000000 
    40  W 1.7763883 
    41  W 3.1555556 
    42  W 5.5000000 
    43  W 2.0000000 
    44  W 8.0000000 
    45  W 2.0000000 
    46  W 4.2500000 
    47  W 5.5000000 
    48  W 6.7500000 
    49  W 8.0000000 
    50  W -0.3339582 
    51  W -0.9871315 

これは、私が求めている結果とはまったく異なります。

answers.list[1] を c(0:10, NA) として認識させるには、どのような構文を使えばよいですか?

答えて

0

構文上の誤りは、answers.list[listIndex] ではなく answers.list[[listIndex]] と書く必要があった点でした。

これは class() 関数を使って確認することで分かりました。

class(answers.list) returned list: expected. 

class(answers.list[1]) returned list: unexpected. 

class(answers.list[[1]]) returned integer: which is what I was looking for. 

新しいコードは、まさに私が探していた結果を返します :-)

 filter question    answer  value 
    1  None  R     0 0.0000000 
    2  None  R     1 1.0000000 
    3  None  R     2 2.0000000 
    4  None  R     3 1.0000000 
    5  None  R     4 1.0000000 
    6  None  R     5 1.0000000 
    7  None  R     6 1.0000000 
    8  None  R     7 1.0000000 
    9  None  R     8 0.0000000 
    10 None  R     9 1.0000000 
    11 None  R     10 0.0000000 
    12 None  R    <NA> 1.0000000 
    13 None  R    sum 39.0000000 
    14 None  R    length 10.0000000 
    15 None  R    mean 4.3333333 
    16 None  R standard.deviation 2.6457513 
    17 None  R    var 7.0000000 
    18 None  R    median 4.0000000 
    19 None  R    min 1.0000000 
    20 None  R    max 9.0000000 
    21 None  R  quantile.0% 1.0000000 
    22 None  R  quantile.25% 2.0000000 
    23 None  R  quantile.50% 4.0000000 
    24 None  R  quantile.75% 6.0000000 
    25 None  R  quantile.100% 9.0000000 
    26 None  R    skew 0.3275692 
    27 None  R   kurtosis -1.5333333 
    28 None  S     0 0.0000000 
    29 None  S     1 0.0000000 
    30 None  S     2 0.0000000 
    31 None  S     3 1.0000000 
    32 None  S     4 0.0000000 
    33 None  S     5 1.0000000 
    34 None  S     6 1.0000000 
    35 None  S     7 3.0000000 
    36 None  S     8 1.0000000 
    37 None  S     9 1.0000000 
    38 None  S     10 0.0000000 
    39 None  S     11 0.0000000 
    40 None  S     12 0.0000000 
    41 None  S     13 0.0000000 
    42 None  S     14 0.0000000 
    43 None  S     15 0.0000000 
    44 None  S     16 0.0000000 
    45 None  S     17 0.0000000 
    46 None  S     18 0.0000000 
    47 None  S     19 0.0000000 
    48 None  S     20 0.0000000 
    49 None  S    <NA> 2.0000000 
    50 None  S    sum 52.0000000 
    51 None  S    length 10.0000000 
    52 None  S    mean 6.5000000 
    53 None  S standard.deviation 1.8516402 
    54 None  S    var 3.4285714 
    55 None  S    median 7.0000000 
    56 None  S    min 3.0000000 
    57 None  S    max 9.0000000 
    58 None  S  quantile.0% 3.0000000 
    59 None  S  quantile.25% 5.7500000 
    60 None  S  quantile.50% 7.0000000 
    61 None  S  quantile.75% 7.2500000 
    62 None  S  quantile.100% 9.0000000 
    63 None  S    skew -0.4252986 
    64 None  S   kurtosis -1.3028646 
    65 None  W     0 0.0000000 
    66 None  W     1 0.0000000 
    67 None  W     2 1.0000000 
    68 None  W     3 0.0000000 
    69 None  W     4 2.0000000 
    70 None  W     5 2.0000000 
    71 None  W     6 2.0000000 
    72 None  W     7 2.0000000 
    73 None  W     8 1.0000000 
    74 None  W     9 0.0000000 
    75 None  W     10 0.0000000 
    76 None  W     11 0.0000000 
    77 None  W     12 0.0000000 
    78 None  W     13 0.0000000 
    79 None  W     14 0.0000000 
    80 None  W     15 0.0000000 
    81 None  W     16 0.0000000 
    82 None  W     17 0.0000000 
    83 None  W     18 0.0000000 
    84 None  W     19 0.0000000 
    85 None  W     20 0.0000000 
    86 None  W     21 0.0000000 
    87 None  W     22 0.0000000 
    88 None  W     23 0.0000000 
    89 None  W     24 0.0000000 
    90 None  W     25 0.0000000 
    91 None  W     26 0.0000000 
    92 None  W     27 0.0000000 
    93 None  W     28 0.0000000 
    94 None  W     29 0.0000000 
    95 None  W     30 0.0000000 
    96 None  W    <NA> 0.0000000 
    97 None  W    sum 54.0000000 
    98 None  W    length 10.0000000 
    99 None  W    mean 5.4000000 
    100 None  W standard.deviation 1.7763883 
    101 None  W    var 3.1555556 
    102 None  W    median 5.5000000 
    103 None  W    min 2.0000000 
    104 None  W    max 8.0000000 
    105 None  W  quantile.0% 2.0000000 
    106 None  W  quantile.25% 4.2500000 
    107 None  W  quantile.50% 5.5000000 
    108 None  W  quantile.75% 6.7500000 
    109 None  W  quantile.100% 8.0000000 
    110 None  W    skew -0.3339582 
    111 None  W   kurtosis -0.9871315 

この結果を返す新しいコードは次のとおりです。

# create the summary function 
    # Summarise one vector of answers.
    #
    # x:      vector of answers; may contain NA.
    # levels: answer values to tabulate (handed to factor()).
    #
    # Returns a named numeric vector: one count per level plus an NA count,
    # followed by sum, length, mean, standard.deviation, var, median, min,
    # max, the default quantiles, skew and kurtosis.
    #
    # NOTE(review): length, skew and kurtosis use length(x), which also counts
    # the NA entries -- confirm this denominator is intended.
    summaryStatistics <- function(x, levels) {
      observed <- na.omit(x)

      counts <- table(factor(x, levels = levels), useNA = "always", exclude = NULL)
      total <- sum(observed)
      n_all <- length(x)
      avg <- mean(observed)
      variance <- var(observed)
      spread <- sqrt(variance)
      deviations <- observed - avg

      c(
        counts,
        sum = total,
        length = n_all,
        mean = avg,
        standard.deviation = spread,
        var = variance,
        median = median(observed),
        min = min(observed),
        max = max(observed),
        quantile = quantile(observed),
        skew = sum(deviations^3 / spread^3) / n_all,
        kurtosis = sum(deviations^4 / spread^4) / n_all - 3
      )
    }

    # create the function that steps through the summary function 
    # Build a long-format summary of every question column.
    #
    # questions.dataframe: data frame with one column per question.
    # answers.list:        list of answer-level vectors, matched to the columns
    #                      of questions.dataframe by position; entries beyond
    #                      the number of columns are ignored.
    # filter:              value written to the filter column of every row.
    #
    # Returns a data frame with columns filter, question, answer, value holding
    # one row per entry that summaryStatistics() reports for each column. With
    # no columns the result is a zero-row data frame with those four columns.
    # Stops with an error when answers.list has fewer entries than there are
    # columns.
    extractSummaryDataframe <- function(questions.dataframe, answers.list, filter) {
      question_names <- names(questions.dataframe)
      if (length(answers.list) < length(question_names)) {
        stop(
          "answers.list must supply one entry per column of questions.dataframe",
          call. = FALSE
        )
      }

      # Summarise each column against its own answer levels. `[[` is essential:
      # `[` would pass a one-element list instead of the vector of levels.
      pieces <- lapply(seq_along(question_names), function(i) {
        # Keep the column as a one-column data frame so its name survives as
        # the column name of the simplified matrix.
        stats <- lapply(
          questions.dataframe[question_names[i]],
          summaryStatistics,
          answers.list[[i]]
        )
        setNames(
          as.data.frame(as.table(simplify2array(stats))),
          c("answer", "question", "value")
        )
      })

      # Bind once rather than growing the data frame inside a loop; with no
      # columns fall back to an empty frame of the same shape.
      result <- if (length(pieces) > 0L) {
        do.call(rbind, pieces)
      } else {
        data.frame(answer = factor(), question = factor(), value = double())
      }

      result <- result[, c("question", "answer", "value")]

      # A length-one filter cannot be recycled against zero rows, so trim it.
      filter_column <- if (nrow(result) > 0L) filter else filter[0L]
      cbind(filter = filter_column, result)
    }

    # Sample data: ten rows with an Id, two class columns, and three question
    # columns (R, S, W); some answers are NA.
    Id <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    ClassA <- c(1, NA, 3, 1, 1, 2, 1, 4, 5, 3)
    ClassB <- c(2, 1, 1, 3, 3, 2, 1, 1, 3, 3)
    R <- c(1, 2, 3, NA, 9, 2, 4, 5, 6, 7)
    S <- c(3, 7, NA, 9, 5, 8, 7, NA, 7, 6)
    W <- c(4, 5, 6, 7, 2, 4, 5, 6, 7, 8)
    df <- data.frame(Id, ClassA, ClassB, R, S, W)

    # Answer values to tabulate for each column.
    ClassAAnswers <- c(1:5, NA)
    ClassBAnswers <- c(1:5, NA)

    RAnswers <- c(0:10, NA)
    SAnswers <- c(0:20, NA)
    WAnswers <- c(0:30, NA)

    # The list order follows the column order of RSW.df below.
    answers.list <- list(RAnswers, SAnswers, WAnswers)

    RSW.df <- df[c("R", "S", "W")]

    # Summarise all three question columns in a single call.
    result <- extractSummaryDataframe(RSW.df, answers.list, "None")

    # Print the result.
    result

です。

関連する問題