函数使用R中的数据帧的参数中的列

By simon at 2018-02-07 • 0人收藏 • 26人看过

我想创建一个与我的数据框的列之一的功能 就职,因为我需要类似的计算其他栏目 也。

numeric_fun<-function(dataset,grp_var,var){

  require("dplyr")

    dataset%>%select(grp_var,var)%>% group_by_(grp_var)%>%
  summarize(
    'q25' = quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],
    'median' =round(quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
    'avg' = round( mean(var, na.rm=TRUE), 0),
    'q75' = quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4] ,
    'n' = sum(!is.na(var))
  )%>% 
  mutate(
    q25 = ifelse( n < 5, "--", paste(q25,"%" )),
    median = ifelse( n < 5, "--", paste(median,"%")),
    avg = ifelse(n < 5, "--", paste(avg,"%")),
    q75 = ifelse( n < 5, "--", paste(q75,"%")),
    n = n
  ) %>% 
  rename( "Industry"=grp_var,
          "25%" = q25,
          "75%" = q75
  )

}

numeric_fun(outplacement,"Q7_1","Q8")
但是这个不行,还有其他有效的方法吗? 输出的输出
structure(list(Q7_1 = structure(c(NA, NA, NA, 5L, 5L, 14L, NA, 
1L, 9L, 13L, 1L, NA, 3L, 13L, 13L, 13L, 12L, 2L, 11L, 13L, 10L, 
11L, NA, 1L, 4L, NA, 5L, NA, 5L, 4L, 5L, 9L, 2L, 4L, 13L, 10L, 
13L, 13L, NA, 11L, NA, 1L, 11L, NA, 5L, NA, 1L, NA, 9L, 3L, 1L, 
1L, 10L, 1L, NA, 5L, NA, NA, 2L, NA, 6L, 6L, NA, 13L, 14L, NA, 
NA, 14L, 8L, 11L, 11L, 8L, 12L, 13L, NA, 3L, 11L, 3L, 11L, 1L, 
NA, 9L, NA, 10L, 6L, 1L, 5L, 3L, 1L, 13L, 4L, 14L, NA, 13L), .Label = c("Banking/Financial Services", 
"Chemicals", "Consumer Goods", "Energy", "High Tech", "Insurance/Reinsurance", 
"Life Sciences", "Logistics", "Mining & Metals", "Other Manufacturing", 
"Other Non-Manufacturing", "Retail & Wholesale", "Services (Non-Financial)", "Transportation Equipment"), class = "factor"), Q8 = c(NA, NA, 
NA, NA, NA, NA, NA, 2L, 3L, 3L, 6L, NA, 5L, 4L, 2L, 5L, 6L, 2L, 
2L, 3L, 2L, 5L, NA, 3L, 1L, NA, 3L, NA, 1L, 3L, 4L, 4L, 2L, 4L, 
1L, 3L, 2L, 3L, NA, 2L, NA, 4L, 4L, NA, 1L, NA, 3L, NA, 1L, 3L, 
5L, 2L, 3L, 1L, NA, 6L, NA, NA, 4L, NA, 1L, 5L, NA, 2L, 1L, NA, 
NA, 2L, 6L, 6L, 2L, 6L, 3L, 5L, NA, 5L, 2L, 1L, 3L, 3L, NA, 3L, 
NA, 3L, 3L, 6L, 4L, 1L, 4L, 6L, 3L, 5L, NA, 5L), Q9 = c(3L, 1L, 
NA, 1L, 3L, 3L, NA, 3L, 3L, 1L, 1L, NA, 3L, 2L, 2L, 3L, 2L, 3L, 
2L, 2L, 2L, 1L, NA, 3L, 1L, NA, 1L, NA, 1L, 2L, 1L, 2L, 3L, 1L, 
1L, 1L, 3L, 3L, NA, 3L, NA, 2L, 2L, NA, 1L, NA, 1L, NA, 1L, 2L, 
2L, 1L, 2L, 3L, NA, 1L, NA, NA, 2L, NA, 2L, 2L, NA, 2L, 2L, NA, 
NA, 1L, 3L, 1L, 3L, 3L, 1L, 3L, NA, 1L, 3L, 1L, 1L, 3L, NA, 1L, 
NA, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, NA, 2L), Q10 = c(NA, 
1L, NA, 1L, NA, NA, NA, NA, NA, 1L, 1L, NA, NA, 1L, 2L, NA, 1L, 
NA, 1L, 1L, 2L, 2L, NA, NA, 2L, NA, 2L, NA, 2L, 1L, NA, 1L, NA, 
1L, 1L, 1L, NA, NA, NA, NA, NA, 2L, 1L, NA, 1L, NA, 2L, NA, 2L, 
2L, 2L, 1L, 2L, 2L, NA, 1L, NA, NA, 2L, NA, 2L, 1L, NA, 1L, 2L, 
NA, NA, 1L, 1L, NA, 1L, NA, NA, 2L, NA, NA, 1L, 1L, 1L, 2L, NA, 
1L, NA, 1L, 2L, 2L, 1L, 1L, NA, 1L, NA, 2L, NA, 1L)), row.names = c(NA, 
-94L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 
0x0000000000090788>, .Names = c("Q7_1", 
"Q8", "Q9", "Q10"))

2 个回复 | 最后更新于 2018-02-07
2018-02-07   #1

问题在于代码使用的字符串不合适 上下文。我们可以用sym和!!从rlang包来翻译 他们。添加标记为##的语句,然后使用!!grp_var 和!!var到位of grp_var和var无处不在。也 将sym0更改为group_by,重新格式化,更改为require 到library(见下一段),并添加了library报表 data.table和rlang。 请注意,library是除非在一个范围之内,最好是require if.这样,如果包丢失,它会失败的 library声明使事业显而易见。另一方面与 require它会进一步失败,使其更难调试。

library(data.table) ##
library(rlang) ##
library(dplyr)


numeric_fun <- function(dataset, grp_var, var) {

  grp_var <- sym(grp_var)
  var <- sym(var)

  dataset %>%
    select(!!grp_var,!!var) %>% 
    group_by(!!grp_var) %>%
    summarize(
      'q25' = quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm = TRUE)[2],
      'median' = round(quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
      'avg' = round( mean(!!var, na.rm = TRUE), 0),
      'q75' = quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm = TRUE)[4] ,
      'n' = sum(!is.na(!!var))
    ) %>% 
    mutate(
      q25 = ifelse( n < 5, "--", paste(q25, "%" )),
      median = ifelse( n < 5, "--", paste(median, "%")),
      avg = ifelse(n < 5, "--", paste(avg, "%")),
      q75 = ifelse( n < 5, "--", paste(q75, "%")),
      n = n
    ) %>% 
    rename( "Industry" = !!grp_var,
            "25%" = q25,
            "75%" = q75
    )

}

numeric_fun(outplacement,"Q7_1","Q8")
赠送:
# A tibble: 14 x 6
   Industry                   `25%` median avg   `75%`      n
   <fctr>                     <chr> <chr>  <chr> <chr>  <int>
 1 Banking/Financial Services 2 %   3 %    4 %   5 %       11
 2 Chemicals                  --    --     --    --         3
 3 Consumer Goods             1 %   3 %    3 %   5 %        5
 4 Energy                     --    --     --    --         4
 5 High Tech                  1 %   4 %    3 %   4.5 %      6
 6 Insurance/Reinsurance      --    --     --    --         3
 7 Logistics                  --    --     --    --         2
 8 Mining & Metals            --    --     --    --         4
 9 Other Manufacturing        --    --     --    --         4
10 Other Non-Manufacturing    2 %   2 %    3 %   4.75 %     8
11 Retail & Wholesale         --    --     --    --         2
12 Services (Non-Financial)   2 %   3 %    3 %   5 %       12
13 Transportation Equipment   --    --     --    --         3
14 <NA>                       --    --     --    --         0
## 注意 dput不适用于具有外部指针的对象,如 data.table obj所以我们使用这个:
outplacement <- 
structure(list(Q7_1 = structure(c(NA, NA, NA, 5L, 5L, 14L, NA, 
1L, 9L, 13L, 1L, NA, 3L, 13L, 13L, 13L, 12L, 2L, 11L, 13L, 10L, 
11L, NA, 1L, 4L, NA, 5L, NA, 5L, 4L, 5L, 9L, 2L, 4L, 13L, 10L, 
13L, 13L, NA, 11L, NA, 1L, 11L, NA, 5L, NA, 1L, NA, 9L, 3L, 1L, 
1L, 10L, 1L, NA, 5L, NA, NA, 2L, NA, 6L, 6L, NA, 13L, 14L, NA, 
NA, 14L, 8L, 11L, 11L, 8L, 12L, 13L, NA, 3L, 11L, 3L, 11L, 1L, 
NA, 9L, NA, 10L, 6L, 1L, 5L, 3L, 1L, 13L, 4L, 14L, NA, 13L), .Label = c("Banking/Financial Services", 
"Chemicals", "Consumer Goods", "Energy", "High Tech", "Insurance/Reinsurance", 
"Life Sciences", "Logistics", "Mining & Metals", "Other Manufacturing", 
"Other Non-Manufacturing", "Retail & Wholesale", "Services (Non-Financial)", "Transportation Equipment"), class = "factor"), Q8 = c(NA, NA, 
NA, NA, NA, NA, NA, 2L, 3L, 3L, 6L, NA, 5L, 4L, 2L, 5L, 6L, 2L, 
2L, 3L, 2L, 5L, NA, 3L, 1L, NA, 3L, NA, 1L, 3L, 4L, 4L, 2L, 4L, 
1L, 3L, 2L, 3L, NA, 2L, NA, 4L, 4L, NA, 1L, NA, 3L, NA, 1L, 3L, 
5L, 2L, 3L, 1L, NA, 6L, NA, NA, 4L, NA, 1L, 5L, NA, 2L, 1L, NA, 
NA, 2L, 6L, 6L, 2L, 6L, 3L, 5L, NA, 5L, 2L, 1L, 3L, 3L, NA, 3L, 
NA, 3L, 3L, 6L, 4L, 1L, 4L, 6L, 3L, 5L, NA, 5L), Q9 = c(3L, 1L, 
NA, 1L, 3L, 3L, NA, 3L, 3L, 1L, 1L, NA, 3L, 2L, 2L, 3L, 2L, 3L, 
2L, 2L, 2L, 1L, NA, 3L, 1L, NA, 1L, NA, 1L, 2L, 1L, 2L, 3L, 1L, 
1L, 1L, 3L, 3L, NA, 3L, NA, 2L, 2L, NA, 1L, NA, 1L, NA, 1L, 2L, 
2L, 1L, 2L, 3L, NA, 1L, NA, NA, 2L, NA, 2L, 2L, NA, 2L, 2L, NA, 
NA, 1L, 3L, 1L, 3L, 3L, 1L, 3L, NA, 1L, 3L, 1L, 1L, 3L, NA, 1L, 
NA, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, NA, 2L), Q10 = c(NA, 
1L, NA, 1L, NA, NA, NA, NA, NA, 1L, 1L, NA, NA, 1L, 2L, NA, 1L, 
NA, 1L, 1L, 2L, 2L, NA, NA, 2L, NA, 2L, NA, 2L, 1L, NA, 1L, NA, 
1L, 1L, 1L, NA, NA, NA, NA, NA, 2L, 1L, NA, 1L, NA, 2L, NA, 2L, 
2L, 2L, 1L, 2L, 2L, NA, 1L, NA, NA, 2L, NA, 2L, 1L, NA, 1L, 2L, 
NA, NA, 1L, 1L, NA, 1L, NA, NA, 2L, NA, NA, 1L, 1L, 1L, 2L, NA, 
1L, NA, 1L, 2L, 2L, 1L, 1L, NA, 1L, NA, 2L, NA, 1L)), row.names = c(NA, 
-94L), class = "data.frame", .Names = c("Q7_1", "Q8", "Q9", "Q10"))

library(data.table)
outplacement <- as.data.table(outplacement)

2018-02-07   #2

使用从dplyr enquo/!!我们可以改造的功能

library(dplyr)
numeric_fun<-function(dataset,grp_var,var){

      grp_var <- enquo(grp_var)
      var <- enquo(var)


    dataset %>%
          select(!! grp_var, !!var) %>%
          group_by(!! grp_var) %>%
          summarise(
               q25 = quantile(!! var,  type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],
           median =round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
               avg = round( mean(!! var, na.rm=TRUE), 0),
               q75 = quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4] ,
               n = sum(!is.na(!!var))) %>%

         mutate(
              q25 = ifelse( n < 5, "--", paste0(q25,"%" )),
              median = ifelse( n < 5, "--", paste0(median,"%")),
              avg = ifelse(n < 5, "--", paste0(avg,"%")),
              q75 = ifelse( n < 5, "--", paste0(q75,"%"))

          ) %>% 
        rename(Industry= !!grp_var,
          `25%` = q25,
          `75%` = q75
  )



}
运行该功能
numeric_fun(df1, Q7_1, Q8)
# A tibble: 14 x 6
#   Industry                   `25%` median avg   `75%`     n
#   <fctr>                     <chr> <chr>  <chr> <chr> <int>
# 1 Banking/Financial Services 2%    3%     4%    5%       11
# 2 Chemicals                  --    --     --    --        3
# 3 Consumer Goods             1%    3%     3%    5%        5
# 4 Energy                     --    --     --    --        4
# 5 High Tech                  1%    4%     3%    4.5%      6
# 6 Insurance/Reinsurance      --    --     --    --        3
# 7 Logistics                  --    --     --    --        2
# 8 Mining & Metals            --    --     --    --        4
# 9 Other Manufacturing        --    --     --    --        4
#10 Other Non-Manufacturing    2%    2%     3%    4.75%     8
#11 Retail & Wholesale         --    --     --    --        2
#12 Services (Non-Financial)   2%    3%     3%    5%       12
#13 Transportation Equipment   --    --     --    --        3
#14 <NA>                       --    --     --    --        0

登录后方可回帖

Loading...