23. 表の作成

pacman::p_load(tidyverse, kableExtra, gt, DT)

country_df <- read_csv("Data/Countries.csv")

HTML出力のみならどっちでも問題ないが、{gt}が使いやすい。

ただし、{gt}は開発途上であり、PDF出力との相性が現在 (2022-03-22)、優れているとはいい難いので、PDF出力まで考えているのであれば{knitr}のkable() + {kableExtra}を推奨

LaTeX出力とHTML出力の見た目が異なるため、調整が必要

23.1 {kableExtra}の使い方

country_dfのPopulation (100万で割った値)、Area (1万で割った値)、GDP_per_capita (1万で割った値)、PPP_per_capita (1万で割った値)、HDI_2018、Polity_Score、FH_Totalの記述統計

コード入力が面倒であれば、コピペでOK

# Build a descriptive-statistics table (one row per variable) from country_df.
country_desc <- country_df %>%
    # Keep only the seven variables to summarise, in display order.
    select(Population, Area, GDP_per_capita, PPP_per_capita,
           HDI_2018, Polity_Score, FH_Total) %>%
    # Rescale: Population by 1,000,000; Area and the per-capita values by 10,000.
    mutate(Population = Population / 1e6,
           across(c(Area, GDP_per_capita, PPP_per_capita), ~ .x / 1e4)) %>%
    # One column per variable/statistic pair, named "<variable>-<statistic>".
    summarise(across(everything(),
                     list(Mean = function(x) mean(x, na.rm = TRUE),
                          SD   = function(x) sd(x, na.rm = TRUE),
                          Min  = function(x) min(x, na.rm = TRUE),
                          Max  = function(x) max(x, na.rm = TRUE),
                          Obs  = function(x) sum(!is.na(x))),
                     .names = "{.col}-{.fn}")) %>%
    # Reshape to long form, split the label on "-", then spread the
    # statistics back out into columns.
    pivot_longer(everything(), names_to = "Label", values_to = "Value") %>%
    separate(Label, into = c("Variable", "Stat"), sep = "-") %>%
    pivot_wider(names_from = "Stat", values_from = "Value")
country_desc
## # A tibble: 7 × 6
##   Variable         Mean      SD        Min      Max   Obs
##   <chr>           <dbl>   <dbl>      <dbl>    <dbl> <dbl>
## 1 Population     41.7   151.      0.000801 1447.      186
## 2 Area           69.6   187.      0        1638.      186
## 3 GDP_per_capita  1.62    2.57    0.00577    18.3     185
## 4 PPP_per_capita  2.08    2.10    0.0733     11.3     178
## 5 HDI_2018        0.713   0.153   0.377       0.954   180
## 6 Polity_Score    4.26    6.10  -10          10       158
## 7 FH_Total       57.7    29.9     0         100       185

{summarytools}のdescr()を使う場合

pacman::p_load(summarytools)

# Same descriptive table, computed with summarytools::descr().
country_desc <- country_df %>%
    # Pick the variables and give them short display names.
    select(Population, Area,
           GDP          = GDP_per_capita,
           PPP          = PPP_per_capita,
           HDI          = HDI_2018,
           Polity       = Polity_Score,
           FreedomHouse = FH_Total) %>%
    # Rescale: Population by 1,000,000; Area, GDP and PPP by 10,000.
    mutate(Population = Population / 1e6,
           across(c(Area, GDP, PPP), ~ .x / 1e4)) %>%
    # transpose = TRUE puts variables in rows; order = "preserve" keeps
    # the column order chosen in select().
    descr(stats     = c("mean", "sd", "min", "max", "n.valid"),
          order     = "preserve",
          transpose = TRUE) %>%
    as.data.frame() %>%
    # Move the variable names from the row names into a regular column.
    rownames_to_column(var = "Variable") %>%
    rename(SD = Std.Dev, Obs = N.Valid) %>%
    as_tibble()
country_desc
## # A tibble: 7 × 6
##   Variable       Mean      SD        Min      Max   Obs
##   <chr>         <dbl>   <dbl>      <dbl>    <dbl> <dbl>
## 1 Population   41.7   151.      0.000801 1447.      186
## 2 Area         69.6   187.      0        1638.      186
## 3 GDP           1.62    2.57    0.00577    18.3     185
## 4 PPP           2.08    2.10    0.0733     11.3     178
## 5 HDI           0.713   0.153   0.377       0.954   180
## 6 Polity        4.26    6.10  -10          10       158
## 7 FreedomHouse 57.7    29.9     0         100       185

23.1.1 表の出力

kbl(country_desc)
Variable Mean SD Min Max Obs
Population 41.7377735 151.2702976 0.0008010 1447.47009 186
Area 69.6069247 187.2412489 0.0000000 1637.68700 186
GDP 1.6158103 2.5710359 0.0057700 18.31772 185
PPP 2.0833383 2.0992134 0.0733142 11.34231 178
HDI 0.7134833 0.1528503 0.3770000 0.95400 180
Polity 4.2594937 6.1022919 -10.0000000 10.00000 158
FreedomHouse 57.7135135 29.8656244 0.0000000 100.00000 185

小数点桁数の調整

# Round every numeric column to three decimal places.
kbl(country_desc, digits = 3)
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185

表の幅 (HTML限定)

# full_width = FALSE stops the table from stretching across the page.
kable_styling(kbl(country_desc, digits = 3), full_width = FALSE)
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185

23.1.2 列の操作

列名の修正

全ての列を指定する必要がある

# Relabel the header; col.names needs one label per column.
kbl(country_desc,
    digits    = 3,
    col.names = c("変数", "平均値", "標準偏差",
                  "最小値", "最大値", "観察数"))
変数 平均値 標準偏差 最小値 最大値 観察数
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185

列の揃え

全ての列を指定する必要がある

# Not needed in practice: centre the Variable column on purpose to show
# how `align` works (one letter per column; c = centre, r = right).
kbl(country_desc, digits = 3, align = "crrrrr")
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185

23.1.3 タイトル、フットノート

# Add a table title through `caption`.
kbl(country_desc, digits = 3, caption = "記述統計表")
表 23.1: 記述統計表
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185
# Captioned table with a general footnote; general_title replaces the
# heading shown above the footnote text.
kbl(country_desc, digits = 3, caption = "記述統計表") %>%
    footnote(general_title = "出典:",
             general       = "『私たちのR』のサンプルデータ")
表 23.2: 記述統計表
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185
出典:
『私たちのR』のサンプルデータ
# Copy of country_desc with footnote markers appended to header names:
# marker 1 on Min and Max, marker 2 on Obs.
country_desc2 <- country_desc %>%
    rename_with(~ paste0(.x, footnote_marker_number(1)),
                .cols = c(Min, Max)) %>%
    rename_with(~ paste0(.x, footnote_marker_number(2)),
                .cols = Obs)

# escape = FALSE lets the marker markup in the header render as-is.
kbl(country_desc2, digits = 3, escape = FALSE, caption = "記述統計表") %>%
    footnote(number  = c("欠損値を除く", "欠損していないケース数"),
             general = "出典: 『私たちのR』のサンプルデータ")
表 23.3: 記述統計表
Variable Mean SD Min1 Max1 Obs2
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185
Note:
出典: 『私たちのR』のサンプルデータ
1 欠損値を除く
2 欠損していないケース数

23.1.4 グループ化

列のグループ化

グループ化しない列のラベルは""でなく、" "にする。

# Add a spanning header: "Range" covers Min and Max; the " " entries
# leave the remaining columns without a group label.
add_header_above(kbl(country_desc, digits = 3),
                 header = c(" " = 3, "Range" = 2, " " = 1))
Range
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185

行のグループ化

# Group rows under labelled sub-headers. Each `index` entry is
# "label" = number of consecutive rows in that group
# (rows 1-2, 3-5 and 6-7 respectively).
country_desc %>%
    kbl(digits = 3) %>%
    pack_rows(index = c("Demographic factors" = 2,
                        "Economic factors"    = 3,
                        "Political factors"   = 2))
Variable Mean SD Min Max Obs
Demographic factors
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
Economic factors
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Political factors
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185
# Add a Factor column in front of Variable that labels each row's group
# (2 demographic, 3 economic, 2 political rows).
country_desc3 <- country_desc %>%
    mutate(Factor  = rep(c("Demographic", "Economic", "Political"),
                         times = c(2, 3, 2)),
           .before = Variable)

kbl(country_desc3, digits = 3)
Factor Variable Mean SD Min Max Obs
Demographic Population 41.738 151.270 0.001 1447.470 186
Demographic Area 69.607 187.241 0.000 1637.687 186
Economic GDP 1.616 2.571 0.006 18.318 185
Economic PPP 2.083 2.099 0.073 11.342 178
Economic HDI 0.713 0.153 0.377 0.954 180
Political Polity 4.259 6.102 -10.000 10.000 158
Political FreedomHouse 57.714 29.866 0.000 100.000 185
# Merge repeated values in the first column (Factor) into a single cell,
# aligned to the top of the merged cell.
collapse_rows(kbl(country_desc3, digits = 3),
              columns = 1,
              valign  = "top")
Factor Variable Mean SD Min Max Obs
Demographic Population 41.738 151.270 0.001 1447.470 186
Demographic Area 69.607 187.241 0.000 1637.687 186
Economic GDP 1.616 2.571 0.006 18.318 185
Economic PPP 2.083 2.099 0.073 11.342 178
Economic HDI 0.713 0.153 0.377 0.954 180
Political Polity 4.259 6.102 -10.000 10.000 158
Political FreedomHouse 57.714 29.866 0.000 100.000 185

23.1.5 セルの色分け(HTML限定)

{formattable}パッケージ使用

# Correlation of each variable with HDI_2018, one row per variable.
country_desc4 <- country_df %>%
    # Select the variables (HDI_2018 first) and shorten the names.
    select(HDI_2018, Population, Area,
           GDP          = GDP_per_capita,
           PPP          = PPP_per_capita,
           Polity       = Polity_Score,
           FreedomHouse = FH_Total) %>%
    # Rescale: Population by 1,000,000; Area, GDP and PPP by 10,000.
    mutate(Population = Population / 1e6,
           across(c(Area, GDP, PPP), ~ .x / 1e4)) %>%
    # Use only rows with no missing value in any selected variable.
    drop_na() %>%
    cor() %>%
    as.data.frame() %>%
    rownames_to_column(var = "Variable") %>%
    # Drop HDI_2018's correlation with itself, then keep only the
    # HDI_2018 column of the correlation matrix.
    filter(Variable != "HDI_2018") %>%
    select(Variable, Cor = HDI_2018)
kbl(country_desc4, digits = 3)
Variable Cor
Population 0.001
Area 0.125
GDP 0.714
PPP 0.801
Polity 0.287
FreedomHouse 0.574
pacman::p_load(formattable)

# Add two styled copies of the correlation, formatted to three decimals:
# Cor2 colours the text (blue to red), Cor3 shades the cell background
# (white to mistyrose).
country_desc4 <- country_desc4 %>%
    mutate(Cor2 = color_text("blue", "red")(sprintf("%.3f", Cor)),
           Cor3 = color_tile("white", "mistyrose")(sprintf("%.3f", Cor)))

# escape = FALSE is needed so the generated HTML is rendered, not printed.
kbl(country_desc4,
    digits    = 3,
    escape    = FALSE,
    col.names = c("変数", "数字のみ", "文字色", "色塗り")) %>%
    add_header_above(header = c(" " = 1, "人間開発指数との相関係数" = 3))
人間開発指数との相関係数
変数 数字のみ 文字色 色塗り
Population 0.001 0.001 0.001
Area 0.125 0.125 0.125
GDP 0.714 0.714 0.714
PPP 0.801 0.801 0.801
Polity 0.287 0.287 0.287
FreedomHouse 0.574 0.574 0.574

23.1.6 テーマ

# "paper" theme.
kable_paper(kbl(country_desc, digits = 3))
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185
# "classic" theme.
kable_classic(kbl(country_desc, digits = 3))
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185
# "classic_2" theme.
kable_classic_2(kbl(country_desc, digits = 3))
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185
# "minimal" theme.
kable_minimal(kbl(country_desc, digits = 3))
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185
# "material" theme.
kable_material(kbl(country_desc, digits = 3))
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185
# "paper" theme with striped rows, not stretched to full width.
kable_paper(kbl(country_desc, digits = 3),
            bootstrap_options = "striped",
            full_width        = FALSE)
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185
# "paper" theme with the "striped" and "condensed" options combined,
# not stretched to full width.
kable_paper(kbl(country_desc, digits = 3),
            bootstrap_options = c("striped", "condensed"),
            full_width        = FALSE)
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1447.470 186
Area 69.607 187.241 0.000 1637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 -10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185

他にも"hover"、"responsive"、"bordered"あり

テーマを変更せずにスタイルを変更したい場合はkable_styling()を使用

23.1.7 LaTeX形式への出力

RMarkdownの出力形式がHTMLでなく、PDFの場合

  • kable_styling()やテーマ関数 (kable_*())内にfull_width = FALSEの指定は不要
  • 縦線を無くしたい場合はkbl()内にbooktabs = TRUEを指定
# booktabs = TRUE is the setting for a PDF table without vertical rules.
kbl(country_desc, digits = 3, booktabs = TRUE)

表を中央に位置づけたい場合はkable_styling()内にposition = "center"を指定

# Centre the table on the page.
kable_styling(kbl(country_desc, digits = 3, booktabs = TRUE),
              position = "center")

LaTeX用表のコードが必要な場合、format = "latex"を指定

# format = "latex" emits the LaTeX source of the table.
kbl(country_desc, format = "latex", booktabs = TRUE, digits = 3)

23.2 {gt}の使い方

そのままcountry_descを使用

表の出力

gt(country_desc)
Variable Mean SD Min Max Obs
Population 41.7377735 151.2702976 0.000801000 1447.47009 186
Area 69.6069247 187.2412489 0.000000000 1637.68700 186
GDP 1.6158103 2.5710359 0.005770007 18.31772 185
PPP 2.0833383 2.0992134 0.073314173 11.34231 178
HDI 0.7134833 0.1528503 0.377000000 0.95400 180
Polity 4.2594937 6.1022919 -10.000000000 10.00000 158
FreedomHouse 57.7135135 29.8656244 0.000000000 100.00000 185

小数点桁数の調整

# Show Mean, SD, Min and Max (columns 2-5) with three decimals;
# Obs is left unformatted.
fmt_number(gt(country_desc),
           columns  = c(Mean, SD, Min, Max),
           decimals = 3)
Variable Mean SD Min Max Obs
Population 41.738 151.270 0.001 1,447.470 186
Area 69.607 187.241 0.000 1,637.687 186
GDP 1.616 2.571 0.006 18.318 185
PPP 2.083 2.099 0.073 11.342 178
HDI 0.713 0.153 0.377 0.954 180
Polity 4.259 6.102 −10.000 10.000 158
FreedomHouse 57.714 29.866 0.000 100.000 185

23.3 データの出力

通常の出力の場合

country_df
## # A tibble: 186 × 18
##    Country  Population   Area    GDP     PPP GDP_per_capita PPP_per_capita    G7
##    <chr>         <dbl>  <dbl>  <dbl>   <dbl>          <dbl>          <dbl> <dbl>
##  1 Afghani…   38928346 6.53e5 1.91e4  8.27e4           491.          2125.     0
##  2 Albania     2877797 2.74e4 1.53e4  3.97e4          5309.         13781.     0
##  3 Algeria    43851044 2.38e6 1.70e5  4.97e5          3876.         11324.     0
##  4 Andorra       77265 4.7 e2 3.15e3 NA              40821.            NA      0
##  5 Angola     32866272 1.25e6 9.46e4  2.19e5          2879.          6649.     0
##  6 Antigua…      97929 4.4 e2 1.73e3  2.08e3         17643.         21267.     0
##  7 Argenti…   45195774 2.74e6 4.50e5  1.04e6          9949.         22938.     0
##  8 Armenia     2963243 2.85e4 1.37e4  3.84e4          4614.         12974.     0
##  9 Austral…   25499884 7.68e6 1.39e6  1.28e6         54615.         50001.     0
## 10 Austria     9006398 8.24e4 4.46e5  5.03e5         49555.         55824.     0
## # … with 176 more rows, and 10 more variables: G20 <dbl>, OECD <dbl>,
## #   HDI_2018 <dbl>, Polity_Score <dbl>, Polity_Type <chr>, FH_PR <dbl>,
## #   FH_CL <dbl>, FH_Total <dbl>, FH_Status <chr>, Continent <chr>

{DT}パッケージのdatatable()関数を使用した場合

datatable(country_df)