How to convert tidy hierarchical data frame to hierarchical list grid in R?

This is a more complex version of a previous question where I had abstracted the actual problem too much to apply the answers. R convert tidy hierarchical data frame to hierarchical list

I've converted a hierarchical data frame with two grouping levels into a hierarchical list-grid using a for loop.

Is there a more efficient base R, tidyverse or other approach to achieve this?

In the real dataset:

  • The grouping variables and description are multi word strings.
  • The description preface - d# - is in the MWE for ease of checking.
  • There are 14 associated variables variously of type: character, integer and double
  • Rules

    Group 1 and Group 2 headings to be in description column Group 1 headings to appear once only Group 2 heading are children of group 1 heading, and only change when there is a new group 2 heading Descriptions are children of group 2 headings

    From this

    g1    g2    desc    var1       var2   var3 
    A     a     d1 KS3  0.0500     2      PLs  
    A     a     d2 CTI  0.0500     9      7O0  
    A     b     d3 b8x  0.580      5      he2  
    A     b     d4 XOf  0.180     12      XJE  
    A     b     d5 ygn  0.900     11      v48  
    A     c     d6 dGY  0.770      6      UcH  
    A     d     d7 jpG  0.600      4      P5M  
    B     d     d8 Z95  0.600     10      j6O  
    

    To this

     desc      var1      var2  var3 
     A         
     a       
     d1 KS3   0.0500     2     PLs  
     d2 CTI   0.0500     9     7O0  
     b       
     d3 b8x   0.580      5     he2  
     d4 XOf   0.180     12     XJE  
     d5 ygn   0.900     11     v48  
     c        
    d6 dGY   0.770      6     UcH  
     d          
    d7 jpG   0.600      4     P5M  
     B       
     d       
    

    Code

    library(tidyverse)
    library(stringi)
    
    set.seed(2018)
    
    # Minimal example data: two grouping levels (g1 > g2), a description and
    # three associated variables of different types.
    tib <- tibble(
      g1 = c("A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "C"),
      g2 = c("a", "a", "b", "b", "b", "c", "d", "d", "b", "b", "e", "e"),
      desc = paste0("d", 1:12, " ", stri_rand_strings(12, 3)),
      var1 = round(runif(12), 2),
      var2 = sample.int(12),
      var3 = stri_rand_strings(12, 3)
    )
    
    tib
    
    # Flag the rows that start a new heading. A heading is needed on the first
    # row and wherever the group value differs from the previous row. The two
    # columns are compared separately (not pasted together) so that, e.g.,
    # ("A", "BC") and ("AB", "C") are never confused. A new group 1 always
    # forces a new group 2 heading as well.
    first_row <- seq_len(nrow(tib)) == 1L
    is_new_g1 <- first_row | tib$g1 != dplyr::lag(tib$g1)
    is_new_g2 <- is_new_g1 | tib$g2 != dplyr::lag(tib$g2)
    
    # Number of rows in final table: one per heading emitted plus one per data
    # row. Counting the flags (rather than unique values) keeps the size
    # consistent with what the loop below actually writes.
    n_rows <- sum(is_new_g1) + sum(is_new_g2) + nrow(tib)
    
    # Create the empty, correctly typed output tibble directly; this avoids
    # going through an unnamed matrix and renaming auto-generated V1..V5 columns.
    output <- tibble(
      id = rep(NA_character_, n_rows),
      desc = rep(NA_character_, n_rows),
      var1 = rep(NA_real_, n_rows),
      var2 = rep(NA_integer_, n_rows),
      var3 = rep(NA_character_, n_rows)
    )
    
    # Next row of `output` to be written
    output_row <- 1L
    
    for (i in seq_len(nrow(tib))) {
    
      # level 1 headings
      if (is_new_g1[[i]]) {
        output$id[[output_row]] <- "g1"
        output$desc[[output_row]] <- tib$g1[[i]]
        output_row <- output_row + 1L
      }
    
      # level 2 headings
      if (is_new_g2[[i]]) {
        output$id[[output_row]] <- "g2"
        output$desc[[output_row]] <- tib$g2[[i]]
        output_row <- output_row + 1L
      }
    
      # Description and data grid (id stays NA for data rows)
      output$desc[[output_row]] <- tib$desc[[i]]
      output$var1[[output_row]] <- tib$var1[[i]]
      output$var2[[output_row]] <- tib$var2[[i]]
      output$var3[[output_row]] <- tib$var3[[i]]
      output_row <- output_row + 1L
    
    }
    
    output
    

    调整tyluRp R的答案将整齐的分层数据框转换为层次列表我已经选择了一个解决方案。

    library(tidyverse)
    library(stringi)
    
    set.seed(2018)
    
    # Minimal example data: two grouping levels (g1 > g2), a description and
    # three associated variables of different types.
    tib <- tibble(
      g1 = c("A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "C"),
      g2 = c("a", "a", "b", "b", "b", "c", "d", "d", "b", "b", "e", "e"),
      desc = paste0("d", 1:12, " ", stri_rand_strings(12, 3)),
      var1 = round(runif(12), 2),
      var2 = sample.int(12),
      var3 = stri_rand_strings(12, 3)
    )
    
    # add unique identifier for description and variable rows
    tib <-
      tib %>%
      rowid_to_column() %>%
      mutate(rowid = paste0("z_", rowid))
    
    # separate tibble for variables associated with descriptions
    tib_var <-
      tib %>%
      select(rowid, var1, var2, var3)
    
    # code adapted from tyluRp to reorder the data and add description variables
    #
    # Each input row is flattened to the sequence g1, g2, desc, rowid; repeated
    # headings are then removed and every description is paired with the rowid
    # that follows it, which is used to join the variables back on.
    tib <-
      tib %>%
      select(g1, g2, desc, rowid) %>%
      # prefix g2 with g1 so the same g2 label under different g1 groups is
      # not treated as a repeat of one heading
      mutate(g2 = paste(g1, g2, sep = "_")) %>%
      transpose() %>%
      unlist() %>%
      stack() %>%
      # de-duplicate headings only: descriptions may legitimately repeat, and
      # dropping one would break the desc/rowid pairing below
      filter(ind %in% c("desc", "rowid") | !duplicated(paste(ind, values))) %>%
      mutate(
        # identify row kinds from the source column name, not from the text
        is_rowid = ind == "rowid",
        is_g2 = ind == "g2",
        # a description row takes the identifier held in the row after it
        ind = lead(case_when(is_rowid ~ values)),
        values = case_when(
          is_rowid ~ NA_character_,
          # strip the "g1_" prefix added above
          # (assumes g1 values themselves contain no underscore)
          is_g2 ~ str_remove(values, "^[^_]*_"),
          TRUE ~ values
        )
      ) %>%
      drop_na(values) %>%
      select(values, ind) %>%
      left_join(tib_var, by = c("ind" = "rowid")) %>%
      select(-ind) %>%
      # heading rows are shown with blank cells; convert to character first
      # because replace_na() cannot put "" into numeric columns
      mutate(
        var1 = as.character(var1),
        var2 = as.character(var2),
        var3 = as.character(var3)
      ) %>%
      replace_na(list(var1 = "", var2 = "", var3 = ""))
    
    链接地址: http://www.djcxy.com/p/24808.html

    上一篇: 给定二元离散分布的随机样本

    下一篇: 如何将整齐的分层数据框转换为R中的分层列表网格?