Title: | Useful Functions for Data Processing |
---|---|
Description: | In ancient Chinese mythology, Bai Ze is a divine creature that knows the needs of everything. 'baizer' provides data processing functions frequently used by the author. Hope this package also knows what you want! |
Authors: | William Song [aut, cre] |
Maintainer: | William Song <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.8.1 |
Built: | 2025-02-19 04:30:02 UTC |
Source: | https://github.com/william-swl/baizer |
equal calculation operator, support NA
x %eq% y
x %eq% y
x |
value x |
y |
value y |
logical value, TRUE if x and y are equal
NA %eq% NA
NA %eq% NA
not equal calculation operator, support NA
x %neq% y
x %neq% y
x |
value x |
y |
value y |
logical value, TRUE if x and y are not equal
1 %neq% NA
1 %neq% NA
not in calculation operator
left %nin% right
left %nin% right
left |
left element |
right |
right element |
logical value, TRUE if left is not in right
0 %nin% 1:4
0 %nin% 1:4
expand a number vector according to the adjacent two numbers
adjacent_div(v, n_div = 10, .unique = FALSE)
adjacent_div(v, n_div = 10, .unique = FALSE)
v |
number vector |
n_div |
how many divisions expanded by two numbers |
.unique |
only keep unique numbers |
new number vector
adjacent_div(10^c(1:3), n_div = 10)
adjacent_div(10^c(1:3), n_div = 10)
use aliases for function arguments
alias_arg(..., default = NULL)
alias_arg(..., default = NULL)
... |
aliases of an argument |
default |
an alias with a default value |
the final value of this argument across all aliases
# set y, z as aliases of x when create a function func <- function(x = 1, y = NULL, z = NULL) { x <- alias_arg(x, y, z, default = x) return(x) }
# set y, z as aliases of x when create a function func <- function(x = 1, y = NULL, z = NULL) { x <- alias_arg(x, y, z, default = x) return(x) }
trans a tibble into markdown format table
as_md_table(x, show = TRUE)
as_md_table(x, show = TRUE)
x |
tibble |
show |
show the result instead of returning the markdown string, TRUE as default |
NULL or markdown string
mini_diamond %>% head(5) %>% as_md_table()
mini_diamond %>% head(5) %>% as_md_table()
trans a table in markdown format into tibble
as_tibble_md(x)
as_tibble_md(x)
x |
character string |
tibble
x <- " col1 | col2 | col3 | | ---- | ---- | ---- | | v1 | v2 | v3 | | r1 | r2 | r3 | " as_tibble_md(x)
x <- " col1 | col2 | col3 | | ---- | ---- | ---- | | v1 | v2 | v3 | | r1 | r2 | r3 | " as_tibble_md(x)
whether the expression is an atomic one
atomic_expr(ex)
atomic_expr(ex)
ex |
expression |
logical value
atomic_expr(rlang::expr(x)) atomic_expr(rlang::expr(!x)) atomic_expr(rlang::expr(x + y)) atomic_expr(rlang::expr(x > 1)) atomic_expr(rlang::expr(!x + y)) atomic_expr(rlang::expr(x > 1 | y < 2))
atomic_expr(rlang::expr(x)) atomic_expr(rlang::expr(!x)) atomic_expr(rlang::expr(x + y)) atomic_expr(rlang::expr(x > 1)) atomic_expr(rlang::expr(!x + y)) atomic_expr(rlang::expr(x > 1 | y < 2))
broadcast the vector into length n
broadcast_vector(x, n)
broadcast_vector(x, n)
x |
vector |
n |
target length |
vector
broadcast_vector(1:3, 5)
broadcast_vector(1:3, 5)
wrapper of tibble::column_to_rownames
c2r(df, col = "")
c2r(df, col = "")
df |
tibble |
col |
a col name |
data.frame
mini_diamond %>% c2r("id")
mini_diamond %>% c2r("id")
check arguments by custom function
check_arg(..., n = 2, fun = not.null)
check_arg(..., n = 2, fun = not.null)
... |
arguments |
n |
how many arguments should meet the custom conditions |
fun |
custom conditions defined by a function |
logical value
x <- 1 y <- 3 z <- NULL func <- function(x = NULL, y = NULL, z = NULL) { if (check_arg(x, y, z, n = 2)) { print("As expected, two arguments is not NULL") } if (check_arg(x, y, z, n = 1, fun = ~ .x < 2)) { print("As expected, one argument less than 2") } }
x <- 1 y <- 3 z <- NULL func <- function(x = NULL, y = NULL, z = NULL) { if (check_arg(x, y, z, n = 2)) { print("As expected, two arguments is not NULL") } if (check_arg(x, y, z, n = 1, fun = ~ .x < 2)) { print("As expected, one argument less than 2") } }
get the command line arguments
cmdargs(x = NULL)
cmdargs(x = NULL)
x |
one of 'wd, R_env, script_path, script_dir, env_configs' |
list of all arguments, or single value of select argument
cmdargs()
cmdargs()
dump a named vector into character
collapse_vector(named_vector, front_name = TRUE, collapse = ",")
collapse_vector(named_vector, front_name = TRUE, collapse = ",")
named_vector |
a named vector |
front_name |
if TRUE, put the names in front of the values |
collapse |
collapse separator |
character
collapse_vector(c(e = 1:4), front_name = TRUE, collapse = ";")
collapse_vector(c(e = 1:4), front_name = TRUE, collapse = ";")
combine multiple vectors into one
combn_vector(..., method = "first", invalid = NA)
combn_vector(..., method = "first", invalid = NA)
... |
vectors |
method |
how to combine, should be one of
|
invalid |
invalid value to ignore, |
combined vector
x1 <- c(1, 2, NA, NA) x2 <- c(3, NA, 2, NA) x3 <- c(4, NA, NA, 3) combn_vector(x1, x2, x3, method = "sum")
x1 <- c(1, 2, NA, NA) x2 <- c(3, NA, 2, NA) x3 <- c(4, NA, NA, 3) combn_vector(x1, x2, x3, method = "sum")
correct the numbers to a target ratio
correct_ratio(raw, target, digits = 0)
correct_ratio(raw, target, digits = 0)
raw |
the raw numbers |
target |
the target ratio |
digits |
the result digits |
corrected number vector
correct_ratio(c(10, 10), c(3, 5)) # support ratio as a float correct_ratio(c(100, 100), c(0.2, 0.8)) # more numbers correct_ratio(10:13, c(2, 3, 4, 6)) # with digits after decimal point correct_ratio(c(10, 10), c(1, 4), digits = 1)
correct_ratio(c(10, 10), c(3, 5)) # support ratio as a float correct_ratio(c(100, 100), c(0.2, 0.8)) # more numbers correct_ratio(10:13, c(2, 3, 4, 6)) # with digits after decimal point correct_ratio(c(10, 10), c(1, 4), digits = 1)
count two columns as a cross-tabulation table
cross_count(df, row, col, method = "n", digits = 2)
cross_count(df, row, col, method = "n", digits = 2)
df |
tibble |
row |
the column as rownames in the output |
col |
the column as colnames in the output |
method |
one of |
digits |
the digits of ratios |
data.frame
cross_count(mini_diamond, cut, clarity) # show the ratio in the row cross_count(mini_diamond, cut, clarity, method = "rowr") # show the ratio in the col cross_count(mini_diamond, cut, clarity, method = "colr")
cross_count(mini_diamond, cut, clarity) # show the ratio in the row cross_count(mini_diamond, cut, clarity, method = "rowr") # show the ratio in the col cross_count(mini_diamond, cut, clarity, method = "colr")
detect possible duplication in a vector, ignore case, blank and special character
detect_dup(vector, index = FALSE)
detect_dup(vector, index = FALSE)
vector |
vector possibly with duplication |
index |
return duplication index |
duplication sub-vector
detect_dup(c("a", "C_", "c -", "#A"))
detect_dup(c("a", "C_", "c -", "#A"))
the index of different character
diff_index(s1, s2, nth = NULL, ignore_case = FALSE)
diff_index(s1, s2, nth = NULL, ignore_case = FALSE)
s1 |
string1 |
s2 |
string2 |
nth |
just return nth index |
ignore_case |
ignore upper or lower cases |
list of different character indices
diff_index("AAAA", "ABBA")
diff_index("AAAA", "ABBA")
differences between two tibbles
diff_tb(old, new)
diff_tb(old, new)
old |
old tibble |
new |
new tibble |
differences tibble, 'a, d, c' in diff_type
stand for
'add, delete, change' compared to the old tibble
tb1 <- gen_tb(fill = "int", seed = 1) tb2 <- gen_tb(fill = "int", seed = 3) diff_tb(tb1, tb2)
tb1 <- gen_tb(fill = "int", seed = 1) tb2 <- gen_tb(fill = "int", seed = 3) diff_tb(tb1, tb2)
diagnose a tibble for character NA, NULL, all T/F column, blank in cell
dx_tb(x)
dx_tb(x)
x |
tibble |
list
x <- tibble::tibble( c1 = c("NA", NA, "a", "b"), c2 = c("c", "d", "e", "NULL"), c3 = c("T", "F", "F", "T"), c4 = c("T", "F", "F", NA), c5 = c("", " ", "\t", "\n") ) dx_tb(x)
x <- tibble::tibble( c1 = c("NA", NA, "a", "b"), c2 = c("c", "d", "e", "NULL"), c3 = c("T", "F", "F", "T"), c4 = c("T", "F", "F", NA), c5 = c("", " ", "\t", "\n") ) dx_tb(x)
detect whether directory is empty recursively
empty_dir(dir)
empty_dir(dir)
dir |
the directory |
logical value
# create an empty directory dir.create("some/deep/path/in/a/folder", recursive = TRUE) empty_dir("some/deep/path/in/a/folder") # create an empty file file.create("some/deep/path/in/a/folder/there_is_a_file.txt") empty_dir("some/deep/path/in/a/folder") empty_file("some/deep/path/in/a/folder/there_is_a_file.txt", strict = TRUE) # create a file with only character of length 0 write("", "some/deep/path/in/a/folder/there_is_a_file.txt") empty_file("some/deep/path/in/a/folder/there_is_a_file.txt", strict = TRUE) empty_file("some/deep/path/in/a/folder/there_is_a_file.txt") # clean unlink("some", recursive = TRUE)
# create an empty directory dir.create("some/deep/path/in/a/folder", recursive = TRUE) empty_dir("some/deep/path/in/a/folder") # create an empty file file.create("some/deep/path/in/a/folder/there_is_a_file.txt") empty_dir("some/deep/path/in/a/folder") empty_file("some/deep/path/in/a/folder/there_is_a_file.txt", strict = TRUE) # create a file with only character of length 0 write("", "some/deep/path/in/a/folder/there_is_a_file.txt") empty_file("some/deep/path/in/a/folder/there_is_a_file.txt", strict = TRUE) empty_file("some/deep/path/in/a/folder/there_is_a_file.txt") # clean unlink("some", recursive = TRUE)
detect whether file is empty recursively
empty_file(path, strict = FALSE)
empty_file(path, strict = FALSE)
path |
the path of file |
strict |
|
logical value
# create an empty directory dir.create("some/deep/path/in/a/folder", recursive = TRUE) empty_dir("some/deep/path/in/a/folder") # create an empty file file.create("some/deep/path/in/a/folder/there_is_a_file.txt") empty_dir("some/deep/path/in/a/folder") empty_file("some/deep/path/in/a/folder/there_is_a_file.txt", strict = TRUE) # create a file with only character of length 0 write("", "some/deep/path/in/a/folder/there_is_a_file.txt") empty_file("some/deep/path/in/a/folder/there_is_a_file.txt", strict = TRUE) empty_file("some/deep/path/in/a/folder/there_is_a_file.txt") # clean unlink("some", recursive = TRUE)
# create an empty directory dir.create("some/deep/path/in/a/folder", recursive = TRUE) empty_dir("some/deep/path/in/a/folder") # create an empty file file.create("some/deep/path/in/a/folder/there_is_a_file.txt") empty_dir("some/deep/path/in/a/folder") empty_file("some/deep/path/in/a/folder/there_is_a_file.txt", strict = TRUE) # create a file with only character of length 0 write("", "some/deep/path/in/a/folder/there_is_a_file.txt") empty_file("some/deep/path/in/a/folder/there_is_a_file.txt", strict = TRUE) empty_file("some/deep/path/in/a/folder/there_is_a_file.txt") # clean unlink("some", recursive = TRUE)
generate a matrix to show whether the item in each element of a list
exist_matrix(x, n_lim = 0, n_top = NULL, sort_items = NULL)
exist_matrix(x, n_lim = 0, n_top = NULL, sort_items = NULL)
x |
list of character vectors |
n_lim |
n limit to keep items in result |
n_top |
only keep top n items in result |
sort_items |
function to sort the items, item frequency by default |
tibble
x <- 1:5 %>% purrr::map( ~ gen_char(to = "k", n = 5, random = TRUE, seed = .x) ) exist_matrix(x)
x <- 1:5 %>% purrr::map( ~ gen_char(to = "k", n = 5, random = TRUE, seed = .x) ) exist_matrix(x)
pile up the subexpressions which are atomic
expr_pileup(ex)
expr_pileup(ex)
ex |
expression |
the character vector of subexpressions
ex <- rlang::expr(a == 2 & b == 3 | !b & x + 2) expr_pileup(ex)
ex <- rlang::expr(a == 2 & b == 3 | !b & x + 2) expr_pileup(ex)
extract keys and values from a character vector
extract_kv(v, sep = ": ", key_loc = 1, value_loc = 2)
extract_kv(v, sep = ": ", key_loc = 1, value_loc = 2)
v |
character vector |
sep |
separator between key and value |
key_loc |
key location |
value_loc |
value location |
a named character vector
extract_kv(c("x: 1", "y: 2"))
extract_kv(c("x: 1", "y: 2"))
fancy count to show an extended column
fancy_count(df, ..., ext = NULL, ext_fmt = "count", sort = FALSE, digits = 2)
fancy_count(df, ..., ext = NULL, ext_fmt = "count", sort = FALSE, digits = 2)
df |
tibble |
... |
other arguments from |
ext |
extended column |
ext_fmt |
|
sort |
sort by frequency or not |
digits |
if |
count tibble
fancy_count(mini_diamond, cut, ext = clarity) fancy_count(mini_diamond, cut, ext = clarity, ext_fmt = "ratio") fancy_count(mini_diamond, cut, ext = clarity, ext_fmt = "clean") fancy_count(mini_diamond, cut, ext = clarity, sort = FALSE) fancy_count(mini_diamond, cut, clarity, ext = id) %>% head(5)
fancy_count(mini_diamond, cut, ext = clarity) fancy_count(mini_diamond, cut, ext = clarity, ext_fmt = "ratio") fancy_count(mini_diamond, cut, ext = clarity, ext_fmt = "clean") fancy_count(mini_diamond, cut, ext = clarity, sort = FALSE) fancy_count(mini_diamond, cut, clarity, ext = id) %>% head(5)
fetch character from strings
fetch_char(s, index_list, na.rm = FALSE, collapse = FALSE)
fetch_char(s, index_list, na.rm = FALSE, collapse = FALSE)
s |
strings |
index_list |
index of nth character,
can be output of |
na.rm |
remove NA values from results or not |
collapse |
optional string used to combine the characters from a same string |
list of characters
fetch_char(rep("ABC", 3), list(1, 2, 3))
fetch_char(rep("ABC", 3), list(1, 2, 3))
apply tbflt on dplyr filter
filterC(.data, tbflt = NULL, .by = NULL, usecol = TRUE)
filterC(.data, tbflt = NULL, .by = NULL, usecol = TRUE)
.data |
tibble |
tbflt |
tbflt object |
.by |
group by, same as |
usecol |
if |
tibble
c1 <- tbflt(cut == "Fair") c2 <- tbflt(x > 8) mini_diamond %>% filterC(c1) %>% head(5) mini_diamond %>% filterC(c1 & c2) x <- 8 cond <- tbflt(y > x) # variable `x` not used because of column `x` in `mini_diamond` filterC(mini_diamond, cond) # will raise error because `x` is on the right side of `>` # filterC(mini_diamond, cond, usecol=FALSE) # if you know how to use `.env` or `!!`, forget argument `usecol`! cond <- tbflt(y > !!x) filterC(mini_diamond, cond) cond <- tbflt(y > .env$x) filterC(mini_diamond, cond)
c1 <- tbflt(cut == "Fair") c2 <- tbflt(x > 8) mini_diamond %>% filterC(c1) %>% head(5) mini_diamond %>% filterC(c1 & c2) x <- 8 cond <- tbflt(y > x) # variable `x` not used because of column `x` in `mini_diamond` filterC(mini_diamond, cond) # will raise error because `x` is on the right side of `>` # filterC(mini_diamond, cond, usecol=FALSE) # if you know how to use `.env` or `!!`, forget argument `usecol`! cond <- tbflt(y > !!x) filterC(mini_diamond, cond) cond <- tbflt(y > .env$x) filterC(mini_diamond, cond)
trans fixed string into regular expression string
fix_to_regex(p)
fix_to_regex(p)
p |
raw fixed pattern |
regex pattern
fix_to_regex("ABC|?(*)")
fix_to_regex("ABC|?(*)")
from float number to percent number
float_to_percent(x, digits = 2)
float_to_percent(x, digits = 2)
x |
number |
digits |
hold n digits after the decimal point |
percent character of x
float_to_percent(0.12)
float_to_percent(0.12)
farthest point sampling (FPS) for a vector
fps_vector(v, n, method = "round")
fps_vector(v, n, method = "round")
v |
vector |
n |
sample size |
method |
|
sampled vector
fps_vector(1:10, 4)
fps_vector(1:10, 4)
like dplyr::full_join while ignoring the same columns in the right tibble
like dplyr::full_join while ignoring the same columns in the right tibble
full_expand(x, y, by = NULL)
full_expand(x, y, by = NULL)
x |
left tibble |
y |
right tibble |
by |
columns to join by |
tibble
tb1 <- head(mini_diamond, 4) tb2 <- tibble::tibble( id = c("id-2", "id-4", "id-5"), carat = 1:3, price = c(1000, 2000, 3000), newcol = c("new2", "new4", "new5") ) left_expand(tb1, tb2, by = "id") full_expand(tb1, tb2, by = "id") inner_expand(tb1, tb2, by = "id")
tb1 <- head(mini_diamond, 4) tb2 <- tibble::tibble( id = c("id-2", "id-4", "id-5"), carat = 1:3, price = c(1000, 2000, 3000), newcol = c("new2", "new4", "new5") ) left_expand(tb1, tb2, by = "id") full_expand(tb1, tb2, by = "id") inner_expand(tb1, tb2, by = "id")
generate characters
gen_char( from = NULL, to = NULL, n = NULL, random = FALSE, allow_dup = TRUE, add = NULL, seed = NULL )
gen_char( from = NULL, to = NULL, n = NULL, random = FALSE, allow_dup = TRUE, add = NULL, seed = NULL )
from |
left bound, lower case letter |
to |
right bound, lower case letter |
n |
number of characters to generate |
random |
random generation |
allow_dup |
allow duplication when random generation |
add |
add extra characters other than |
seed |
random seed |
generated characters
gen_char(from = "g", n = 5) gen_char(to = "g", n = 5) gen_char(from = "g", to = "j") gen_char(from = "t", n = 5, random = TRUE) gen_char( from = "x", n = 5, random = TRUE, allow_dup = FALSE, add = c("+", "-") )
gen_char(from = "g", n = 5) gen_char(to = "g", n = 5) gen_char(from = "g", to = "j") gen_char(from = "t", n = 5, random = TRUE) gen_char( from = "x", n = 5, random = TRUE, allow_dup = FALSE, add = c("+", "-") )
generate all combinations
gen_combn(x, n = 2)
gen_combn(x, n = 2)
x |
vector |
n |
number of elements to combine |
all combinations
gen_combn(1:4, n = 2)
gen_combn(1:4, n = 2)
generate outliers from a series of number
gen_outlier( x, n, digits = 0, side = "both", lim = NULL, assign_n = NULL, only_out = TRUE )
gen_outlier( x, n, digits = 0, side = "both", lim = NULL, assign_n = NULL, only_out = TRUE )
x |
number vector |
n |
number of outliers to generate |
digits |
the digits of outliers |
side |
should be one of |
lim |
a two-length vector to assign the limitations of the outliers
if method is |
assign_n |
manually assign the number of low outliers or
high outliers when method is |
only_out |
only return outliers |
number vector of outliers
x <- seq(0, 100, 1) gen_outlier(x, 10) # generation limits gen_outlier(x, 10, lim = c(-80, 160)) # assign the low and high outliers gen_outlier(x, 10, lim = c(-80, 160), assign_n = c(0.1, 0.9)) # just generate low outliers gen_outlier(x, 10, side = "low") # return with raw vector gen_outlier(x, 10, only_out = FALSE)
x <- seq(0, 100, 1) gen_outlier(x, 10) # generation limits gen_outlier(x, 10, lim = c(-80, 160)) # assign the low and high outliers gen_outlier(x, 10, lim = c(-80, 160), assign_n = c(0.1, 0.9)) # just generate low outliers gen_outlier(x, 10, side = "low") # return with raw vector gen_outlier(x, 10, only_out = FALSE)
generate strings
gen_str(n = 1, len = 3, seed = NULL)
gen_str(n = 1, len = 3, seed = NULL)
n |
number of strings to generate |
len |
string length |
seed |
random seed |
string
gen_str(n = 2, len = 3)
gen_str(n = 2, len = 3)
generate tibbles
gen_tb(nrow = 3, ncol = 4, fill = "float", colnames = NULL, seed = NULL, ...)
gen_tb(nrow = 3, ncol = 4, fill = "float", colnames = NULL, seed = NULL, ...)
nrow |
number of rows |
ncol |
number of columns |
fill |
fill by, one of |
colnames |
names of columns |
seed |
random seed |
... |
parameters of |
tibble
gen_tb() gen_tb(fill = "str", nrow = 3, ncol = 4, len = 3)
gen_tb() gen_tb(fill = "str", nrow = 3, ncol = 4, len = 3)
generate ticks for a number vector
generate_ticks(x, expect_ticks = 10)
generate_ticks(x, expect_ticks = 10)
x |
number vector |
expect_ticks |
expected number of ticks, may be a little different from the result |
ticks number
generate_ticks(c(176, 198, 264))
generate_ticks(c(176, 198, 264))
geometric mean
geom_mean(x, na.rm = TRUE)
geom_mean(x, na.rm = TRUE)
x |
value |
na.rm |
remove NA or not |
geometric mean value
geom_mean(c(1, 9))
geom_mean(c(1, 9))
group character vector by a regex pattern
group_vector(x, pattern = "\\w")
group_vector(x, pattern = "\\w")
x |
character vector |
pattern |
regex pattern, '\w' as default |
list
v <- c( stringr::str_c("A", c(1, 2, 9, 10, 11, 12, 99, 101, 102)), stringr::str_c("B", c(1, 2, 9, 10, 21, 32, 99, 101, 102)) ) %>% sample() group_vector(v) group_vector(v, pattern = "\\w\\d") group_vector(v, pattern = "\\w(\\d)") # unmatched part will also be stored group_vector(v, pattern = "\\d{2}")
v <- c( stringr::str_c("A", c(1, 2, 9, 10, 11, 12, 99, 101, 102)), stringr::str_c("B", c(1, 2, 9, 10, 21, 32, 99, 101, 102)) ) %>% sample() group_vector(v) group_vector(v, pattern = "\\w\\d") group_vector(v, pattern = "\\w(\\d)") # unmatched part will also be stored group_vector(v, pattern = "\\d{2}")
separate numeric x into bins
hist_bins(x, bins = 10, lim = c(min(x), max(x)), breaks = NULL, sort = FALSE)
hist_bins(x, bins = 10, lim = c(min(x), max(x)), breaks = NULL, sort = FALSE)
x |
numeric vector |
bins |
bins number, defaults to 10 |
lim |
the min and max limits of bins, default as |
breaks |
assign breaks directly and will ignore |
sort |
sort the result tibble |
tibble
x <- dplyr::pull(mini_diamond, price, id) hist_bins(x, bins = 20)
x <- dplyr::pull(mini_diamond, price, id) hist_bins(x, bins = 20)
like dplyr::inner_join while ignoring the same columns in the right tibble
like dplyr::inner_join while ignoring the same columns in the right tibble
inner_expand(x, y, by = NULL)
inner_expand(x, y, by = NULL)
x |
left tibble |
y |
right tibble |
by |
columns to join by |
tibble
tb1 <- head(mini_diamond, 4) tb2 <- tibble::tibble( id = c("id-2", "id-4", "id-5"), carat = 1:3, price = c(1000, 2000, 3000), newcol = c("new2", "new4", "new5") ) left_expand(tb1, tb2, by = "id") full_expand(tb1, tb2, by = "id") inner_expand(tb1, tb2, by = "id")
tb1 <- head(mini_diamond, 4) tb2 <- tibble::tibble( id = c("id-2", "id-4", "id-5"), carat = 1:3, price = c(1000, 2000, 3000), newcol = c("new2", "new4", "new5") ) left_expand(tb1, tb2, by = "id") full_expand(tb1, tb2, by = "id") inner_expand(tb1, tb2, by = "id")
trans numbers to a fixed integer digit length
int_digits(x, digits = 2, scale_factor = FALSE)
int_digits(x, digits = 2, scale_factor = FALSE)
x |
number |
digits |
integer digit length |
scale_factor |
return the scale_factor instead of value |
number
int_digits(0.0332, 1)
int_digits(0.0332, 1)
whether a number only has zeros
is.zero(x)
is.zero(x)
x |
number |
all zero or not
is.zero(c("0.000", "0.102", NA))
is.zero(c("0.000", "0.102", NA))
like dplyr::left_join while ignoring the same columns in the right tibble
like dplyr::left_join while ignoring the same columns in the right tibble
left_expand(x, y, by = NULL)
left_expand(x, y, by = NULL)
x |
left tibble |
y |
right tibble |
by |
columns to join by |
tibble
tb1 <- head(mini_diamond, 4) tb2 <- tibble::tibble( id = c("id-2", "id-4", "id-5"), carat = 1:3, price = c(1000, 2000, 3000), newcol = c("new2", "new4", "new5") ) left_expand(tb1, tb2, by = "id") full_expand(tb1, tb2, by = "id") inner_expand(tb1, tb2, by = "id")
tb1 <- head(mini_diamond, 4) tb2 <- tibble::tibble( id = c("id-2", "id-4", "id-5"), carat = 1:3, price = c(1000, 2000, 3000), newcol = c("new2", "new4", "new5") ) left_expand(tb1, tb2, by = "id") full_expand(tb1, tb2, by = "id") inner_expand(tb1, tb2, by = "id")
trans list into data.frame
list2df(x, rownames = TRUE, colnames = NULL, method = "row")
list2df(x, rownames = TRUE, colnames = NULL, method = "row")
x |
list |
rownames |
use rownames or not |
colnames |
colnames of the output |
method |
one of |
tibble
x <- list( c("a", "1"), c("b", "2"), c("c", "3") ) list2df(x, colnames = c("char", "num")) x <- list( c("a", "b", "c"), c("1", "2", "3") ) list2df(x, method = "col")
x <- list( c("a", "1"), c("b", "2"), c("c", "3") ) list2df(x, colnames = c("char", "num")) x <- list( c("a", "b", "c"), c("1", "2", "3") ) list2df(x, method = "col")
max depth of a list
max_depth(x)
max_depth(x)
x |
list |
number
max_depth(list(a = list(b = list(c = 1), d = 2, e = 3)))
max_depth(list(a = list(b = list(c = 1), d = 2, e = 3)))
melt a vector into single value
melt_vector(x, method = "first", invalid = NA)
melt_vector(x, method = "first", invalid = NA)
x |
vector |
method |
how to melt, should be one of
|
invalid |
invalid value to ignore, |
melted single value
melt_vector(c(NA, 2, 3), method = "first") melt_vector(c(NA, 2, 3), method = "sum") melt_vector(c(NA, 2, 3), method = ",") melt_vector(c(NA, 2, Inf), invalid = c(NA, Inf))
melt_vector(c(NA, 2, 3), method = "first") melt_vector(c(NA, 2, 3), method = "sum") melt_vector(c(NA, 2, 3), method = ",") melt_vector(c(NA, 2, Inf), invalid = c(NA, Inf))
Minimal tibble dataset adjusted from diamond
mini_diamond
mini_diamond
mini_diamond
A data frame with 100 rows and 7 columns:
unique id
2 category variables
4 continuous variables
...
adjusted from ggplot2
max-min normalization
mm_norm(x, low = 0, high = 1)
mm_norm(x, low = 0, high = 1)
x |
numeric vector |
low |
low limit of result, 0 as default |
high |
high limit of result, 1 as default |
normed vector
mm_norm(c(1, 3, 4))
mm_norm(c(1, 3, 4))
move selected rows to target location
move_row(df, rows, .after = FALSE, .before = FALSE)
move_row(df, rows, .after = FALSE, .before = FALSE)
df |
tibble |
rows |
selected rows indexes |
.after |
|
.before |
|
reordered tibble
move_row(mini_diamond, 3:5, .after = 8)
move_row(mini_diamond, 3:5, .after = 8)
the ticks near a number
near_ticks(x, level = NULL, div = 2)
near_ticks(x, level = NULL, div = 2)
x |
number |
level |
the level of ticks, such as 1, 10, 100, etc. |
div |
number of divisions |
number vector of ticks
near_ticks(3462, level = 10)
near_ticks(3462, level = 10)
the nearest ticks around a number
nearest_tick(x, side = "both", level = NULL, div = 2)
nearest_tick(x, side = "both", level = NULL, div = 2)
x |
number |
side |
default as 'both', can be 'both|left|right' |
level |
the level of ticks, such as 1, 10, 100, etc. |
div |
number of divisions |
nearest tick number
nearest_tick(3462, level = 10)
nearest_tick(3462, level = 10)
not NA
not.na(x)
not.na(x)
x |
value |
logical value
not.na(NA)
not.na(NA)
not NULL
not.null(x)
not.null(x)
x |
value |
logical value
not.null(NULL)
not.null(NULL)
wrapper of the functions to process number string with prefix and suffix
number_fun_wrapper( x, fun = ~.x, prefix_ext = NULL, suffix_ext = NULL, verbose = FALSE )
number_fun_wrapper( x, fun = ~.x, prefix_ext = NULL, suffix_ext = NULL, verbose = FALSE )
x |
number string vector with prefix and suffix |
fun |
process function |
prefix_ext |
prefix extension |
suffix_ext |
suffix extension |
verbose |
print more details |
processed number with prefix and suffix
number_fun_wrapper(">=2.134%", function(x) round(x, 2))
number_fun_wrapper(">=2.134%", function(x) round(x, 2))
slice a tibble by an ordered vector
ordered_slice(df, by, ordered_vector, na.rm = FALSE, dup.rm = FALSE)
ordered_slice(df, by, ordered_vector, na.rm = FALSE, dup.rm = FALSE)
df |
tibble |
by |
slice by this column, which must have no duplicated values |
ordered_vector |
ordered vector |
na.rm |
remove NA or unknown values from ordered vector |
dup.rm |
remove duplication values from ordered vector |
sliced tibble
ordered_slice(mini_diamond, id, c("id-3", "id-2"))
ordered_slice(mini_diamond, id, c("id-3", "id-2"))
from percent number to float number
percent_to_float(x, digits = 2, to_double = FALSE)
percent_to_float(x, digits = 2, to_double = FALSE)
x |
percent number character |
digits |
hold n digits after the decimal point |
to_double |
use double output |
float character or double of x
percent_to_float("12%")
percent_to_float("12%")
pileup another logical vector on the TRUE values of first vector
pileup_logical(x, v)
pileup_logical(x, v)
x |
logical vector |
v |
another logical vector |
logical vector
# the first vector has 2 TRUE values v1 <- c(TRUE, FALSE, TRUE) # the length of second vector should also be 2 v2 <- c(FALSE, TRUE) pileup_logical(v1, v2)
# the first vector has 2 TRUE values v1 <- c(TRUE, FALSE, TRUE) # the length of second vector should also be 2 v2 <- c(FALSE, TRUE) pileup_logical(v1, v2)
information of packages
pkginfo(...)
pkginfo(...)
... |
case-insensitive package names |
baizer::pkginfo(dplyr)
baizer::pkginfo(dplyr)
load packages as a batch
pkglib(...)
pkglib(...)
... |
pkgs |
baizer::pkglib(dplyr, purrr)
baizer::pkglib(dplyr, purrr)
versions of packages
pkgver(...)
pkgver(...)
... |
case-insensitive package names |
baizer::pkgver(dplyr, purrr)
baizer::pkgver(dplyr, purrr)
split a positive integer number as a number vector
pos_int_split(x, n, method = "average")
pos_int_split(x, n, method = "average")
x |
positive integer |
n |
length of the output |
method |
should be one of |
number vector
pos_int_split(12, 3, method = "average") pos_int_split(12, 3, method = "random") pos_int_split(12, 3, method = c(1, 2, 3))
pos_int_split(12, 3, method = "average") pos_int_split(12, 3, method = "random") pos_int_split(12, 3, method = c(1, 2, 3))
wrapper of tibble::rownames_to_column
r2c(df, col = "")
r2c(df, col = "")
df |
tibble |
col |
a col name |
tibble
mini_diamond %>% c2r("id") %>% r2c("id")
mini_diamond %>% c2r("id") %>% r2c("id")
read excel file
read_excel(...)
read_excel(...)
... |
arguments of |
tibble
read multi-sheet excel file as a list of tibbles
read_excel_list(x)
read_excel_list(x)
x |
path |
list
read front matter markdown
read_fmmd(x, rm_blank_line = TRUE)
read_fmmd(x, rm_blank_line = TRUE)
x |
path |
rm_blank_line |
remove leading and trailing blank lines |
list
relevel a target column by another reference column
ref_level(x, col, ref)
ref_level(x, col, ref)
x |
tibble |
col |
target column |
ref |
reference column |
tibble
cut_level <- mini_diamond %>% dplyr::pull(cut) %>% unique() mini_diamond %>% dplyr::mutate(cut = factor(cut, cut_level)) %>% dplyr::mutate(cut0 = stringr::str_c(cut, "xxx")) %>% ref_level(cut0, cut)
cut_level <- mini_diamond %>% dplyr::pull(cut) %>% unique() mini_diamond %>% dplyr::mutate(cut = factor(cut, cut_level)) %>% dplyr::mutate(cut0 = stringr::str_c(cut, "xxx")) %>% ref_level(cut0, cut)
join the matched parts into string
reg_join(x, pattern, sep = "")
reg_join(x, pattern, sep = "")
x |
character |
pattern |
regex pattern |
sep |
separator |
character
reg_join(c("A_12.B", "C_3.23:2"), "[A-Za-z]+") reg_join(c("A_12.B", "C_3.23:2"), "\\w+") reg_join(c("A_12.B", "C_3.23:2"), "\\d+", sep = ",") reg_join(c("A_12.B", "C_3.23:2"), "\\d", sep = ",")
reg_join(c("A_12.B", "C_3.23:2"), "[A-Za-z]+") reg_join(c("A_12.B", "C_3.23:2"), "\\w+") reg_join(c("A_12.B", "C_3.23:2"), "\\d+", sep = ",") reg_join(c("A_12.B", "C_3.23:2"), "\\d", sep = ",")
regex match
reg_match(x, pattern, group = 1)
reg_match(x, pattern, group = 1)
x |
vector |
pattern |
regex pattern |
group |
regex group, 1 as default. when group=-1, return full matched tibble |
vector or tibble
v <- stringr::str_c("id", 1:3, c("A", "B", "C")) reg_match(v, "id(\\d+)(\\w)") reg_match(v, "id(\\d+)(\\w)", group = 2) reg_match(v, "id(\\d+)(\\w)", group = -1)
v <- stringr::str_c("id", 1:3, c("A", "B", "C")) reg_match(v, "id(\\d+)(\\w)") reg_match(v, "id(\\d+)(\\w)", group = 2) reg_match(v, "id(\\d+)(\\w)", group = -1)
remove columns by the ratio of an identical single value (NA supported)
remove_monocol(df, max_ratio = 1)
remove_monocol(df, max_ratio = 1)
df |
tibble |
max_ratio |
the max single value ratio to keep this column, default is 1 |
tibble
# remove_monocol(df)
# remove_monocol(df)
remove columns by the ratio of NA
remove_nacol(df, max_ratio = 1)
remove_nacol(df, max_ratio = 1)
df |
tibble |
max_ratio |
the max NA ratio to keep this column, default is 1 have NA |
tibble
# remove_nacol(df)
# remove_nacol(df)
remove rows by the ratio of NA
remove_narow(df, ..., max_ratio = 1)
remove_narow(df, ..., max_ratio = 1)
df |
tibble |
... |
only remove rows according to these columns,
refer to |
max_ratio |
the max NA ratio to keep this row, default is 1 have NA |
tibble
# remove_narow(df)
# remove_narow(df)
remove outliers and NA
remove_outliers(df, col, .by = NULL)
remove_outliers(df, col, .by = NULL)
df |
tibble |
col |
columns to remove outliers |
.by |
group by |
tibble
remove_outliers(mini_diamond, price)
remove_outliers(mini_diamond, price)
replace the items of one object by another
replace_item(x, y, keep_extra = FALSE)
replace_item(x, y, keep_extra = FALSE)
x |
number, character or list |
y |
another object, the class of y should be same as x |
keep_extra |
whether keep extra items in y |
replaced object
x <- list(A = 1, B = 3) y <- list(A = 9, C = 10) replace_item(x, y) replace_item(x, y, keep_extra = TRUE)
x <- list(A = 1, B = 3) y <- list(A = 9, C = 10) replace_item(x, y) replace_item(x, y, keep_extra = TRUE)
rewrite the NA values in a tibble by another tibble
rewrite_na(x, y, by)
rewrite_na(x, y, by)
x |
raw tibble |
y |
replace reference tibble |
by |
columns to align the tibbles |
tibble
tb1 <- tibble::tibble( id = c("id-1", "id-2", "id-3", "id-4"), group = c("a", "b", "a", "b"), price = c(0, -200, 3000, NA), type = c("large", "none", "small", "none") ) tb2 <- tibble::tibble( id = c("id-1", "id-2", "id-3", "id-4"), group = c("a", "b", "a", "b"), price = c(1, 2, 3, 4), type = c("l", "x", "x", "m") ) rewrite_na(tb1, tb2, by = c("id", "group"))
tb1 <- tibble::tibble( id = c("id-1", "id-2", "id-3", "id-4"), group = c("a", "b", "a", "b"), price = c(0, -200, 3000, NA), type = c("large", "none", "small", "none") ) tb2 <- tibble::tibble( id = c("id-1", "id-2", "id-3", "id-4"), group = c("a", "b", "a", "b"), price = c(1, 2, 3, 4), type = c("l", "x", "x", "m") ) rewrite_na(tb1, tb2, by = c("id", "group"))
trans range character into seq characters
rng2seq(x, sep = "-")
rng2seq(x, sep = "-")
x |
range character |
sep |
range separator |
seq characters
rng2seq(c("1-5", "2"))
rng2seq(c("1-5", "2"))
from float number to fixed digits character
round_string(x, digits = 2)
round_string(x, digits = 2)
x |
number |
digits |
hold n digits after the decimal point |
character
round_string(1.1, 2)
round_string(1.1, 2)
add #' to each line of code for roxygen examples
roxygen_fmt(x)
roxygen_fmt(x)
x |
codes |
roxygen_fmt( " code line1 code line2 " )
roxygen_fmt( " code line1 code line2 " )
the index of identical character
same_index(s1, s2, nth = NULL, ignore_case = FALSE)
same_index(s1, s2, nth = NULL, ignore_case = FALSE)
s1 |
string1 |
s2 |
string2 |
nth |
just return nth index |
ignore_case |
ignore upper or lower cases |
list of identical character indices
same_index("AAAA", "ABBA")
same_index("AAAA", "ABBA")
dataframe rows seriation, which will reorder the rows in a better pattern
seriate_df(x)
seriate_df(x)
x |
dataframe |
seriated dataframe
x <- mini_diamond %>% dplyr::select(id, dplyr::where(is.numeric)) %>% dplyr::mutate( dplyr::across( dplyr::where(is.numeric), ~ round(.x / max(.x), 4) ) ) %>% c2r("id") seriate_df(x)
x <- mini_diamond %>% dplyr::select(id, dplyr::where(is.numeric)) %>% dplyr::mutate( dplyr::across( dplyr::where(is.numeric), ~ round(.x / max(.x), 4) ) ) %>% c2r("id") seriate_df(x)
connection parameters to remote server via sftp
sftp_connect( server = "localhost", port = 22, user = NULL, password = NULL, wd = "~" )
sftp_connect( server = "localhost", port = 22, user = NULL, password = NULL, wd = "~" )
server |
remote server |
port |
SSH port, 22 as default |
user |
username |
password |
password |
wd |
workdir |
sftp_connection object
# sftp_con <- sftp_connect(server='remote_host', port=22, # user='username', password = "password", wd='~')
# sftp_con <- sftp_connect(server='remote_host', port=22, # user='username', password = "password", wd='~')
download file from remote server via sftp
sftp_download(sftp_con, path = NULL, to = basename(path))
sftp_download(sftp_con, path = NULL, to = basename(path))
sftp_con |
sftp_connection created by sftp_connect() |
path |
remote file path |
to |
local target path |
# sftp_download(sftp_con, # path=c('t1.txt', 't2.txt'), # to=c('path1.txt', 'path2.txt'))
# sftp_download(sftp_con, # path=c('t1.txt', 't2.txt'), # to=c('path1.txt', 'path2.txt'))
list files from remote server via sftp
sftp_ls(sftp_con, path = NULL, all = FALSE)
sftp_ls(sftp_con, path = NULL, all = FALSE)
sftp_con |
sftp_connection created by sftp_connect() |
path |
remote directory path |
all |
list hidden files or not |
files in the dir
# sftp_ls(sftp_con, 'your/dir')
# sftp_ls(sftp_con, 'your/dir')
signif, but using ceiling instead of rounding
signif_ceiling(x, digits = 2)
signif_ceiling(x, digits = 2)
x |
number |
digits |
digits |
number
signif_ceiling(3.11, 2)
signif_ceiling(3.11, 2)
signif, but using floor instead of rounding
signif_floor(x, digits = 2)
signif_floor(x, digits = 2)
x |
number |
digits |
digits |
number
signif_floor(3.19, 2)
signif_floor(3.19, 2)
signif or round string, depending on the character length
signif_round_string( x, digits = 2, format = "short", full_large = TRUE, full_small = FALSE )
signif_round_string( x, digits = 2, format = "short", full_large = TRUE, full_small = FALSE )
x |
number |
digits |
signif or round digits |
format |
short or long |
full_large |
keep full digits for large number |
full_small |
keep full digits for small number |
signif or round strings
signif_round_string(1.214, 2)
signif_round_string(1.214, 2)
from float number to fixed significant digits character
signif_string(x, digits = 2)
signif_string(x, digits = 2)
x |
number |
digits |
hold n significant digits |
character
signif_string(1.1, 2)
signif_string(1.1, 2)
slice character vector
slice_char(x, from = x[1], to = x[length(x)], unique = FALSE)
slice_char(x, from = x[1], to = x[length(x)], unique = FALSE)
x |
character vector |
from |
from |
to |
to |
unique |
remove the duplicated boundary characters |
sliced vector
x <- c("A", "B", "C", "D", "E") slice_char(x, "A", "D") slice_char(x, "D", "A") x <- c("A", "B", "C", "C", "A", "D", "D", "E", "A") slice_char(x, "B", "E") # duplicated element as boundary will throw an error # slice_char(x, 'A', 'E') # unique=TRUE to remove the duplicated boundary characters slice_char(x, "A", "E", unique = TRUE)
x <- c("A", "B", "C", "D", "E") slice_char(x, "A", "D") slice_char(x, "D", "A") x <- c("A", "B", "C", "C", "A", "D", "D", "E", "A") slice_char(x, "B", "E") # duplicated element as boundary will throw an error # slice_char(x, 'A', 'E') # unique=TRUE to remove the duplicated boundary characters slice_char(x, "A", "E", unique = TRUE)
sort by a function
sortf(x, func, group_pattern = NULL)
sortf(x, func, group_pattern = NULL)
x |
vector |
func |
a function used by the sort |
group_pattern |
a regex pattern to group by, only available if x is a character vector |
vector
sortf(c(-2, 1, 3), abs) v <- stringr::str_c("id", c(1, 2, 9, 10, 11, 12, 99, 101, 102)) %>% sample() sortf(v, function(x) reg_match(x, "\\d+") %>% as.double()) sortf(v, ~ reg_match(.x, "\\d+") %>% as.double()) v <- c( stringr::str_c("A", c(1, 2, 9, 10, 11, 12, 99, 101, 102)), stringr::str_c("B", c(1, 2, 9, 10, 21, 32, 99, 101, 102)) ) %>% sample() sortf(v, ~ reg_match(.x, "\\d+") %>% as.double(), group_pattern = "\\w")
sortf(c(-2, 1, 3), abs) v <- stringr::str_c("id", c(1, 2, 9, 10, 11, 12, 99, 101, 102)) %>% sample() sortf(v, function(x) reg_match(x, "\\d+") %>% as.double()) sortf(v, ~ reg_match(.x, "\\d+") %>% as.double()) v <- c( stringr::str_c("A", c(1, 2, 9, 10, 11, 12, 99, 101, 102)), stringr::str_c("B", c(1, 2, 9, 10, 21, 32, 99, 101, 102)) ) %>% sample() sortf(v, ~ reg_match(.x, "\\d+") %>% as.double(), group_pattern = "\\w")
split a column and return a longer tibble
split_column(df, name_col, value_col, sep = ",")
split_column(df, name_col, value_col, sep = ",")
df |
tibble |
name_col |
repeat this as name column |
value_col |
expand by this value column |
sep |
separator in the string |
expanded tibble
fancy_count(mini_diamond, cut, ext = clarity) %>% split_column(name_col = cut, value_col = clarity)
fancy_count(mini_diamond, cut, ext = clarity) %>% split_column(name_col = cut, value_col = clarity)
split a path into ancestor paths recursively
split_path(path)
split_path(path)
path |
path to split |
character vectors of ancestor paths
split_path("/home/someone/a/test/path.txt")
split_path("/home/someone/a/test/path.txt")
split vector into list
split_vector(vector, breaks, bounds = "(]")
split_vector(vector, breaks, bounds = "(]")
vector |
vector |
breaks |
split breaks |
bounds |
"(]" as default, can also be "[)" or "[]" |
list
split_vector(1:10, c(3, 7)) split_vector(stringr::str_split("ABCDEFGHIJ", "") %>% unlist(), c(3, 7), bounds = "[)" )
split_vector(1:10, c(3, 7)) split_vector(stringr::str_split("ABCDEFGHIJ", "") %>% unlist(), c(3, 7), bounds = "[)" )
fold change calculation which returns an extensible tibble
stat_fc( df, y, x, method = "mean", .by = NULL, rev_div = FALSE, digits = 2, fc_fmt = "short", suffix = "x" )
stat_fc( df, y, x, method = "mean", .by = NULL, rev_div = FALSE, digits = 2, fc_fmt = "short", suffix = "x" )
df |
tibble |
y |
value |
x |
sample test group |
method |
|
.by |
super-group |
rev_div |
reverse division |
digits |
fold change digits |
fc_fmt |
fold change format, one of short, signif, round |
suffix |
suffix of fold change, "x" as default |
fold change result tibble
stat_fc(mini_diamond, y = price, x = cut, .by = clarity)
stat_fc(mini_diamond, y = price, x = cut, .by = clarity)
calculate phi coefficient of two binary variables
stat_phi(x)
stat_phi(x)
x |
2x2 matrix or dataframe |
phi coefficient
data <- matrix(c(10, 8, 14, 18), nrow = 2) stat_phi(data)
data <- matrix(c(10, 8, 14, 18), nrow = 2) stat_phi(data)
statistical test which returns an extensible tibble
stat_test( df, y, x, .by = NULL, trans = "identity", paired = FALSE, paired_by = NULL, alternative = "two.sided", exclude_func = NULL, method = "wilcoxon", ns_symbol = "NS", digits = 2 )
stat_test( df, y, x, .by = NULL, trans = "identity", paired = FALSE, paired_by = NULL, alternative = "two.sided", exclude_func = NULL, method = "wilcoxon", ns_symbol = "NS", digits = 2 )
df |
tibble |
y |
value |
x |
sample test group |
.by |
super-group |
trans |
scale transformation |
paired |
paired samples or not |
paired_by |
a column for pair |
alternative |
one of "two.sided" (default), "greater" or "less" |
exclude_func |
a function that takes two arguments and returns a logical value; used if paired=TRUE, and will keep the comparison pairs for which this function returns TRUE. |
method |
test method, 'wilcoxon' as default, one of |
ns_symbol |
symbol of nonsignificant, 'NS' as default |
digits |
significant figure digits of p value. If the data pair of a single test returns TRUE, then exclude this pair |
test result tibble
stat_test(mini_diamond, y = price, x = cut, .by = clarity)
stat_test(mini_diamond, y = price, x = cut, .by = clarity)
replace specific characters in a string by their locations
str_replace_loc(x, start = 1, end = nchar(x), replacement = " ")
str_replace_loc(x, start = 1, end = nchar(x), replacement = " ")
x |
string |
start |
start |
end |
end |
replacement |
replacement |
replaced string
str_replace_loc("abcde", 1, 3, "A")
str_replace_loc("abcde", 1, 3, "A")
swap the names and values of a vector
swap_vecname(x)
swap_vecname(x)
x |
vector without duplicated values |
swapped vector
v <- c("a" = "A", "b" = "B", "c" = "C") swap_vecname(v)
v <- c("a" = "A", "b" = "B", "c" = "C") swap_vecname(v)
tbflt()
can save a series of filter conditions, and supports
logical operations among conditions
tbflt(x = expression(), .env = NULL)
tbflt(x = expression(), .env = NULL)
x |
any expression |
.env |
environment |
tbflt
c1 <- tbflt(cut == "Fair") c2 <- tbflt(x > 8) !c1 c1 | c2 c1 & c2
c1 <- tbflt(cut == "Fair") c2 <- tbflt(x > 8) !c1 c1 | c2 c1 & c2
transpose a dataframe
tdf(x, colnames = NULL)
tdf(x, colnames = NULL)
x |
dataframe |
colnames |
column names of the transposed dataframe |
dataframe
x <- c2r(mini_diamond, "id") tdf(x)
x <- c2r(mini_diamond, "id") tdf(x)
return top n items with highest frequency
top_item(x, n = 1)
top_item(x, n = 1)
x |
character |
n |
top n |
character
top_item(c("a", "b", "c", "b"))
top_item(c("a", "b", "c", "b"))
only keep unique vector values and their names
uniq(x)
uniq(x)
x |
vector |
vector
x <- c(a = 1, b = 2, c = 3, b = 2, a = 1) uniq(x)
x <- c(a = 1, b = 2, c = 3, b = 2, a = 1) uniq(x)
count unique values in each column
uniq_in_cols(x)
uniq_in_cols(x)
x |
tibble |
tibble
uniq_in_cols(mini_diamond)
uniq_in_cols(mini_diamond)
write a tibble into an excel file
write_excel(df, filename, sheetname = NULL, creator = "")
write_excel(df, filename, sheetname = NULL, creator = "")
df |
tibble or a list of tibbles |
filename |
the output filename |
sheetname |
the names of sheets. If not given, will use 'sheet1', or the names of list |
creator |
creator |
return status
# write_excel(mini_diamond, "mini_diamond.xlsx")
# write_excel(mini_diamond, "mini_diamond.xlsx")