diff --git a/.Rbuildignore b/.Rbuildignore index 8f27db2fa..e05151613 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -17,8 +17,9 @@ ^R/secure.global.ranking.md$ ^_pkgdown\.yml$ ^docs$ -^dsBase_6.3.2.tar.gz$ -^dsBase_6.3.2-permissive.tar.gz$ -^dsDanger_6.3.1.tar.gz$ +^dsBase_6.3.4.tar.gz$ +^dsBase_6.3.4-permissive.tar.gz$ +^dsDanger_6.3.4.tar.gz$ ^\.circleci$ ^\.circleci/config\.yml$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 000000000..2d19fc766 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 000000000..bfc9f4db3 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,49 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + release: + types: [published] + workflow_dispatch: + +name: pkgdown.yaml + +permissions: read-all + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/DESCRIPTION b/DESCRIPTION index 862e5a060..cc110b7f7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,23 +1,27 @@ Package: dsBaseClient -Title: DataSHIELD Client Functions -Version: 6.3.3 -Description: Base DataSHIELD functions for the client side. DataSHIELD is a software package which allows - you to do non-disclosive federated analysis on sensitive data. DataSHIELD analytic functions have +Title: 'DataSHIELD' Client Side Base Functions +Version: 6.3.4 +Description: Base 'DataSHIELD' functions for the client side. 'DataSHIELD' is a software package which allows + you to do non-disclosive federated analysis on sensitive data. 'DataSHIELD' analytic functions have been designed to only share non disclosive summary statistics, with built in automated output checking based on statistical disclosure control. With data sites setting the threshold values for - the automated output checks. + the automated output checks. For more details, see 'citation("dsBaseClient")'. 
Authors@R: c(person(given = "Paul", family = "Burton", - role = c("aut")), + role = c("aut"), + comment = c(ORCID = "0000-0001-5799-9634")), person(given = "Rebecca", family = "Wilson", - role = c("aut")), + role = c("aut"), + comment = c(ORCID = "0000-0003-2294-593X")), person(given = "Olly", family = "Butters", - role = c("aut")), + role = c("aut"), + comment = c(ORCID = "0000-0003-0354-8461")), person(given = "Patricia", family = "Ryser-Welch", - role = c("aut")), + role = c("aut"), + comment = c(ORCID = "0000-0002-0070-0264")), person(given = "Alex", family = "Westerberg", role = c("aut")), @@ -32,11 +36,26 @@ Authors@R: c(person(given = "Paul", family = "Avraam", role = c("aut"), comment = c(ORCID = "0000-0001-8908-2441")), + person(given = "Demetris", + family = "Avraam", + role = c("aut"), + comment = c(ORCID = "0000-0001-8908-2441")), person(given = "Yannick", family = "Marcon", role = c("aut"), email = "yannick.marcon@obiba.org", comment = c(ORCID = "0000-0003-0138-2023")), + person(given = "Tom", + family = "Bishop", + role = c("aut")), + person(given = "Amadou", + family = "Gaye", + role = c("aut"), + comment = c(ORCID = "0000-0002-1180-2792")), + person(given = "Xavier", + family = "Escribà-Montagut", + role = c("aut"), + comment = c(ORCID = "0000-0003-2888-8948")), person(given = "Stuart", family = "Wheater", role = c("aut", "cre"), @@ -58,6 +77,7 @@ Imports: Suggests: lme4, httr, + spelling, tibble, testthat, e1071, @@ -65,5 +85,6 @@ Suggests: DSOpal, DSMolgenisArmadillo, DSLite -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Encoding: UTF-8 +Language: en-GB diff --git a/R/ds.asNumeric.R b/R/ds.asNumeric.R index 7b4da435e..3e2b445fa 100644 --- a/R/ds.asNumeric.R +++ b/R/ds.asNumeric.R @@ -5,7 +5,7 @@ #' @details This function is based on the native R function \code{as.numeric}. #' However, it behaves differently with some specific classes of variables. For example, if the input #' object is of class factor, it first converts its values into characters and then convert those to -#' numerics. This behavior is important for the case where the input object is of class factor having +#' numerics. This behaviour is important for the case where the input object is of class factor having #' numbers as levels. In that case, the native R #' \code{as.numeric} function returns the underlying level codes and not the values as numbers. #' For example \code{as.numeric} in R converts the factor vector: \cr diff --git a/R/ds.corTest.R b/R/ds.corTest.R index 38fffceb9..3c9e42a81 100644 --- a/R/ds.corTest.R +++ b/R/ds.corTest.R @@ -10,7 +10,7 @@ #' used for the test. One of "pearson", "kendall", or "spearman", can be abbreviated. #' Default is set to "pearson". #' @param exact a logical indicating whether an exact p-value should be computed. Used for -#' Kendall's tau and Spearman's rho. See ‘Details’ of R stats function \code{cor.test} for +#' Kendall's tau and Spearman's rho. See \emph{Details} of R stats function \code{cor.test} for #' the meaning of NULL (the default). #' @param conf.level confidence level for the returned confidence interval. Currently #' only used for the Pearson product moment correlation coefficient if there are at least diff --git a/R/ds.dataFrameSubset.R b/R/ds.dataFrameSubset.R index 46878520b..1c5ff6a00 100644 --- a/R/ds.dataFrameSubset.R +++ b/R/ds.dataFrameSubset.R @@ -12,7 +12,7 @@ #' #' Server functions called: \code{dataFrameSubsetDS1} and \code{dataFrameSubsetDS2} #' -#' @param df.name a character string providing the name of the data frame to be subseted. 
+#' @param df.name a character string providing the name of the data frame to be subset. #' @param V1.name A character string specifying the name of the vector #' to which the Boolean operator is to be applied to define the subset. #' For more information see details. diff --git a/R/ds.dmtC2S.R b/R/ds.dmtC2S.R index 1f91efbfe..ee2e4be67 100644 --- a/R/ds.dmtC2S.R +++ b/R/ds.dmtC2S.R @@ -29,7 +29,7 @@ #' wish to change the connections you wish to use by default the call #' datashield.connections_default('opals.a') will set 'default.connections' #' to be 'opals.a' and so in the absence of specific instructions to the contrary -#' (e.g. by specifiying a particular dataset to be used via the +#' (e.g. by specifying a particular dataset to be used via the #' argument) all subsequent function calls will be to the datasets held in opals.a. #' If the argument is specified, it should be set without #' inverted commas: e.g. datasources=opals.a or datasources=default.connections. diff --git a/R/ds.elspline.R b/R/ds.elspline.R index c4a2bbdd8..01ddca05b 100644 --- a/R/ds.elspline.R +++ b/R/ds.elspline.R @@ -13,7 +13,7 @@ #' @param x the name of the input numeric variable #' @param n integer greater than 2, knots are computed such that they cut n equally-spaced #' intervals along the range of x -#' @param marginal logical, how to parametrize the spline, see Details +#' @param marginal logical, how to parametrise the spline, see Details #' @param names character, vector of names for constructed variables #' @param newobj a character string that provides the name for the output #' variable that is stored on the data servers. Default \code{elspline.newobj}. diff --git a/R/ds.forestplot.R b/R/ds.forestplot.R index 3b4873875..60227913c 100644 --- a/R/ds.forestplot.R +++ b/R/ds.forestplot.R @@ -1,10 +1,10 @@ #' @title Forestplot for SLMA models -#' @description Draws a foresplot of the coefficients for Study-Level Meta-Analysis performed with +#' @description Draws a forestplot of the coefficients for Study-Level Meta-Analysis performed with #' DataSHIELD #' -#' @param mod \code{list} List outputed by any of the SLMA models of DataSHIELD (\code{ds.glmerSLMA}, +#' @param mod \code{list} List outputted by any of the SLMA models of DataSHIELD (\code{ds.glmerSLMA}, #' \code{ds.glmSLMA}, \code{ds.lmerSLMA}) -#' @param variable \code{character} (default \code{NULL}) Variable to meta-analyze and visualize, by setting this +#' @param variable \code{character} (default \code{NULL}) Variable to meta-analyse and visualise, by setting this #' argument to \code{NULL} (default) the first independent variable will be used. #' @param method \code{character} (Default \code{"ML"}) Method to estimate the between study variance. #' See details from \code{?meta::metagen} for the different options. diff --git a/R/ds.glmPredict.R b/R/ds.glmPredict.R index ff0d7647e..96dfc792c 100644 --- a/R/ds.glmPredict.R +++ b/R/ds.glmPredict.R @@ -91,7 +91,7 @@ #' wish to change the connections you wish to use by default the call #' datashield.connections_default('opals.a') will set 'default.connections' #' to be 'opals.a' and so in the absence of specific instructions to the contrary -#' (e.g. by specifiying a particular dataset to be used via the +#' (e.g. by specifying a particular dataset to be used via the #' argument) all subsequent function calls will be to the datasets held in opals.a. #' If the argument is specified, it should be set without #' inverted commas: e.g. datasources=opals.a or datasources=default.connections. 
@@ -116,7 +116,7 @@ #' the name - if one was specified - of the dataframe being used as the basis for predictions, #' the output.type specified ('link', 'response' or 'terms'), the value of the #' dispersion parameter if one had been specified and the residual scale parameter (which is -#' multipled by sqrt(dispersion parameter) if one has been set). If output.type = 'terms', +#' multiplied by sqrt(dispersion parameter) if one has been set). If output.type = 'terms', #' the summary statistics for the fit and se.fit vectors are replaced by equivalent #' summary statistics for each column in fit and se.fit matrices which each have k columns #' if k terms are being summarised. diff --git a/R/ds.glmSLMA.R b/R/ds.glmSLMA.R index 7f3454bc0..9b46a6a40 100644 --- a/R/ds.glmSLMA.R +++ b/R/ds.glmSLMA.R @@ -263,7 +263,7 @@ #' them into their meta-analysis package of choice. #' @return \code{is.object.created} and \code{validity.check} are standard #' items returned by an assign function when the designated newobj appears to have -#' been successfuly created on the serverside at each study. This output is +#' been successfully created on the serverside at each study. This output is #' produced specifically by the assign function \code{glmSLMADS.assign} that writes #' out the glm object on the serverside #' @author Paul Burton, for DataSHIELD Development Team 07/07/20 diff --git a/R/ds.glmSummary.R b/R/ds.glmSummary.R index 38df81637..5629d937f 100644 --- a/R/ds.glmSummary.R +++ b/R/ds.glmSummary.R @@ -48,7 +48,7 @@ #' wish to change the connections you wish to use by default the call #' datashield.connections_default('opals.a') will set 'default.connections' #' to be 'opals.a' and so in the absence of specific instructions to the contrary -#' (e.g. by specifiying a particular dataset to be used via the +#' (e.g. by specifying a particular dataset to be used via the #' argument) all subsequent function calls will be to the datasets held in opals.a. #' If the argument is specified, it should be set without #' inverted commas: e.g. datasources=opals.a or datasources=default.connections. diff --git a/R/ds.igb_standards.R b/R/ds.igb_standards.R index fb7953f1e..89498a0ca 100644 --- a/R/ds.igb_standards.R +++ b/R/ds.igb_standards.R @@ -20,11 +20,20 @@ #' used: see \code{\link[DSI]{datashield.connections_default}}. #' @note For gestational ages between 24 and 33 weeks, the INTERGROWTH very early preterm #' standard is used. -#' @references International standards for newborn weight, length, and head circumference by -#' gestational age and sex: the Newborn Cross-Sectional Study of the INTERGROWTH-21st Project -#' Villar, José et al. The Lancet, Volume 384, Issue 9946, 857-868 -#' INTERGROWTH-21st very preterm size at birth reference charts. Lancet 2016 -#' doi.org/10.1016/S0140-6736(16) 00384-6. Villar, José et al. +#' @references +#' \itemize{ +#' \item Villar, J., Ismail, L.C., Victora, C.G., Ohuma, E.O., Bertino, E., +#' Altman, D.G., Lambert, A., Papageorghiou, A.T., Carvalho, M., Jaffer, Y.A., +#' Gravett, M.G., Purwar, M., Frederick, I.O., Noble, A.J., Pang, R., Barros, +#' F.C., Chumlea, C., Bhutta, Z.A., Kennedy, S.H., 2014. International +#' standards for newborn weight, length, and head circumference by gestational +#' age and sex: the Newborn Cross-Sectional Study of the INTERGROWTH-21st +#' Project. The Lancet 384, 857--868. https://doi.org/10.1016/S0140-6736(14)60932-6 +#' \item Villar, J., Giuliani, F., Fenton, T.R., Ohuma, E.O., Ismail, L.C., +#' Kennedy, S.H., 2016. 
INTERGROWTH-21st very preterm size at birth reference +#' charts. The Lancet 387, 844--845. https://doi.org/10.1016/S0140-6736(16)00384-6 +#' +#' } #' @return assigns the converted measurement as a new object on the server-side #' @author Demetris Avraam for DataSHIELD Development Team #' @export diff --git a/R/ds.ls.R b/R/ds.ls.R index 415c93f4d..2f65a3c8f 100644 --- a/R/ds.ls.R +++ b/R/ds.ls.R @@ -41,7 +41,7 @@ #' Server function called: \code{lsDS}. #' #' @param search.filter character string (potentially including \code{*} symbol) specifying the filter -#' for the object name that you want to find in the enviroment. For more information see \strong{Details}. +#' for the object name that you want to find in the environment. For more information see \strong{Details}. #' @param env.to.search an integer (e.g. in \code{2} or \code{2L} format) specifying the position #' in the search path of the environment to be explored. \code{1L} is the current active analytic #' environment on the server-side and is the default value of \code{env.to.search}. diff --git a/R/ds.lspline.R b/R/ds.lspline.R index 9aca23974..e044005cb 100644 --- a/R/ds.lspline.R +++ b/R/ds.lspline.R @@ -10,7 +10,7 @@ #' in slope as compared to the previous segment. #' @param x the name of the input numeric variable #' @param knots numeric vector of knot positions -#' @param marginal logical, how to parametrize the spline, see Details +#' @param marginal logical, how to parametrise the spline, see Details #' @param names character, vector of names for constructed variables #' @param newobj a character string that provides the name for the output #' variable that is stored on the data servers. Default \code{lspline.newobj}. diff --git a/R/ds.matrix.R b/R/ds.matrix.R index 6ee9bdfd5..b90356c59 100644 --- a/R/ds.matrix.R +++ b/R/ds.matrix.R @@ -27,7 +27,7 @@ #' #' @param mdata a character string specifying #' the name of a server-side scalar or vector. Also, a numeric value representing a -#' scalar specified from the client-side can be speficied. +#' scalar specified from the client-side can be specified #' Zeros, negative values and NAs are all allowed. #' For more information see \strong{Details}. #' @param from a character string specifying the source and nature of \code{mdata}. diff --git a/R/ds.matrixDiag.R b/R/ds.matrixDiag.R index f6edcbcd5..8c8ca877a 100644 --- a/R/ds.matrixDiag.R +++ b/R/ds.matrixDiag.R @@ -37,7 +37,7 @@ #' Server function called: \code{matrixDiagDS} #' @param x1 a character string specifying #' the name of a server-side scalar or vector. Also, a numeric value or vector -#' specified from the client-side can be speficied. This argument depends +#' specified from the client-side can be specified. This argument depends #' on the value specified in \code{aim}. #' For more information see \strong{Details}. #' @param aim a character string specifying the behaviour of the function. diff --git a/R/ds.ns.R b/R/ds.ns.R index 9c961fcf1..e98643d4d 100644 --- a/R/ds.ns.R +++ b/R/ds.ns.R @@ -8,7 +8,7 @@ #' sequence of interior knots, and the natural boundary conditions. These enforce the constraint #' that the function is linear beyond the boundary knots, which can either be supplied or default #' to the extremes of the data. -#' A primary use is in modeling formula to directly specify a natural spline term in a model. +#' A primary use is in modelling formula to directly specify a natural spline term in a model. #' @param x the predictor variable. Missing values are allowed. #' @param df degrees of freedom. 
One can supply df rather than knots; ns() then chooses #' df - 1 - intercept knots at suitably chosen quantiles of x (which will ignore missing values). diff --git a/R/ds.qlspline.R b/R/ds.qlspline.R index 45a3e2d33..9839d9843 100644 --- a/R/ds.qlspline.R +++ b/R/ds.qlspline.R @@ -18,7 +18,7 @@ #' intervals along x or a vector of numbers in (0; 1) specifying the quantiles explicitly. #' @param na.rm logical, whether NA should be removed when calculating quantiles, passed #' to na.rm of quantile. Default set to TRUE -#' @param marginal logical, how to parametrize the spline, see Details +#' @param marginal logical, how to parametrise the spline, see Details #' @param names character, vector of names for constructed variables #' @param newobj a character string that provides the name for the output #' variable that is stored on the data servers. Default \code{qlspline.newobj}. diff --git a/R/ds.ranksSecure.R b/R/ds.ranksSecure.R index 5a3affcaf..1d9d98ed9 100644 --- a/R/ds.ranksSecure.R +++ b/R/ds.ranksSecure.R @@ -1,7 +1,7 @@ # ds.ranksSecure #' @title Secure ranking of a vector across all sources #' @description Securely generate the ranks of a numeric vector and estimate -#' true qlobal quantiles across all data sources simultaneously +#' true global quantiles across all data sources simultaneously #' @details ds.ranksSecure is a clientside function which calls a series of #' other clientside and serverside functions to securely generate the global #' ranks of a numeric vector "V2BR" (vector to be ranked) @@ -43,7 +43,7 @@ #' of the clusters of values that are being ranked such that some values are #' treated as being missing and the processing stops, then setting #' generate.quantiles to FALSE allows the generation of ranks to complete so -#' they can then be used for non-parameteric analysis, even if the key values +#' they can then be used for non-parametric analysis, even if the key values #' cannot be estimated. A real example of an unusual configuration was in a #' reasonably large dataset of survival times, where a substantial proportion #' of survival profiles were censored at precisely 10 years. This meant that diff --git a/R/ds.recodeValues.R b/R/ds.recodeValues.R index d44cc8570..184ccea2b 100644 --- a/R/ds.recodeValues.R +++ b/R/ds.recodeValues.R @@ -4,7 +4,7 @@ #' @details This function recodes individual values with new individual values. This can #' apply to numeric and character values, factor levels and NAs. One particular use of #' \code{ds.recodeValues} is to convert NAs to an explicit value. This value is specified -#' in the argument \code{missing}. If tthe user want to recode only missing values, then it +#' in the argument \code{missing}. If the user wants to recode only missing values, then they #' should also specify an identical vector of values in both arguments \code{values2replace.vector} #' and \code{new.values.vector} (see Example 2 below). 
#' Server function called: \code{recodeValuesDS} diff --git a/R/ds.sample.R b/R/ds.sample.R index d2a08ab33..08c1b04c1 100644 --- a/R/ds.sample.R +++ b/R/ds.sample.R @@ -45,7 +45,7 @@ #' case with the default name 'newobj.sample) using ds.dataFrameSort with the #' 'sampling.order' vector as the sort key, the output object is rendered #' equivalent to PRWa but with the rows randomly permuted (so the column reflecting -#' the vector 'sample.order' now runs from 1:length of obejct, while the +#' the vector 'sample.order' now runs from 1:length of object, while the #' column reflecting 'ID.seq' denoting the original order is now randomly ordered. #' If you need to return to the original order you can simply us ds.dataFrameSort #' again using the column reflecting 'ID.seq' as the sort key: diff --git a/R/ds.scatterPlot.R b/R/ds.scatterPlot.R index 827b32722..6c2c78058 100644 --- a/R/ds.scatterPlot.R +++ b/R/ds.scatterPlot.R @@ -6,7 +6,7 @@ #' permitted in DataSHIELD, this function allows the user to plot non-disclosive scatter plots. #' #' If the argument \code{method} is set to \code{'deterministic'}, the server-side function searches -#' for the \code{k-1} nearest neighbors of each single data point and calculates the centroid +#' for the \code{k-1} nearest neighbours of each single data point and calculates the centroid #' of such \code{k} points. #' The proximity is defined by the minimum Euclidean distances of z-score transformed data. #' @@ -52,7 +52,7 @@ #' This argument can be set as \code{'deteministic'} or \code{'probabilistic'}. #' Default \code{'deteministic'}. #' For more information see \strong{Details}. -#' @param k the number of the nearest neighbors for which their centroid is calculated. +#' @param k the number of the nearest neighbours for which their centroid is calculated. #' Default 3. #' For more information see \strong{Details}. #' @param noise the percentage of the initial variance that is used as the variance of the embedded diff --git a/R/ds.sqrt.R b/R/ds.sqrt.R index 9193dbbc7..e78011def 100644 --- a/R/ds.sqrt.R +++ b/R/ds.sqrt.R @@ -59,7 +59,7 @@ #' # and get their square roots #' ds.make(toAssign='rep((1:10)^2, times=10)', newobj='squares.vector', datasources=connections) #' ds.sqrt(x='squares.vector', newobj='sqrt.vector', datasources=connections) -#' # check the behavior of that operation by comparing the tables of squares.vector and sqrt.vector +#' # check the behaviour of that operation by comparing the tables of squares.vector and sqrt.vector #' ds.table(rvar='squares.vector')$output.list$TABLE_rvar.by.study_counts #' ds.table(rvar='sqrt.vector')$output.list$TABLE_rvar.by.study_counts #' diff --git a/R/ds.subset.R b/R/ds.subset.R index 40bd6ce05..03b2e7344 100644 --- a/R/ds.subset.R +++ b/R/ds.subset.R @@ -1,10 +1,10 @@ #' #' @title Generates a valid subset of a table or a vector #' @description The function uses the R classical subsetting with squared brackets '[]' and allows also to -#' subset using a logical oprator and a threshold. The object to subset from must be a vector (factor, numeric -#' or charcater) or a table (data.frame or matrix). +#' subset using a logical operator and a threshold. The object to subset from must be a vector (factor, numeric +#' or character) or a table (data.frame or matrix). #' @details (1) If the input data is a table the user specifies the rows and/or columns to include in the subset; the columns can be -#' refered to by their names. 
Table subsetting can also be done using the name of a variable and a threshold (see example 3). +#' referred to by their names. Table subsetting can also be done using the name of a variable and a threshold (see example 3). #' (2) If the input data is a vector and the parameters 'rows', 'logical' and 'threshold' are all provided the last two are ignored #' (i.e. 'rows' has precedence over the other two parameters then). #' IMPORTANT NOTE: If the requested subset is not valid (i.e. contains less than the allowed number of observations) all the values are diff --git a/R/ds.subsetByClass.R b/R/ds.subsetByClass.R index 5c6f92bb1..b3b14ec27 100644 --- a/R/ds.subsetByClass.R +++ b/R/ds.subsetByClass.R @@ -107,7 +107,7 @@ ds.subsetByClass <- function(x=NULL, subsets="subClasses", variables=NULL, datas } # call the server side function that does the job - # get the indices of the columns refered to by their names in the arguments + # get the indices of the columns referred to by their names in the arguments if(is.null(variables)){ cally <- paste0("subsetByClassDS('", x, "')") }else{ diff --git a/R/ds.table1D.R b/R/ds.table1D.R index 07df60eb3..8a13afdc7 100644 --- a/R/ds.table1D.R +++ b/R/ds.table1D.R @@ -10,10 +10,10 @@ #' count. This way it is possible the know the total count and combine total counts across data sources but it #' is not possible to identify the cell(s) that had the small counts which render the table invalid. #' @param x a character, the name of a numerical vector with discrete values - usually a factor. -#' @param type a character which represent the type of table to ouput: pooled table or one table for each +#' @param type a character which represent the type of table to output: pooled table or one table for each #' data source. If \code{type} is set to 'combine', a pooled 1-dimensional table is returned; if If \code{type} #' is set to 'split' a 1-dimensional table is returned for each data source. -#' @param warningMessage a boolean, if set to TRUE (deafult) a warning is displayed if any returned table is invalid. Warning +#' @param warningMessage a boolean, if set to TRUE (default) a warning is displayed if any returned table is invalid. Warning #' messages are suppressed if this parameter is set to FALSE. However the analyst can still view 'validity' information #' which are stored in the output object 'validity' - see the list of output objects. #' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the diff --git a/R/ds.table2D.R b/R/ds.table2D.R index 301a89936..0dee00abd 100644 --- a/R/ds.table2D.R +++ b/R/ds.table2D.R @@ -10,11 +10,11 @@ #' identify the cell(s) that had the small counts which render the table invalid. #' @param x a character, the name of a numerical vector with discrete values - usually a factor. #' @param y a character, the name of a numerical vector with discrete values - usually a factor. -#' @param type a character which represent the type of table to ouput: pooled table or one table for each +#' @param type a character which represent the type of table to output: pooled table or one table for each #' data source or both. If \code{type} is set to 'combine', a pooled 2-dimensional table is returned; If \code{type} #' is set to 'split' a 2-dimensional table is returned for each data source. If \code{type} is set to 'both' (default) #' a pooled 2-dimensional table plus a 2-dimensional table for each data source are returned. 
-#' @param warningMessage a boolean, if set to TRUE (deafult) a warning is displayed if any returned table is invalid. Warning +#' @param warningMessage a boolean, if set to TRUE (default) a warning is displayed if any returned table is invalid. Warning #' messages are suppressed if this parameter is set to FALSE. However the analyst can still view 'validity' information #' which are stored in the output object 'validity' - see the list of output objects. #' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the diff --git a/R/ds.unique.R b/R/ds.unique.R index 327585bd2..8f2717054 100644 --- a/R/ds.unique.R +++ b/R/ds.unique.R @@ -4,7 +4,7 @@ #' @details Will create a vector or list which has no duplicate values. #' #' Server function called: \code{uniqueDS} -#' @param x.name a character string providing the name of the varable, in the server, to perform \code{unique} upon +#' @param x.name a character string providing the name of the variable, in the server, to perform \code{unique} upon #' @param newobj a character string that provides the name for the output object #' that is stored on the data servers. Default \code{unique.newobj}. #' @param datasources a list of \code{\link[DSI]{DSConnection-class}} diff --git a/R/ds.vectorCalc.R b/R/ds.vectorCalc.R index f019c7bca..f8918aab4 100644 --- a/R/ds.vectorCalc.R +++ b/R/ds.vectorCalc.R @@ -3,10 +3,10 @@ #' @description Carries out a row-wise operation on two or more vector. The function calls no #' server side function; it uses the R operation symbols built in DataSHIELD. #' @details In DataSHIELD it is possible to perform an operation on vectors by just using the relevant -#' R symbols (e.g. '+' for addtion, '*' for multiplication, '-' for substraction and '/' for division). +#' R symbols (e.g. '+' for addition, '*' for multiplication, '-' for subtraction and '/' for division). #' This might however be inconvenient if the number of vectors to include in the operation is large. #' This function takes the names of two or more vectors and performs the desired operation which could be -#' an addition, a multiplication, a substraction or a division. If one or more vectors have a missing value +#' an addition, a multiplication, a subtraction or a division. If one or more vectors have a missing value #' at any one entry (i.e. observation), the operation returns a missing value ('NA') for that entry; the output #' vectors has, hence the same length as the input vectors. #' @param x a vector of characters, the names of the vectors to include in the operation. diff --git a/R/glmChecks.R b/R/glmChecks.R index c720fa45f..6dcfe2ee7 100644 --- a/R/glmChecks.R +++ b/R/glmChecks.R @@ -1,14 +1,14 @@ #' #' @title Checks if the elements in the glm model have the right characteristics #' @description This is an internal function required by the client function \code{ds.glm} -#' to verify all the variables and ensure the process does not halt inadvertanly. +#' to verify all the variables and ensure the process does not halt inadvertently #' @details the variables are checked to ensure they are defined, not empty (i.e. are not missing -#' at complete) and evantually (if 'offset' or 'weights') are of 'numeric' with non negative value +#' at complete) and eventually (if 'offset' or 'weights') are of 'numeric' with non negative value #' (if 'weights'). 
#' @param formula a character, a regression formula given as a string character #' @param data a character, the name of an optional data frame containing the variables in #' in the \code{formula}. -#' @param offset null or a numreric vector that can be used to specify an a priori known component to be +#' @param offset null or a numeric vector that can be used to specify an a priori known component to be #' included in the linear predictor during fitting. #' @param weights a character, the name of an optional vector of 'prior weights' to be used in the fitting #' process. Should be NULL or a numeric vector. diff --git a/R/logical2int.R b/R/logical2int.R index b2a6d20bb..575960c41 100644 --- a/R/logical2int.R +++ b/R/logical2int.R @@ -1,7 +1,7 @@ #' #' @title Turns a logical operator into an integer #' @description This is an internal function. -#' @details This function is called to turn a logical oprator given as a +#' @details This function is called to turn a logical operator given as a #' character into an integer: '>' is turned into 1, '>=' into 2, '<' into 3, #' '<=' into 4, '==' into 5 and '!=' into 6. #' @param obj a character, the logical parameter to turn into an integer diff --git a/R/meanByClassHelper0a.R b/R/meanByClassHelper0a.R index 21c6e90ca..c1c51c9b6 100644 --- a/R/meanByClassHelper0a.R +++ b/R/meanByClassHelper0a.R @@ -7,7 +7,7 @@ #' @param b a character, the name of a factor vector. #' @param type a character which represents the type of analysis to carry out. If \code{type} is set to #' 'combine', a pooled table of results is generated. If \code{type} is set to 'split', a table of results -#' is genrated for each study. +#' is generated for each study. #' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the #' the default set of connections will be used: see \link[DSI]{datashield.connections_default}. #' @return a table or a list of tables that hold the length of the numeric variable and its mean diff --git a/R/meanByClassHelper0b.R b/R/meanByClassHelper0b.R index 1419d53f2..89c1c17d6 100644 --- a/R/meanByClassHelper0b.R +++ b/R/meanByClassHelper0b.R @@ -2,13 +2,13 @@ #' @title Runs the computation if variables are within a table structure #' @description This is an internal function. #' @details This function is called by the function 'ds.meanByClass' to produce the final tables -#' if the user soecify a table structure. +#' if the user specify a table structure. #' @param x a character, the name of the dataset to get the subsets from. #' @param outvar a character vector, the names of the continuous variables #' @param covar a character vector, the names of up to 3 categorical variables #' @param type a character which represents the type of analysis to carry out. If \code{type} is set to #' 'combine', a pooled table of results is generated. If \code{type} is set to 'split', a table of results -#' is genrated for each study. +#' is generated for each study. #' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the #' the default set of connections will be used: see \link[DSI]{datashield.connections_default}. #' @return a table or a list of tables that hold the length of the numeric variable(s) and their mean diff --git a/R/meanByClassHelper2.R b/R/meanByClassHelper2.R index 9fac9125a..55dca1c33 100644 --- a/R/meanByClassHelper2.R +++ b/R/meanByClassHelper2.R @@ -2,12 +2,12 @@ #' @title Generates a table for pooled results #' @description This is an internal function. 
#' @details This function is called by the function 'ds.meanByClass' to produce the final table -#' if the user sets the parmater 'type' to combine (the default behaviour of 'ds.meanByClass'). +#' if the user sets the parameter 'type' to combine (the default behaviour of 'ds.meanByClass'). #' @param dtsources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the #' the default set of connections will be used: see \link[DSI]{datashield.connections_default}. #' @param tablenames a character vector, the name of the subset tables #' @param variables a character vector, the names of the continuous variables to computes a mean for. -#' @param invalidrecorder a list, holds informations about invalid subsets in each study. +#' @param invalidrecorder a list, holds information about invalid subsets in each study. #' @keywords internal #' @return a matrix, a table which contains the length, mean and standard deviation of each of the #' specified 'variables' in each subset table. diff --git a/R/meanByClassHelper3.R b/R/meanByClassHelper3.R index 61e61d1f8..4c834b78a 100644 --- a/R/meanByClassHelper3.R +++ b/R/meanByClassHelper3.R @@ -2,12 +2,12 @@ #' @title Generates results tables for each study separately #' @description This is an internal function. #' @details This function is called by the function 'ds.meanByClass' to produce the final tables -#' if the user sets the parmater 'type' to 'split'. +#' if the user sets the parameter 'type' to 'split'. #' @param dtsources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the #' the default set of connections will be used: see \link[DSI]{datashield.connections_default}. #' @param tablenames a character vector, the name of the subset tables #' @param variables a character vector, the names of the continuous variables to computes a mean for. -#' @param invalidrecorder a list, holds informations about invalid subsets in each study +#' @param invalidrecorder a list, holds information about invalid subsets in each study #' @keywords internal #' @return a list which one results table for each study. #' @author Gaye, A. @@ -19,7 +19,7 @@ meanByClassHelper3 <- function(dtsources, tablenames, variables, invalidrecorder finalist <- vector('list', length(dtsources)) for(s in 1:length(dtsources)){ - # now get the mean and SD for the continuous variables in each of tthe subset tables + # now get the mean and SD for the continuous variables in each of the subset tables finaltable <- matrix(numeric(0), ncol=numtables) finalrows <- c() for(z in 1:length(variables)){ diff --git a/README.md b/README.md index 173f08245..c82341b5f 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,38 @@ -dsBaseClient -============ +## dsBaseClient: 'DataSHIELD' Client Side Base Functions -DataSHIELD client side base R library. 
+[![License](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0.html) +[![](https://www.r-pkg.org/badges/version/dsBaseClient?color=black)](https://cran.r-project.org/package=dsBaseClient) +[![R build +status](https://github.com/datashield/dsBaseClient/workflows/R-CMD-check/badge.svg)](https://github.com/datashield/dsBaseClient/actions) +[![Codecov test coverage](https://codecov.io/gh/datashield/dsBaseClient/graph/badge.svg)](https://app.codecov.io/gh/datashield/dsBaseClient) -[![License](https://img.shields.io/badge/license-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0.html) +## Installation -About -===== +You can install the released version of dsBaseClient from +[CRAN](https://cran.r-project.org/package=dsBaseClient) with: -DataSHIELD is a software package which allows you to do non-disclosive federated analysis on sensitive data. Our website (https://www.datashield.org) has in depth descriptions of what it is, how it works and how to install it. A key point to highlight is that DataSHIELD has a client-server infrastructure, so the dsBase package (https://github.com/datashield/dsBase) needs to be used in conjuction with the dsBaseClient package (https://github.com/datashield/dsBaseClient) - trying to use one without the other makes no sense. +``` r +install.packages("dsBaseClient") +``` + +And the development version from +[GitHub](https://github.com/datashield/dsBaseClient/) with: + + +``` r +install.packages("remotes") +remotes::install_github("datashield/dsBaseClient", "") + +# Install v6.3.4 with the following +remotes::install_github("datashield/dsBaseClient", "6.3.4") +``` + +For a full list of development branches, checkout https://github.com/datashield/dsBaseClient/branches + + +## About + +DataSHIELD is a software package which allows you to do non-disclosive federated analysis on sensitive data. Our website (https://www.datashield.org) has in depth descriptions of what it is, how it works and how to install it. A key point to highlight is that DataSHIELD has a client-server infrastructure, so the dsBase package (https://github.com/datashield/dsBase) needs to be used in conjunction with the dsBaseClient package (https://github.com/datashield/dsBaseClient) - trying to use one without the other makes no sense. Detailed instructions on how to install DataSHIELD are at https://www.datashield.org/wiki. @@ -21,3 +45,40 @@ The code here is organised as: | obiba CRAN | Where you probably should install DataSHIELD from. | | releases | Stable releases. | | master branch | Mostly in sync with the latest release, changes rarely. | + +## References + +[1] Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, + Avraam D, Marcon Y, Bishop T, Gaye A, Escribà Montagut X, Wheater S (2025). + _dsBaseClient: 'DataSHIELD' Client Side Base Functions_. R package version 6.3.4. + +[2] Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones E, Minion J, Boyd A, Newby C, Nuotio + M, Wilson R, Butters O, Murtagh B, Demir I, Doiron D, Giepmans L, Wallace S, Budin-Ljøsne I, + Oliver Schmidt C, Boffetta P, Boniol M, Bota M, Carter K, deKlerk N, Dibben C, Francis R, + Hiekkalinna T, Hveem K, Kvaløy K, Millar S, Perry I, Peters A, Phillips C, Popham F, Raab G, + Reischl E, Sheehan N, Waldenberger M, Perola M, van den Heuvel E, Macleod J, Knoppers B, + Stolk R, Fortier I, Harris J, Woffenbuttel B, Murtagh M, Ferretti V, Burton P (2014). 
+ “DataSHIELD: taking the analysis to the data, not the data to the analysis.” _International + Journal of Epidemiology_, *43*(6), 1929-1944. . + +[3] Wilson R, W. Butters O, Avraam D, Baker J, Tedds J, Turner A, Murtagh M, R. Burton P (2017). + “DataSHIELD – New Directions and Dimensions.” _Data Science Journal_, *16*(21), 1-21. + . + +[4] Avraam D, Wilson R, Aguirre Chan N, Banerjee S, Bishop T, Butters O, Cadman T, Cederkvist L, + Duijts L, Escribà Montagut X, Garner H, Gonçalves G, González J, Haakma S, Hartlev M, + Hasenauer J, Huth M, Hyde E, Jaddoe V, Marcon Y, Mayrhofer M, Molnar-Gabor F, Morgan A, + Murtagh M, Nestor M, Nybo Andersen A, Parker S, Pinot de Moira A, Schwarz F, + Strandberg-Larsen K, Swertz M, Welten M, Wheater S, Burton P (2024). “DataSHIELD: + mitigating disclosure risk in a multi-site federated analysis platform.” _Bioinformatics + Advances_, *5*(1), 1-21. . + +> **_Note:_** Apple Mx architecture users, please be aware that there are some numerical limitations on this platform, which leads to unexpected results when using base R packages, like stats​. +> +> x <- c(0, 3, 7) +> +> 1 - cor(x, x)​ +> +> The above should result in a value of zero. +> +> _Also See:_ For more details see https://cran.r-project.org/doc/FAQ/R-FAQ.html#Why-doesn_0027t-R-think-these-numbers-are-equal_003f and the bug report: https://bugs.r-project.org/show_bug.cgi?id=18941 diff --git a/_pkgdown.yml b/_pkgdown.yml index 4c98f6e56..f46c2ebc7 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,3 +1,4 @@ template: + lang: en-GB params: bootswatch: simplex diff --git a/armadillo_azure-pipelines.yml b/armadillo_azure-pipelines.yml index 08a1bbb01..8439c8cef 100644 --- a/armadillo_azure-pipelines.yml +++ b/armadillo_azure-pipelines.yml @@ -58,10 +58,10 @@ schedules: - master always: true - cron: "0 2 * * *" - displayName: Nightly build - v6.3.2-dev + displayName: Nightly build - v6.3.4-dev branches: include: - - v6.3.2-dev + - v6.3.4-dev always: true ######################################################################################### @@ -139,7 +139,7 @@ jobs: sudo R -q -e "install.packages(c('MolgenisAuth', 'MolgenisArmadillo', 'DSMolgenisArmadillo'), dependencies=TRUE, repos='https://cloud.r-project.org')" sudo R -q -e "install.packages(c('DescTools','e1071'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "library('devtools'); devtools::install_github(repo='datashield/dsDangerClient', ref='6.3.1', dependencies = TRUE)" + sudo R -q -e "library('devtools'); devtools::install_github(repo='datashield/dsDangerClient', ref='v6.3.4-dev', dependencies = TRUE)" # XML grep for coverage report merging sudo apt-get install -qq xml-twig-tools -y @@ -235,7 +235,7 @@ jobs: curl -u admin:admin -X GET http://localhost:8080/packages - curl -u admin:admin --max-time 300 -v -H 'Content-Type: multipart/form-data' -F "file=@dsBase_6.3.2-permissive.tar.gz" -X POST http://localhost:8080/install-package + curl -u admin:admin --max-time 300 -v -H 'Content-Type: multipart/form-data' -F "file=@dsBase_6.3.4-permissive.tar.gz" -X POST http://localhost:8080/install-package sleep 60 docker container restart dsbaseclient_armadillo_1 @@ -364,7 +364,7 @@ jobs: - bash: | curl -u admin:admin http://localhost:8080/whitelist - curl -u admin:admin -v -H 'Content-Type: multipart/form-data' -F "file=@dsDanger_6.3.2.tar.gz" -X POST http://localhost:8080/install-package + curl -u admin:admin -v -H 'Content-Type: multipart/form-data' -F "file=@dsDanger_6.3.4.tar.gz" -X POST http://localhost:8080/install-package 
docker container restart dsbaseclient_armadillo_1 sleep 60 diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2ce0e4d5f..6c6103d4a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -58,10 +58,10 @@ schedules: - master always: true - cron: "0 2 * * *" - displayName: Nightly build - v6.3.2-dev + displayName: Nightly build - v6.3.4-dev branches: include: - - v6.3.2-dev + - v6.3.4-dev always: true ######################################################################################### @@ -132,7 +132,7 @@ jobs: sudo apt-get upgrade -y sudo apt-get install -qq libxml2-dev libcurl4-openssl-dev libssl-dev libgsl-dev libgit2-dev r-base -y - sudo apt-get install -qq libharfbuzz-dev libfribidi-dev libmagick++-dev -y + sudo apt-get install -qq libharfbuzz-dev libfribidi-dev libmagick++-dev libudunits2-dev -y sudo R -q -e "install.packages(c('curl','httr'), dependencies=TRUE, repos='https://cloud.r-project.org')" sudo R -q -e "install.packages(c('devtools','covr'), dependencies=TRUE, repos='https://cloud.r-project.org')" sudo R -q -e "install.packages(c('fields','meta','metafor','ggplot2','gridExtra','data.table'), dependencies=TRUE, repos='https://cloud.r-project.org')" @@ -140,7 +140,7 @@ jobs: sudo R -q -e "install.packages(c('MolgenisAuth', 'MolgenisArmadillo', 'DSMolgenisArmadillo'), dependencies=TRUE, repos='https://cloud.r-project.org')" sudo R -q -e "install.packages(c('DescTools','e1071'), dependencies=TRUE, repos='https://cloud.r-project.org')" - sudo R -q -e "library('devtools'); devtools::install_github(repo='datashield/dsDangerClient', ref='6.3.2', dependencies = TRUE)" + sudo R -q -e "library('devtools'); devtools::install_github(repo='datashield/dsDangerClient', ref='6.3.4', dependencies = TRUE)" # XML grep for coverage report merging sudo apt-get install -qq xml-twig-tools -y @@ -235,7 +235,7 @@ jobs: - bash: | R -q -e "library(opalr); opal <- opal.login(username = 'administrator', password = 'datashield_test&', url = 'https://localhost:8443', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); opal.put(opal, 'system', 'conf', 'general', '_rPackage'); opal.logout(o)" - R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsBase', username = 'datashield', ref = '6.3.2'); opal.logout(opal)" + R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsBase', username = 'datashield', ref = 'v6.3.4-dev'); opal.logout(opal)" sleep 60 @@ -363,7 +363,7 @@ jobs: R -q -e "library(opalr); opal <- opal.login(username = 'administrator', password = 'datashield_test&', url = 'https://localhost:8443', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); opal.put(opal, 'system', 'conf', 'general', '_rPackage'); opal.logout(o)" - R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsDanger', username = 'datashield', ref = '6.3.1'); opal.logout(opal)" + R -q -e "library(opalr); opal <- opal.login('administrator','datashield_test&', url='https://localhost:8443/', opts = list(ssl_verifyhost=0, ssl_verifypeer=0)); dsadmin.install_github_package(opal, 'dsDanger', username = 'datashield', ref = '6.3.4'); opal.logout(opal)" workingDirectory: 
$(Pipeline.Workspace)/dsBaseClient displayName: 'Install dsDanger package on Opal server' diff --git a/docker-compose_armadillo.yml b/docker-compose_armadillo.yml index cf5a10956..7a791d39f 100644 --- a/docker-compose_armadillo.yml +++ b/docker-compose_armadillo.yml @@ -3,7 +3,7 @@ services: hostname: armadillo ports: - 8080:8080 - image: datashield/armadillo_citest:5.1.2 + image: datashield/armadillo_citest:latest environment: LOGGING_CONFIG: 'classpath:logback-file.xml' AUDIT_LOG_PATH: '/app/logs/audit.log' @@ -16,6 +16,6 @@ services: default: hostname: default - image: datashield/rserver-neutron-gypsum-permissive:latest + image: datashield/rock-omicron-karma-permissive:devel environment: DEBUG: "FALSE" diff --git a/docker-compose_opal.yml b/docker-compose_opal.yml index a55e8774a..40b3bf268 100644 --- a/docker-compose_opal.yml +++ b/docker-compose_opal.yml @@ -1,8 +1,8 @@ services: opal: - image: obiba/opal:5.1.4 + image: datashield/opal_citest:latest ports: - - "8443:8443" + - 8443:8443 links: - mongo - rock @@ -20,4 +20,6 @@ services: - MONGO_INITDB_ROOT_USERNAME=root - MONGO_INITDB_ROOT_PASSWORD=foobar rock: - image: datashield/rock-margin-joule-permissive:latest + image: datashield/rock-omicron-karma-permissive:devel + environment: + DEBUG: "FALSE" diff --git a/docs/404.html b/docs/404.html index c50acc168..761ee0b96 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,5 +1,5 @@ - + @@ -32,7 +32,7 @@ dsBaseClient - 6.3.3 + 6.3.4 @@ -73,7 +73,7 @@

Page not found (404)
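
The README note added in this change set (on Apple Mx numerical behaviour) gives a worked floating-point example. A minimal base-R sketch expanding on that note is shown below; the vector and the `cor()` call come from the note itself, while the local name `d` and the `all.equal()` tolerance check are illustrative additions using standard base R, not part of dsBaseClient.

``` r
# Values from the README note: mathematically, 1 - cor(x, x) is exactly 0,
# but on some platforms (e.g. Apple Mx, as described in the note) it can
# come back as a tiny non-zero number.
x <- c(0, 3, 7)
d <- 1 - cor(x, x)

# An exact comparison may therefore fail because of floating-point rounding
d == 0

# A tolerance-based comparison (see the R FAQ entry linked in the note) is the robust check
isTRUE(all.equal(d, 0))
```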