diff --git a/.Rbuildignore b/.Rbuildignore index 61bcf25..0aa0152 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,3 +8,5 @@ ^CRAN-SUBMISSION$ ^doc$ ^Meta$ +^examples/temp/ + diff --git a/DESCRIPTION b/DESCRIPTION index e70ead4..f12afd9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ Description: Provides users with an EZ-to-use platform for representing data License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 VignetteBuilder: knitr Config/testthat/edition: 3 Suggests: @@ -47,7 +47,10 @@ Imports: graphics, grDevices, plotrix, + Rcpp, splines, stats, withr NeedsCompilation: yes +LinkingTo: + Rcpp diff --git a/NAMESPACE b/NAMESPACE index 5dee822..6697b7c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,6 +43,7 @@ export(rotate) export(samples) export(sqrtManhattan) export(translate_axes) +importFrom(Rcpp,sourceCpp) importFrom(grDevices,chull) importFrom(grDevices,colorRampPalette) importFrom(grDevices,grey) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..d943b0d --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,11 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +alfunc <- function(BVEC, X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V) { + .Call(`_biplotEZ_alfunc`, BVEC, X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V) +} + +optimize_spline <- function(BVEC, X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V, TAU, FTOL, TINY, ITMAX) { + .Call(`_biplotEZ_optimize_spline`, BVEC, X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V, TAU, FTOL, TINY, ITMAX) +} + diff --git a/R/biplotEZ-package.R b/R/biplotEZ-package.R index d95718a..0b92bb4 100644 --- a/R/biplotEZ-package.R +++ b/R/biplotEZ-package.R @@ -54,5 +54,7 @@ #' @importFrom graphics points #' @importFrom graphics text #' @importFrom grDevices colorRampPalette +#' @importFrom Rcpp sourceCpp +#' @useDynLib biplotEZ, .registration = TRUE ## usethis namespace: end NULL diff --git a/R/plot2D.R b/R/plot2D.R index fd1450e..fc732a2 100644 --- a/R/plot2D.R +++ b/R/plot2D.R @@ -1087,13 +1087,189 @@ #' @useDynLib biplotEZ, .registration = TRUE #' #' @noRd + + + +# biplot.spline.axis <- function(j, X, Y, means, sd, +# n.int, spline.control, dmeth=0, ... ) +# { +# n <- nrow(X) +# p <- ncol(X) +# if (n > 103) +# { +# my.sample <- sample (1:n, size=103, replace=F) +# X <- X[my.sample,] +# Y <- Y[my.sample,] +# n <- nrow(X) +# } + +# tau <- spline.control$tau +# nmu <- spline.control$nmu +# u <- spline.control$u +# v <- spline.control$v +# lambda <- spline.control$lambda +# smallsigma <- spline.control$smallsigma +# bigsigma <- spline.control$bigsigma +# gamma <- spline.control$gamma +# bigsigmaactivate <- spline.control$bigsigmaactivate +# eps <- spline.control$eps +# tiny <- spline.control$tiny +# itmax <- spline.control$itmax +# ftol <- spline.control$ftol + +# cat ("Calculating spline axis for variable", j, "\n") +# if(dmeth==1) stop("dmeth should be equal to zero or integer greater than 1 \n") +# Ytilde <- scale(scale(Y, center=FALSE, scale=1/sd), center=-1*means, scale=FALSE) + +# ytilde <- Ytilde[,j] +# mutilde <- seq(from=min(ytilde),to=max(ytilde),length.out=nmu) +# y <- Y[,j] +# rangey <- max(y)-min(y) +# mu <- seq(from=min(y)-.3*rangey,to=max(y)+.3*rangey,length.out=nmu) +# markers <- (pretty(ytilde)-means[j])/sd[j] +# mu <- sort(c(mu,markers)) +# mu <- unique(mu) +# nmu <- length(mu) + +# if (v>0) +# { +# knots <- seq.int(from=0,to=1,length.out=v+2)[-c(1,v+2)] +# knots <- stats::quantile(y,knots) +# M <- splines::bs(mu,knots=knots,degree=u,intercept=FALSE) +# } else M <- splines::bs(mu,df=u+v,degree=u,intercept=FALSE) +# M <- scale(M,scale=FALSE,center=M[which.min(abs(mu)),]) # To ensure that the spline passes through the origin at the calibration which represents the mean of the variable +# Breg <- t(solve(t(X)%*%X)%*%t(X)%*%y) +# Zreg <- mu%*%Breg/sum(Breg^2) +# Bvec <- as.vector(solve(t(M)%*%M)%*%t(M)%*%Zreg) # Closest to regression biplot + +# const1 <- sum(y^2) +# const2 <- sum(X^2)/(n*p) +# TotalNumberOfLossFunctionCalls <- 0 + +# optimtouse <- function(Bvec) +# { +# timetemp <- proc.time()[3] +# LOSS <- 1.0 +# LOSS1 <- 1.0 +# Ind <- rep(1,n) +# pred <- rep(0,nmu) +# deltmp <- 0 +# tau <- tau +# #.5 # the choice of tau seems to affect perfomance quite substantially. +# # tau is used to specify the points on the inital simplex. +# Ay <- rep(0,(u+v)*p+1) +# TEMPVK <- rep(0,(u+v)*p) +# iter1 <- 0 +# iter <- 0 +# ERRO <- 0 + +# # Prepare for Fortran subroutine +# storage.mode(X) <- "double" +# storage.mode(Ind) <- "integer" +# storage.mode(mu) <- "double" +# storage.mode(pred) <- "double" +# storage.mode(y) <- "double" +# storage.mode(M) <- "double" +# storage.mode(Bvec) <- "double" +# storage.mode(Ay) <- "double" +# storage.mode(TEMPVK) <- "double" + + +# returned_data <-.Fortran('L',LOSS=as.double(LOSS),X=X,n=as.integer(n),p=as.integer(p),nmu=as.integer(nmu),Ind=Ind, +# mu=mu,pred=pred,lambda=as.double(lambda),y=y,const1=as.double(const1),const2=as.double(const2),u=as.integer(u), +# v=as.integer(v),M=M,Bvec=Bvec,tau=as.double(tau),Ay=Ay,TEMPVEK=TEMPVK,iter=as.integer(iter), +# ftol=as.double(ftol),LOSS1=as.double(LOSS1),iter1=as.integer(iter1),fout = as.integer(ERRO), +# const3=as.double(tiny), itmax=as.integer(itmax)) +# if(returned_data$fout > 0) +# { +# cat("Fout is: ", returned_data$fout, "\n") +# warning("Increase itmax for Fortran \n") +# } + +# B <- matrix(returned_data$Bvec,ncol=p) +# Z <- M%*%B + +# aa <- list(BestValue=returned_data$LOSS,BestSolution=returned_data$Bvec,ConvergenceCode=returned_data$fout, iter1=returned_data$iter1, +# iter=returned_data$iter,TimeTaken=proc.time()[3]-timetemp) +# aa +# } +# EuclidDist2 <- function (X, Y) +# { +# n <- nrow(X) +# m <- nrow(Y) +# bx <- rowSums(X^2) +# by <- rowSums(Y^2) +# outer(bx, by, FUN = "+") - 2 * X %*% t(Y) +# } + +# ### Variable initialisation +# outBestValues <- rep(NA,gamma+1) +# outBestSolutions <- matrix(nrow=2*(u+v),ncol=gamma+1) +# outTimeTaken <- rep(NA,gamma+1) # Is made one element longer at each iteration. +# BestSolutionsFrequency <- rep(NA,gamma+1) +# BestSolutionsIndices <- rep(NA,gamma+1) # Is made one element longer at each iteration. +# SquaredDistancesBetweenBestSolutions <- matrix(nrow=gamma+1,ncol=gamma+1) + +# ### Initial coefficients closest to regression biplot +# temp <- optimtouse(Bvec) +# outBestValues[1] <- temp$BestValue +# outBestSolutions[,1] <- temp$BestSolution +# outTimeTaken[1] <- temp$TimeTaken +# BestSolutionsFrequency[1] <- 1 +# BestSolutionsIndices[1] <- 1 +# DistinctSolutions <- 1 +# PreviousBestSolution <- NA +# nSameSolutionConsecutively <- 0 +# BigSigmaActivations <- NULL + +# test.iter <- temp$iter +# test.iter1 <- temp$iter1 + +# ### Last best coefficients perturbed +# for (gammacounter in 2:(gamma+1)) +# { +# if (nSameSolutionConsecutively>=bigsigmaactivate) +# { +# temp <- optimtouse(outBestSolutions[,which.min(outBestValues)]+stats::rnorm((u+v)*2,mean=0,sd=bigsigma)) +# BigSigmaActivations <- c(BigSigmaActivations,gammacounter) +# } +# else temp <- optimtouse(outBestSolutions[,which.min(outBestValues)]+stats::rnorm((u+v)*2,mean=0,sd=smallsigma)) +# outTimeTaken[gammacounter] <- temp$TimeTaken +# tempSquaredDistances <- EuclidDist2(matrix(temp$BestSolution,nrow=1),t(outBestSolutions[,1:DistinctSolutions])) +# if (any(tempSquaredDistances 103) - { + if (n > 103){ my.sample <- sample (1:n, size=103, replace=F) X <- X[my.sample,] Y <- Y[my.sample,] @@ -1128,8 +1304,7 @@ biplot.spline.axis <- function(j, X, Y, means, sd, mu <- unique(mu) nmu <- length(mu) - if (v>0) - { + if (v>0){ knots <- seq.int(from=0,to=1,length.out=v+2)[-c(1,v+2)] knots <- stats::quantile(y,knots) M <- splines::bs(mu,knots=knots,degree=u,intercept=FALSE) @@ -1142,56 +1317,44 @@ biplot.spline.axis <- function(j, X, Y, means, sd, const1 <- sum(y^2) const2 <- sum(X^2)/(n*p) TotalNumberOfLossFunctionCalls <- 0 - - optimtouse <- function(Bvec) - { + optimtouse <- function(Bvec) { timetemp <- proc.time()[3] - LOSS <- 1.0 - LOSS1 <- 1.0 - Ind <- rep(1,n) - pred <- rep(0,nmu) - deltmp <- 0 - tau <- tau - #.5 # the choice of tau seems to affect perfomance quite substantially. - # tau is used to specify the points on the inital simplex. - Ay <- rep(0,(u+v)*p+1) - TEMPVK <- rep(0,(u+v)*p) - iter1 <- 0 - iter <- 0 - ERRO <- 0 - - # Prepare for Fortran subroutine - storage.mode(X) <- "double" - storage.mode(Ind) <- "integer" - storage.mode(mu) <- "double" - storage.mode(pred) <- "double" - storage.mode(y) <- "double" - storage.mode(M) <- "double" - storage.mode(Bvec) <- "double" - storage.mode(Ay) <- "double" - storage.mode(TEMPVK) <- "double" + # Use Rcpp optimize_spline function + returned_data <- optimize_spline( + BVEC = Bvec, + X = X, + Y = y, + M = M, + MU = mu, + LAMBDA = lambda, + CONST1 = const1, + CONST2 = const2, + U = u, + V = v, + TAU = tau, + FTOL = ftol, + TINY = tiny, + ITMAX = itmax + ) - returned_data <-.Fortran('L',LOSS=as.double(LOSS),X=X,n=as.integer(n),p=as.integer(p),nmu=as.integer(nmu),Ind=Ind, - mu=mu,pred=pred,lambda=as.double(lambda),y=y,const1=as.double(const1),const2=as.double(const2),u=as.integer(u), - v=as.integer(v),M=M,Bvec=Bvec,tau=as.double(tau),Ay=Ay,TEMPVEK=TEMPVK,iter=as.integer(iter), - ftol=as.double(ftol),LOSS1=as.double(LOSS1),iter1=as.integer(iter1),fout = as.integer(ERRO), - const3=as.double(tiny), itmax=as.integer(itmax)) - if(returned_data$fout > 0) - { - cat("Fout is: ", returned_data$fout, "\n") - warning("Increase itmax for Fortran \n") + if(returned_data$ERRO > 0) { + cat(" Warning: Error code =", returned_data$ERRO, "\n") } - B <- matrix(returned_data$Bvec,ncol=p) - Z <- M%*%B + aa <- list( + BestValue = returned_data$LOSS, + BestSolution = returned_data$BVEC, + ConvergenceCode = returned_data$ERRO, + iter1 = returned_data$ITER1, + iter = returned_data$ITER, + TimeTaken = proc.time()[3] - timetemp + ) - aa <- list(BestValue=returned_data$LOSS,BestSolution=returned_data$Bvec,ConvergenceCode=returned_data$fout, iter1=returned_data$iter1, - iter=returned_data$iter,TimeTaken=proc.time()[3]-timetemp) aa } - EuclidDist2 <- function (X, Y) - { + + EuclidDist2 <- function (X, Y) { n <- nrow(X) m <- nrow(Y) bx <- rowSums(X^2) @@ -1223,29 +1386,24 @@ biplot.spline.axis <- function(j, X, Y, means, sd, test.iter1 <- temp$iter1 ### Last best coefficients perturbed - for (gammacounter in 2:(gamma+1)) - { - if (nSameSolutionConsecutively>=bigsigmaactivate) - { + for (gammacounter in 2:(gamma+1)){ + if (nSameSolutionConsecutively>=bigsigmaactivate){ temp <- optimtouse(outBestSolutions[,which.min(outBestValues)]+stats::rnorm((u+v)*2,mean=0,sd=bigsigma)) BigSigmaActivations <- c(BigSigmaActivations,gammacounter) } else temp <- optimtouse(outBestSolutions[,which.min(outBestValues)]+stats::rnorm((u+v)*2,mean=0,sd=smallsigma)) outTimeTaken[gammacounter] <- temp$TimeTaken tempSquaredDistances <- EuclidDist2(matrix(temp$BestSolution,nrow=1),t(outBestSolutions[,1:DistinctSolutions])) - if (any(tempSquaredDistances +#include +using namespace Rcpp; + +// Khaled: differences with fortran: +//Optimize alfunc by computing Z directly from BVEC and combining loops + +//- Eliminated intermediate B matrix creation by computing Z = M %*% B directly from BVEC vector +//- Combined three separate loops (exind, prediction, loss calculation) into single pass +//- Removed fortran-unused utility functions matm() and exind() +//- Reduces memory allocations and improves cache locality +//- Maintains identical mathematical behavior with better performance + +// [[Rcpp::export]] +double alfunc(NumericVector BVEC, + NumericMatrix X, + NumericVector Y, + NumericMatrix M, + NumericVector MU, + double LAMBDA, + double CONST1, + double CONST2, + int U, int V) { + // to remain consistent with the paper, we should set const1 = 1 and lambda = 0. + int n = X.nrow(); + int p = X.ncol(); + int nmu = MU.length(); + int uv = U + V; + + // Compute Z = M %*% B directly from BVEC without creating B + NumericMatrix Z(nmu, p); + + for(int j = 0; j < p; j++) { + int bvec_offset = j * uv; // Starting index in BVEC for column j + for(int i = 0; i < nmu; i++) { + Z(i, j) = 0.0; + for(int k = 0; k < uv; k++) { + Z(i, j) += M(i, k) * BVEC[bvec_offset + k]; + } + } + } + + // Combined loop: find nearest index, get prediction, and calculate loss + double temp1 = 0.0; + + for(int i = 0; i < n; i++) { + int closest_idx = 0; + double rmin = 1.0e9; + + // Find closest Z row to X row i + for(int j = 0; j < nmu; j++) { + double temp_dist = 0.0; + for(int k = 0; k < p; k++) { + double diff = X(i, k) - Z(j, k); + temp_dist += diff * diff; + } + + if(temp_dist < rmin) { + rmin = temp_dist; + closest_idx = j; + } + } + + // Calculate loss immediately using the closest index + double diff = Y[i] - MU[closest_idx]; + temp1 += diff * diff; + } + + double loss = temp1 / CONST1; + + // Add penalty term if lambda > 0 + if(LAMBDA > 0.0 && nmu > 2) { + double temp2 = 0.0; + for(int j = 0; j < p; j++) { + for(int i = 1; i < (nmu - 1); i++) { + double diff = Z(i - 1, j) - 2.0 * Z(i, j) + Z(i + 1, j); + temp2 += diff * diff; + } + } + loss += LAMBDA * temp2 / CONST2; + } + + return loss; +} + +//Khaled: +// ========== SIMPLEX SETUP ========== +NumericMatrix varset(NumericVector BVEC, int U, int V, int P, double TAU) { + int andim = (U + V) * P; + NumericMatrix AP(andim + 1, andim); + + // First row: BVEC unchanged + for(int j = 0; j < andim; j++) { + AP(0, j) = BVEC[j]; + } + + // Remaining rows: BVEC with TAU added to successive elements + for(int i = 1; i <= andim; i++) { + for(int j = 0; j < andim; j++) { + AP(i, j) = BVEC[j]; + } + AP(i, i - 1) += TAU; + //Khaled: + //Alternative way for AP(i, i - 1). + //Adding a fixed TAU, regardless of the scale of the BVEC components, is problematic. + //It may be better to scale TAU relative to each component to achieve global optimization. + //If you want to use the following approach, please uncomment these lines and comment out AP above. + //If you activate the following code, be careful that TAU cannot be a fixed constant. + //In this case, interpret TAU as a relative step size (e.g., TAU = 0.05 means a 5% perturbation), not an absolute one. + + // // Relative perturbation: TAU * |BVEC[i-1]| or TAU if near zero + // double delta = fabs(BVEC[i - 1]) > 1e-8 ? + // TAU * fabs(BVEC[i - 1]) : TAU; + // AP(i, i - 1) += delta; + } + + return AP; +} + +// ========== SIMPLEX OPERATIONS ========== +double amotry(NumericMatrix& PMAT, + NumericVector& YVEK, + NumericVector& PSUM, + int IHI, + double FAC, + const NumericMatrix& X, + const NumericVector& Y, + const NumericMatrix& M, + const NumericVector& MU, + double LAMBDA, + double CONST1, + double CONST2, + int U, + int V){ + //PMAT: The simplex matrix - each row is a vertex (parameter vector) + //YVEK: Function values at each vertex (loss at each simplex point) + //PSUM: Sum of all vertices (used to compute centroid efficiently) + //IHI: Index of the highest (worst) point in the simplex (0-indexed in C++) + //FAC: Extrapolation factor. + //All other inputs are for loss function evaluation (passed by value) + //What thi function do: + //Extrapolates by a factor FAC through the face of the simplex across + //from the highest point, tries it, and replaces the high point if + //the new point is better + int ndim = PMAT.ncol(); + NumericVector PTRY(ndim); + + double fac1 = (1.0 - FAC) / ndim; + double fac2 = fac1 - FAC; + + for(int j = 0; j < ndim; j++) { + PTRY[j] = PSUM[j] * fac1 - PMAT(IHI, j) * fac2; + } + + double ytry = alfunc(PTRY, X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V); + + if(ytry < YVEK[IHI]) { + YVEK[IHI] = ytry; + for(int j = 0; j < ndim; j++) { + PSUM[j] = PSUM[j] - PMAT(IHI, j) + PTRY[j]; + PMAT(IHI, j) = PTRY[j]; + } + } + + return ytry; +// Khaled: pass-by-reference eliminates the need to return PMAT, YVEK, and PSUM, +// since they are modified in place; the calling function only requires ytry for its control-flow logic. +} + +// ========== OPTIMIZER ========== + +List amoeba(NumericMatrix PMAT, NumericVector YVEK, + double FTOL, int ITMAX, + NumericMatrix X, NumericVector Y, NumericMatrix M, + NumericVector MU, double LAMBDA, + double CONST1, double CONST2, int U, int V, double TINY) { + + int ndim = PMAT.ncol(); + int mp = PMAT.nrow(); + int iter = 0; + int erro = 0; + + if(ndim > 20) { + erro = 5; + return List::create(Named("PMAT") = PMAT, + Named("YVEK") = YVEK, + Named("ITER") = iter, + Named("ERRO") = erro); + } + + NumericVector PSUM(ndim); + + while(true) { + // Compute PSUM + for(int j = 0; j < ndim; j++) { + double sum1 = 0.0; + for(int i = 0; i < mp; i++) { + sum1 += PMAT(i, j); + } + PSUM[j] = sum1; + } + + // Find ILO, IHI, INHI + int ilo = 0; + int ihi, inhi; + + if(YVEK[0] > YVEK[1]) { + ihi = 0; + inhi = 1; + } else { + ihi = 1; + inhi = 0; + } + + for(int i = 0; i < mp; i++) { + if(YVEK[i] <= YVEK[ilo]) ilo = i; + if(YVEK[i] > YVEK[ihi]) { + inhi = ihi; + ihi = i; + } else if(YVEK[i] > YVEK[inhi]) { + if(i != ihi) inhi = i; + } + } + + // Check convergence + double rtol = 2.0 * fabs(YVEK[ihi] - YVEK[ilo]) / + (fabs(YVEK[ihi]) + fabs(YVEK[ilo]) + TINY); + + if(rtol < FTOL) { + // Put best point in slot 0 + double swap = YVEK[0]; + YVEK[0] = YVEK[ilo]; + YVEK[ilo] = swap; + + for(int j = 0; j < ndim; j++) { + swap = PMAT(0, j); + PMAT(0, j) = PMAT(ilo, j); + PMAT(ilo, j) = swap; + } + + break; + } + + // Check iteration limit + if(iter >= ITMAX) { + erro = 1; + break; + } + + iter += 2; + + // Try reflection + double ytry = amotry(PMAT, YVEK, PSUM, ihi, -1.0, + X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V); + + if(ytry <= YVEK[ilo]) { + // Try expansion + ytry = amotry(PMAT, YVEK, PSUM, ihi, 2.0, + X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V); + } else if(ytry >= YVEK[inhi]) { + // Try contraction + double ysave = YVEK[ihi]; + ytry = amotry(PMAT, YVEK, PSUM, ihi, 0.5, + X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V); + + if(ytry >= ysave) { + // Shrink simplex + for(int i = 0; i < mp; i++) { + if(i != ilo) { + for(int j = 0; j < ndim; j++) { + PSUM[j] = 0.5 * (PMAT(i, j) + PMAT(ilo, j)); + PMAT(i, j) = PSUM[j]; + } + YVEK[i] = alfunc(PSUM, X, Y, M, MU, LAMBDA, + CONST1, CONST2, U, V); + } + } + iter += ndim; + } + } else { + iter -= 1; + } + } + + return List::create(Named("PMAT") = PMAT, + Named("YVEK") = YVEK, + Named("ITER") = iter, + Named("ERRO") = erro); +} + +// ========== MAIN WRAPPER ========== + +// [[Rcpp::export]] +List optimize_spline(NumericVector BVEC, + NumericMatrix X, + NumericVector Y, + NumericMatrix M, + NumericVector MU, + double LAMBDA, + double CONST1, + double CONST2, + int U, int V, + double TAU, + double FTOL, + double TINY, + int ITMAX) { + + int p = X.ncol(); + int andim = (U + V) * p; + int erro = 0; + + // ========== STAGE 1 ========== + + // Create simplex + NumericMatrix AP1 = varset(BVEC, U, V, p, TAU); + + // Evaluate at each vertex + NumericVector AY1(andim + 1); + for(int i = 0; i <= andim; i++) { + NumericVector tempvk(andim); + for(int j = 0; j < andim; j++) { + tempvk[j] = AP1(i, j); + } + AY1[i] = alfunc(tempvk, X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V); + } + + // Run amoeba + List result1 = amoeba(AP1, AY1, FTOL, ITMAX, + X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V, TINY); + + NumericMatrix PMAT1 = result1["PMAT"]; + NumericVector YVEK1 = result1["YVEK"]; + int iter1 = result1["ITER"]; + erro = result1["ERRO"]; + + if(erro != 0) { + NumericVector bvec_out(andim); + for(int j = 0; j < andim; j++) { + bvec_out[j] = PMAT1(0, j); + } + + return List::create(Named("LOSS") = YVEK1[0], + Named("BVEC") = bvec_out, + Named("LOSS1") = YVEK1[0], + Named("ITER1") = iter1, + Named("ITER") = 0, + Named("ERRO") = erro); + } + + double loss1 = YVEK1[0]; + + // ========== STAGE 2 ========== + + // Extract best from stage 1 + NumericVector BVEC2(andim); + for(int j = 0; j < andim; j++) { + BVEC2[j] = PMAT1(0, j); + } + + // Create new simplex + NumericMatrix AP2 = varset(BVEC2, U, V, p, TAU); + + // Evaluate at each vertex + NumericVector AY2(andim + 1); + for(int i = 0; i <= andim; i++) { + NumericVector tempvk(andim); + for(int j = 0; j < andim; j++) { + tempvk[j] = AP2(i, j); + } + AY2[i] = alfunc(tempvk, X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V); + } + + // Run amoeba again + List result2 = amoeba(AP2, AY2, FTOL, ITMAX, + X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V, TINY); + + NumericMatrix PMAT2 = result2["PMAT"]; + NumericVector YVEK2 = result2["YVEK"]; + int iter2 = result2["ITER"]; + erro = result2["ERRO"]; + + double loss = YVEK2[0]; + + // Extract final BVEC + NumericVector bvec_out(andim); + for(int j = 0; j < andim; j++) { + bvec_out[j] = PMAT2(0, j); + } + + return List::create(Named("LOSS") = loss, + Named("BVEC") = bvec_out, + Named("LOSS1") = loss1, + Named("ITER1") = iter1, + Named("ITER") = iter2, + Named("ERRO") = erro); +} + + + + + +// ========== ADDITIONAL FUNCTIONS (NOT USED IN OPTIMIZER) ========== + +//These two functions (mipd and inddup) are not used in the Nelder–Mead algorithm or in biplot.spline.axis. They are rewritten to align with the original Fortran code (although they may be used elsewhere in the library). + +double mipd(NumericVector X, NumericVector Y) { + int n = X.length(); + double d = 0.0; + int count = 0; + + for(int i = 1; i < n; i++) { + for(int j = 0; j < i; j++) { + double dx = X[i] - X[j]; + double dy = Y[i] - Y[j]; + d += sqrt(dx * dx + dy * dy); + count++; + } + } + + return d / count; +} + + +LogicalVector inddup(NumericVector X, NumericVector Y, + NumericVector RW, double FRAC) { + int n = X.length(); + double xtol = FRAC * (RW[1] - RW[0]); + double ytol = FRAC * (RW[3] - RW[2]); + + LogicalVector DUP(n); + DUP[0] = false; + + for(int i = 1; i < n; i++) { + DUP[i] = false; + for(int j = 0; j < i; j++) { + double dx = fabs(X[i] - X[j]); + double dy = fabs(Y[i] - Y[j]); + + if(dx < xtol && dy < ytol) { + DUP[i] = true; + break; + } + } + } + + return DUP; +} \ No newline at end of file diff --git a/src/LnjTinyNew.f b/src/LnjTinyNew.f deleted file mode 100644 index 8e021d0..0000000 --- a/src/LnjTinyNew.f +++ /dev/null @@ -1,512 +0,0 @@ - SUBROUTINE MIPD(X,Y,N,D) - IMPLICIT DOUBLE PRECISION(A-H,O-Z) - DIMENSION X(N), Y(N) - D = 0.D0 - DIV = DBLE(N*(N-1)/2) - DO 23284 I = 2,N - DO 23286 J = 1,I-1 - D = D + SQRT((X(I)-X(J))**2 + (Y(I)-Y(J))**2) -23286 CONTINUE -23284 CONTINUE - D = D/DIV - RETURN - END -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - - SUBROUTINE INDDUP(X,Y,N,RW,FRAC,DUP) - IMPLICIT DOUBLE PRECISION(A-H,O-Z) - LOGICAL DUP(N) - DIMENSION X(N), Y(N), RW(4) - XTOL = FRAC*(RW(2)-RW(1)) - YTOL = FRAC*(RW(4)-RW(3)) - DUP(1) = .FALSE. - DO 23182 I = 2,N - DUP(I) = .FALSE. - DO 23184 J = 1,I-1 - DX = ABS(X(I)-X(J)) - DY = ABS(Y(I)-Y(J)) - IF(.NOT.(DX .LT. XTOL .AND. DY .LT. YTOL))GOTO 23186 - DUP(I) = .TRUE. - GOTO 23185 -23186 CONTINUE -23184 CONTINUE -23185 CONTINUE -23182 CONTINUE - RETURN - END -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - SUBROUTINE MATM ( A1, A2B1, B2, A, B, OUT) - -C This subroutine performs matrix multiplication. -C This should be improved with optimised code (such as from -C Linpack, etc.) - IMPLICIT NONE - INTEGER A1, A2B1, B2 - DOUBLE PRECISION A(A1,A2B1), B(A2B1,B2), OUT(A1,B2) - -C DUMMIES - - INTEGER I, J, K - - DO 300,J=1,B2 - DO 200,I=1,A1 - OUT(I,J)=0 - DO 100,K=1,A2B1 - OUT(I,J)=OUT(I,J)+A(I,K)*B(K,J) -100 CONTINUE - -200 CONTINUE - -300 CONTINUE - - END - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - SUBROUTINE EXIND (Z, X, N, P, NMU, IND) - -C This subroutine returns the indices of EXact shortest distances. -C Tested against R output -- works - IMPLICIT NONE - INTEGER N, P, NMU, IND(N) - DOUBLE PRECISION Z(NMU,P), X(N,P) - -C DATA Ind/n*0.0/ - -C DUMMIES - - INTEGER I, J, K - DOUBLE PRECISION TEMP1, RMIN - - DO 300, I=1,N - Ind(I)=0 - RMIN = 1.0D+09 - DO 200, J=1,NMU - TEMP1=0.0 - DO 100, K=1,P - TEMP1=TEMP1+( X(I,K)-Z(J,K) )**2 -100 CONTINUE - IF (TEMP1 .LT. RMIN) THEN - RMIN=TEMP1 - IND(I)=J - END IF -200 CONTINUE -300 CONTINUE - - END - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - DOUBLE PRECISION FUNCTION ALFUNC (BVEC, X, N, P, NMU, IND, MU, - + PRED, LAMBDA, Y, CONST1, CONST2, U, V, M) - -C This subroutine calculates the loss function. -C const1 and const2 are the two scaling constants used in the -C loss function... -C ... they are calculated once in R itself. - IMPLICIT NONE - INTEGER N, P, NMU, IND(N), U, V - DOUBLE PRECISION LAMBDA - DOUBLE PRECISION X(N,P), MU(NMU), PRED(NMU), - + Y(N), CONST1, CONST2, M(NMU,U+V), BVEC((U+V)*P) -C NOTE Bvec is a vector here - -C common block - -C COMMON /CSHARE/X, N, P, NMU, IND, MU, PRED, LAMBDA, Y -C + CONST1, CONST2, U, V, M, BVEC -C DUMMIES - - INTEGER I, J - DOUBLE PRECISION Z(NMU, P), TEMP1, TEMP2 - -CCCCCCCCCCCC - - ALFUNC=0.0 - - CALL MATM( NMU, U+V, P, M, BVEC, Z) - - CALL EXIND(Z,X,N,P,NMU,IND) - - DO 100, I=1,N - PRED(I)=MU(IND(I)) -100 CONTINUE - -CCCCCCCCCCCC - - TEMP1=0.0 - DO 500, I=1,N - TEMP1=TEMP1+ ( Y(I)-PRED(I) )**2 -500 CONTINUE - - ALFUNC= TEMP1/CONST1 - - - IF (LAMBDA .GT. 0.0) THEN - TEMP2=0.0 - DO 700, J=1,P - DO 600, I=2,(NMU-1) - TEMP2=TEMP2+( Z(I-1,J)-2*Z(I,J)+Z(I+1,J) )**2 -600 CONTINUE -700 CONTINUE - ALFUNC=ALFUNC+LAMBDA*TEMP2/CONST2 - END IF - -C Checked Fortran Loss against R. Matches. - - END - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - SUBROUTINE VARSET (BVEC, AP, U, V, P, TAU) - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -CC This subtroutine sets Ap and Ay for subroutine L. Dirty work. -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - IMPLICIT NONE - INTEGER U, V, P - - DOUBLE PRECISION AP((U+V)*P+1,(U+V)*P), BVEC((U+V)*P), TAU - - INTEGER I,J, ANDIM - - ANDIM=(U+V)*P - -C The first Ap is Bvec: - DO 100, J=1,ANDIM - AP(1,J)=BVEC(J) -100 CONTINUE - -C Adding a number tau to a successive element in each successive -C row of Ap: - DO 300, I=2,(ANDIM+1) - DO 200, J=1,ANDIM - AP(I,J)=BVEC(J) -200 CONTINUE - AP(I,I-1)=AP(I,I-1)+TAU -300 CONTINUE - - END -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - - SUBROUTINE L (LOSS, X, N, P, NMU, IND, MU, PRED, LAMBDA, Y, - +CONST1, CONST2, U, V, M, BVEC, TAU, AY, TEMPVK, ITER, FTOL, - +LOSS1, ITER1, ERRO, CONST3, ITMAX) - - IMPLICIT NONE - -C iter1 sends back the total number of iterations in the first call -C to amoeba; iter sends back the total number of iterations in the -C second call to amoeba. - - EXTERNAL ALFUNC - - DOUBLE PRECISION ALFUNC - -!CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -!C This is the subroutine actually called from within R. -!CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - INTEGER N, P, NMU, IND(N), U, V, ITER, ITER1, ERRO, ITMAX - DOUBLE PRECISION TAU, LAMBDA, CONST3 - DOUBLE PRECISION LOSS, LOSS1, X(N,P), MU(NMU), PRED(NMU), FTOL - DOUBLE PRECISION Y(N), CONST1, CONST2, M(NMU,U+V), BVEC((U+V)*P) - -C NOTE Bvec is a vector here. - -C ALFUNC IS THE NAME OF THE FUNCTION THAT CALCULATES THE LOSS. - - INTEGER ANDIM - DOUBLE PRECISION AP((U+V)*P+1,(U+V)*P), AY((U+V)*P+1) - -C The Ap's are the N+1=(u+v)*p+1 points defining the simplex; -C it's used in amoeba. -C The Ay's are the N+1=(u+v)*p+1 loss function evaluations for each -C of the Ap rows; it's used in amoeba. -C Ap1st stores the first set of Aps. Except for the first row, -C they are reused for the second call to amoeba. -C LOSS1 indicates the loss after the first call of amoeba. - - - INTEGER I, J - DOUBLE PRECISION TEMPVK((U+V)*P) -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - ERRO=0 - ANDIM=(U+V)*P - -C Setting Ap before the first call to amoeba: - - CALL VARSET (BVEC, AP, U, V, P, TAU) - -C Evaluating Ap to get Ay: - - DO 200, I=1,(ANDIM+1) - DO 100, J=1,ANDIM - TEMPVK(J)=AP(I,J) -100 CONTINUE -C Evaluating the successive Ay's: - AY(I)=ALFUNC(TEMPVK, X, N, P, NMU, IND, MU, PRED, LAMBDA, Y, - +CONST1, CONST2, U, V, M) -200 CONTINUE -C Checked the AY(I) values against those from R (before creating -C the VARSET function). - -C Calling amoeba for the FIRST TIME: - - CALL AMOEBA(AP, AY, ANDIM+1, ANDIM, ANDIM, FTOL, ALFUNC, ITER, - +X, N, P, NMU, IND, MU, PRED, LAMBDA, Y, CONST1, CONST2, U, V, - +M, ERRO, CONST3,ITMAX) -C Error check - IF (ERRO .NE. 0) GO TO 9999 -C Saving the number of iterations in the first call to amoeba: - ITER1=ITER - -C Saving the loss from the first call to amoeba: - LOSS1=AY(1) - -C Setting Ap before the second call to amoeba: - - DO 300, J=1,ANDIM - BVEC(J)=AP(1,J) -300 CONTINUE - - CALL VARSET (BVEC, AP, U, V, P, TAU) - -C Evaluating Ap to get Ay: - - DO 500, I=1,(ANDIM+1) - DO 400, J=1,ANDIM - TEMPVK(J)=AP(I,J) -400 CONTINUE - -C Evaluating the successive Ay's: - AY(I)=ALFUNC(TEMPVK, X, N, P, NMU, IND, MU, PRED, - +LAMBDA, Y, CONST1, CONST2, U, V, M) -500 CONTINUE - -C Checked the AY(I) values against those from R -C (before creating the VARSET function). - -C Calling amoeba for the SECOND time: - - CALL AMOEBA(AP, AY, ANDIM+1, ANDIM, ANDIM, FTOL, ALFUNC, ITER, - +X, N, P, NMU, IND, MU, PRED, LAMBDA, Y, CONST1, CONST2, U, - +V, M, ERRO, CONST3,ITMAX) - -C Error check - IF (ERRO .NE. 0) GO TO 9999 - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - -C RETURN VALUES: (carefully check) - - LOSS=AY(1) - DO 600, J=1, ANDIM - BVEC(J)=AP(1,J) -600 CONTINUE - -9999 END - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - SUBROUTINE AMOEBA (PMAT, YVEK, MP, NP, NDIM, FTOL, FUNK, ITER, - +QX, QN, QP, QNMU, QIND, QMU, QPRED, QLAMBD, QY, QCONS1, QCONS2, - +QU, QV, QM, ERRO, TINY, ITMAX) -C second and third lines of arguments added myself. - -C MP is the number of rows of PMAT. -C NP is the number of columns of PMAT. - - IMPLICIT NONE - INTEGER NMAX, ITMAX, MP, NP, NDIM, ITER, ERRO - DOUBLE PRECISION TINY, CONST3 - PARAMETER (NMAX=20) - - DOUBLE PRECISION PMAT(MP, NP), YVEK(MP), FTOL, FUNK - - EXTERNAL FUNK - - INTEGER QN, QP, QNMU, QIND(QN), QU, QV - DOUBLE PRECISION QX(QN, QP), QMU(QNMU), QPRED(QNMU), QLAMBD, - + QY(QN), QCONS1, QCONS2, QM(QNMU,QU+QV) - - INTEGER I, IHI, ILO, INHI, J - DOUBLE PRECISION RTOL, SUM1, SWAP, YSAVE, YTRY, PSUM(NMAX) - DOUBLE PRECISION AMOTRY - -CCCCCCCCCCCCCCCCCCCCCCCCCCC - -C USES AMOTRY, FUNK, -C Multidimensional minimization of the function funk(x) -C where x(1:ndim) is a vector -C in ndim dimensions, by the downhill simplex method of Nelder -C and Mead. The matrix -C pmat(1:ndim+1,1:ndim) is input. Its ndim+1 rows are -C ndim-dimensional vectors which are -C the vertices of the starting simplex. Also input is the vector -C yvek(1:ndim+1), whose compo- -C nents must be pre-initialized to the values of funk evaluated -C at the ndim+1 vertices (rows) -C of p; and ftol the fractional convergence tolerance to be -C achieved in the function value -C (n.b.!). On output, p and y will have been reset to ndim+1 new -C points all within ftol of -C a minimum function value, and iter gives the number of -C function evaluations taken. - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - CONST3 = 0.0001 - TINY=CONST3 - ITER=0 - ERRO=0 - IF (NDIM .GT. NMAX) THEN - ERRO=5 - GO TO 999 - END IF - -10 DO 40, J=1,NDIM - SUM1=0.0 - DO 30, I=1,NDIM+1 - SUM1=SUM1+PMAT(I,J) -30 CONTINUE - PSUM(J)=SUM1 -40 CONTINUE - -50 ILO=1 - - IF (YVEK(1) .GT. YVEK(2)) THEN - IHI=1 - INHI=2 - ELSE - IHI=2 - INHI=1 - END IF - - DO 60, I=1,NDIM+1 - IF(YVEK(I) .LE. YVEK(ILO)) ILO=I - IF(YVEK(I) .GT. YVEK(IHI)) THEN - INHI=IHI - IHI=I - ELSE IF(YVEK(I) .GT. YVEK(INHI)) THEN - IF(I .NE. IHI) INHI=I - END IF -60 CONTINUE - - RTOL=2.0*DABS(YVEK(IHI)-YVEK(ILO))/(DABS(YVEK(IHI)) - + +DABS(YVEK(ILO))+TINY) - -C Compute the fractional range from highest to lowest and return if -C satisfactory. -C If returning, put best point and value in slot 1. - - IF (RTOL .LT. FTOL) THEN - SWAP=YVEK(1) - YVEK(1)=YVEK(ILO) - YVEK(ILO)=SWAP - DO 70, J=1,NDIM - SWAP=PMAT(1,J) - PMAT(1,J)=PMAT(ILO,J) - PMAT(ILO,J)=SWAP -70 CONTINUE - RETURN - END IF - IF (ITER .GE. ITMAX) THEN - ERRO=1 - GO TO 999 - END IF -C 'ITMAX exceeded in AMOEBA' - ITER=ITER+2 -C Begin a new iteration. First extrapolate by a factor -1 through the -C face of the simplex across from the highest point i.e. reflect simplex -C from highest point - - YTRY=AMOTRY(PMAT, YVEK, PSUM, MP, NP, NDIM, FUNK, IHI, -1.0D+00, - + QX, QN, QP,QNMU, QIND, QMU, QPRED, QLAMBD, QY, QCONS1, QCONS2, - + QU, QV, QM) - - IF (YTRY .LE. YVEK(ILO)) THEN - YTRY=AMOTRY(PMAT, YVEK, PSUM, MP, NP, NDIM, FUNK, IHI, 2.0D+00, - + QX, QN, QP,QNMU, QIND, QMU, QPRED, QLAMBD, QY, QCONS1, QCONS2, - + QU, QV, QM) - ELSE IF (YTRY .GE. YVEK(INHI)) THEN - YSAVE=YVEK(IHI) - YTRY=AMOTRY(PMAT, YVEK, PSUM, MP, NP, NDIM,FUNK, IHI, 0.5D+00, - + QX, QN, QP, QNMU, QIND, QMU, QPRED, QLAMBD, QY, QCONS1, - + QCONS2, + QU, QV, QM) - - IF (YTRY .GE. YSAVE) THEN - DO 90, I=1,NDIM+1 - IF(I .NE. ILO) THEN - DO 80, J=1,NDIM - PSUM(J)=0.5*(PMAT(I,J)+PMAT(ILO,J)) - PMAT(I,J)=PSUM(J) -80 CONTINUE - YVEK(I)=FUNK(PSUM, QX, QN, QP, QNMU, QIND, QMU, QPRED, - + QLAMBD, QY, QCONS1, QCONS2, QU, QV, QM) - END IF -90 CONTINUE - - ITER=ITER+NDIM - - GOTO 10 - - END IF - ELSE - ITER=ITER-1 - END IF - GOTO 50 -999 END - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - DOUBLE PRECISION FUNCTION AMOTRY(PMAT, YVEK, PSUM, MP, NP, NDIM, - + FUNK,IHI, FAC, - + QX, QN, QP, QNMU, QIND, QMU, QPRED, QLAMBD, - + QY, QCONS1, QCONS2, QU, QV, QM) - - IMPLICIT NONE - INTEGER IHI, MP, NDIM, NP, NMAX - DOUBLE PRECISION FAC, PMAT(MP,NP), PSUM(NP), YVEK(MP), FUNK - - PARAMETER (NMAX=20) - EXTERNAL FUNK -C Extrapolates by a factor FAC through the face of the simplex across -C from the highest point, tries it, and replaces the high point if -C the new point is better - INTEGER J, QN, QP, QNMU, QIND(QN), QU, QV - DOUBLE PRECISION FAC1, FAC2, YTRY, PTRY(NMAX) - DOUBLE PRECISION QLAMBD, QX(QN,QP), QMU(QNMU) - DOUBLE PRECISION QPRED(QNMU), QY(QN), QCONS1, QCONS2 - DOUBLE PRECISION QM(QNMU,QU+QV) - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - FAC1=(1.0-FAC)/NDIM - FAC2=FAC1-FAC - DO 10, J=1,NDIM - PTRY(J)=PSUM(J)*FAC1-PMAT(IHI,J)*FAC2 -10 CONTINUE - - YTRY=FUNK(PTRY, QX, QN, QP, QNMU, QIND, QMU, QPRED, QLAMBD, QY, - + QCONS1, QCONS2, QU, QV, QM) - - IF (YTRY .LT. YVEK(IHI)) THEN - YVEK(IHI)=YTRY - DO 20, J=1,NDIM - PSUM(J)=PSUM(J)-PMAT(IHI,J)+PTRY(J) - PMAT(IHI,J)=PTRY(J) -20 CONTINUE - END IF - AMOTRY=YTRY - - END - - diff --git a/src/LnjTinyNew.o b/src/LnjTinyNew.o index 2d3b68c..ba85473 100644 Binary files a/src/LnjTinyNew.o and b/src/LnjTinyNew.o differ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..3d9eb9e --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,67 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + +// alfunc +double alfunc(NumericVector BVEC, NumericMatrix X, NumericVector Y, NumericMatrix M, NumericVector MU, double LAMBDA, double CONST1, double CONST2, int U, int V); +RcppExport SEXP _biplotEZ_alfunc(SEXP BVECSEXP, SEXP XSEXP, SEXP YSEXP, SEXP MSEXP, SEXP MUSEXP, SEXP LAMBDASEXP, SEXP CONST1SEXP, SEXP CONST2SEXP, SEXP USEXP, SEXP VSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type BVEC(BVECSEXP); + Rcpp::traits::input_parameter< NumericMatrix >::type X(XSEXP); + Rcpp::traits::input_parameter< NumericVector >::type Y(YSEXP); + Rcpp::traits::input_parameter< NumericMatrix >::type M(MSEXP); + Rcpp::traits::input_parameter< NumericVector >::type MU(MUSEXP); + Rcpp::traits::input_parameter< double >::type LAMBDA(LAMBDASEXP); + Rcpp::traits::input_parameter< double >::type CONST1(CONST1SEXP); + Rcpp::traits::input_parameter< double >::type CONST2(CONST2SEXP); + Rcpp::traits::input_parameter< int >::type U(USEXP); + Rcpp::traits::input_parameter< int >::type V(VSEXP); + rcpp_result_gen = Rcpp::wrap(alfunc(BVEC, X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V)); + return rcpp_result_gen; +END_RCPP +} +// optimize_spline +List optimize_spline(NumericVector BVEC, NumericMatrix X, NumericVector Y, NumericMatrix M, NumericVector MU, double LAMBDA, double CONST1, double CONST2, int U, int V, double TAU, double FTOL, double TINY, int ITMAX); +RcppExport SEXP _biplotEZ_optimize_spline(SEXP BVECSEXP, SEXP XSEXP, SEXP YSEXP, SEXP MSEXP, SEXP MUSEXP, SEXP LAMBDASEXP, SEXP CONST1SEXP, SEXP CONST2SEXP, SEXP USEXP, SEXP VSEXP, SEXP TAUSEXP, SEXP FTOLSEXP, SEXP TINYSEXP, SEXP ITMAXSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< NumericVector >::type BVEC(BVECSEXP); + Rcpp::traits::input_parameter< NumericMatrix >::type X(XSEXP); + Rcpp::traits::input_parameter< NumericVector >::type Y(YSEXP); + Rcpp::traits::input_parameter< NumericMatrix >::type M(MSEXP); + Rcpp::traits::input_parameter< NumericVector >::type MU(MUSEXP); + Rcpp::traits::input_parameter< double >::type LAMBDA(LAMBDASEXP); + Rcpp::traits::input_parameter< double >::type CONST1(CONST1SEXP); + Rcpp::traits::input_parameter< double >::type CONST2(CONST2SEXP); + Rcpp::traits::input_parameter< int >::type U(USEXP); + Rcpp::traits::input_parameter< int >::type V(VSEXP); + Rcpp::traits::input_parameter< double >::type TAU(TAUSEXP); + Rcpp::traits::input_parameter< double >::type FTOL(FTOLSEXP); + Rcpp::traits::input_parameter< double >::type TINY(TINYSEXP); + Rcpp::traits::input_parameter< int >::type ITMAX(ITMAXSEXP); + rcpp_result_gen = Rcpp::wrap(optimize_spline(BVEC, X, Y, M, MU, LAMBDA, CONST1, CONST2, U, V, TAU, FTOL, TINY, ITMAX)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_biplotEZ_alfunc", (DL_FUNC) &_biplotEZ_alfunc, 10}, + {"_biplotEZ_optimize_spline", (DL_FUNC) &_biplotEZ_optimize_spline, 14}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_biplotEZ(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/biplotEZ.dll b/src/biplotEZ.dll index 404971a..1f615b8 100644 Binary files a/src/biplotEZ.dll and b/src/biplotEZ.dll differ diff --git a/tests/temp/compare_Rcpp_FORTRAN.R b/tests/temp/compare_Rcpp_FORTRAN.R new file mode 100644 index 0000000..c384eb1 --- /dev/null +++ b/tests/temp/compare_Rcpp_FORTRAN.R @@ -0,0 +1,366 @@ +library(Rcpp) +library(splines) +#chang this according to your path +setwd("C:\\Users\\29827094\\Documents\\GitHub\\biplotEZ") +# Compile Rcpp code (has alfunc_rcpp and optimize_spline_rcpp) +sourceCpp("src\\LnjTinyNew.cpp") + +# Load FORTRAN code +dyn.load("tests\\temp\\fortran.dll") + +# ======================================== +# I fixed all the randomness in this code (using seed), so you should see a similar result for Rcpp and Fortran. +# ======================================== + +biplot.spline.axis <- function(j, X, Y, means, sd, n.int, + spline.control, dmeth=0, + optim_method = "rcpp", + fix_seed = TRUE, # NEW PARAMETER + base_seed = NULL, # NEW PARAMETER + sampling_seed = NULL, # NEW: separate seed for sampling + ...) { + n <- nrow(X) + p <- ncol(X) + + # Set seed BEFORE sampling if provided + #new + if (!is.null(sampling_seed)) { + set.seed(sampling_seed) + } + #I dont know why we have this: + if (n > 103) { + my.sample <- sample(1:n, size=103, replace=F) + X <- X[my.sample,] + Y <- Y[my.sample,] + n <- nrow(X) + } + + # Extract control parameters + tau <- spline.control$tau + nmu <- spline.control$nmu + u <- spline.control$u + v <- spline.control$v + lambda <- spline.control$lambda + smallsigma <- spline.control$smallsigma + bigsigma <- spline.control$bigsigma + gamma <- spline.control$gamma + bigsigmaactivate <- spline.control$bigsigmaactivate + eps <- spline.control$eps + tiny <- spline.control$tiny + itmax <- spline.control$itmax + ftol <- spline.control$ftol + + cat("Calculating spline axis for variable", j, "using", optim_method, "\n") + + if(dmeth==1) stop("dmeth should be equal to zero or integer greater than 1 \n") + + Ytilde <- scale(scale(Y, center=FALSE, scale=1/sd), + center=-1*means, scale=FALSE) + + ytilde <- Ytilde[,j] + mutilde <- seq(from=min(ytilde), to=max(ytilde), length.out=nmu) + y <- Y[,j] + rangey <- max(y) - min(y) + mu <- seq(from=min(y)-.3*rangey, to=max(y)+.3*rangey, length.out=nmu) + markers <- (pretty(ytilde) - means[j]) / sd[j] + mu <- sort(c(mu, markers)) + mu <- unique(mu) + nmu <- length(mu) + + if (v > 0) { + knots <- seq.int(from=0, to=1, length.out=v+2)[-c(1, v+2)] + knots <- stats::quantile(y, knots) + M <- splines::bs(mu, knots=knots, degree=u, intercept=FALSE) + } else { + M <- splines::bs(mu, df=u+v, degree=u, intercept=FALSE) + } + + M <- scale(M, scale=FALSE, center=M[which.min(abs(mu)),]) + Breg <- t(solve(t(X)%*%X)%*%t(X)%*%y) + Zreg <- mu%*%Breg / sum(Breg^2) + Bvec <- as.vector(solve(t(M)%*%M)%*%t(M)%*%Zreg) + + const1 <- sum(y^2) + const2 <- sum(X^2) / (n*p) + + # ======================================== + # OPTIMIZATION WRAPPER + # ======================================== + + optimtouse <- function(Bvec) { + timetemp <- proc.time()[3] + + if (optim_method == "rcpp") { + # Use Rcpp two-stage + returned_data <- optimize_spline( + BVEC = Bvec, X = X, Y = y, M = M, MU = mu, + LAMBDA = lambda, CONST1 = const1, CONST2 = const2, + U = u, V = v, TAU = tau, FTOL = ftol, TINY = tiny, ITMAX = itmax + ) + + if(returned_data$ERRO > 0) { + cat(" Error code:", returned_data$ERRO, "\n") + } + + aa <- list( + BestValue = returned_data$LOSS, + BestSolution = returned_data$BVEC, + ConvergenceCode = returned_data$ERRO, + iter1 = returned_data$ITER1, + iter = returned_data$ITER, + TimeTaken = proc.time()[3] - timetemp + ) + + } else if (optim_method == "fortran") { + # Use FORTRAN two-stage + returned_data <- .Fortran("l", + LOSS = as.double(0), + X = as.double(X), + N = as.integer(n), + P = as.integer(p), + NMU = as.integer(nmu), + IND = as.integer(rep(0, n)), + MU = as.double(mu), + PRED = as.double(rep(0, n)), + LAMBDA = as.double(lambda), + Y = as.double(y), + CONST1 = as.double(const1), + CONST2 = as.double(const2), + U = as.integer(u), + V = as.integer(v), + M = as.double(M), + BVEC = as.double(Bvec), + TAU = as.double(tau), + AY = as.double(rep(0, (u+v)*p+1)), + TEMPVK = as.double(rep(0, (u+v)*p)), + ITER = as.integer(0), + FTOL = as.double(ftol), + LOSS1 = as.double(0), + ITER1 = as.integer(0), + ERRO = as.integer(0), + CONST3 = as.double(tiny), + ITMAX = as.integer(itmax) + ) + + if(returned_data$ERRO > 0) { + cat(" Error code:", returned_data$ERRO, "\n") + } + + aa <- list( + BestValue = returned_data$LOSS, + BestSolution = returned_data$BVEC, + ConvergenceCode = returned_data$ERRO, + iter1 = returned_data$ITER1, + iter = returned_data$ITER, + TimeTaken = proc.time()[3] - timetemp + ) + } + + aa + } + + EuclidDist2 <- function(X, Y) { + n <- nrow(X) + m <- nrow(Y) + bx <- rowSums(X^2) + by <- rowSums(Y^2) + outer(bx, by, FUN = "+") - 2 * X %*% t(Y) + } + + # Variable initialization + outBestValues <- rep(NA, gamma+1) + outBestSolutions <- matrix(nrow=2*(u+v), ncol=gamma+1) + outTimeTaken <- rep(NA, gamma+1) + BestSolutionsFrequency <- rep(NA, gamma+1) + BestSolutionsIndices <- rep(NA, gamma+1) + + # ======================================== + # PRE-GENERATE ALL RANDOM PERTURBATIONS + #new + # ======================================== + if (fix_seed && !is.null(base_seed)) { + set.seed(base_seed) + } + + # Pre-generate random perturbations for all gamma iterations + random_perturbations <- list() + for (gammacounter in 2:(gamma+1)) { + # Generate perturbation for this iteration + # We don't know yet if it will use smallsigma or bigsigma, + # so we pre-generate both and decide later + random_perturbations[[gammacounter]] <- list( + small = stats::rnorm((u+v)*2, mean=0, sd=smallsigma), + big = stats::rnorm((u+v)*2, mean=0, sd=bigsigma) + ) + } + + # Initial coefficients + temp <- optimtouse(Bvec) + outBestValues[1] <- temp$BestValue + outBestSolutions[,1] <- temp$BestSolution + outTimeTaken[1] <- temp$TimeTaken + BestSolutionsFrequency[1] <- 1 + BestSolutionsIndices[1] <- 1 + DistinctSolutions <- 1 + PreviousBestSolution <- NA + nSameSolutionConsecutively <- 0 + + # Multi-start optimization + for (gammacounter in 2:(gamma+1)) { + # Use pre-generated perturbations + if (nSameSolutionConsecutively >= bigsigmaactivate) { + perturbation <- random_perturbations[[gammacounter]]$big + } else { + perturbation <- random_perturbations[[gammacounter]]$small + } + + temp <- optimtouse(outBestSolutions[,which.min(outBestValues)] + perturbation) + + outTimeTaken[gammacounter] <- temp$TimeTaken + tempSquaredDistances <- EuclidDist2(matrix(temp$BestSolution, nrow=1), + t(outBestSolutions[,1:DistinctSolutions])) + + if (any(tempSquaredDistances < eps)) { + BestSolutionsIndices[gammacounter] <- tempAA <- which.min(tempSquaredDistances) + BestSolutionsFrequency[tempAA] <- BestSolutionsFrequency[tempAA] + 1 + if (!is.na(PreviousBestSolution) && tempAA == PreviousBestSolution) { + nSameSolutionConsecutively <- nSameSolutionConsecutively + 1 + } else { + PreviousBestSolution <- tempAA + nSameSolutionConsecutively <- 0 + } + } else { + DistinctSolutions <- DistinctSolutions + 1 + outBestValues[DistinctSolutions] <- temp$BestValue + outBestSolutions[,DistinctSolutions] <- temp$BestSolution + BestSolutionsFrequency[DistinctSolutions] <- 1 + BestSolutionsIndices[gammacounter] <- DistinctSolutions + nSameSolutionConsecutively <- 0 + } + } + + axis.points <- cbind(M %*% matrix(outBestSolutions[,which.min(outBestValues)], ncol=2), + mu, 0) + + for (i in 1:nrow(axis.points)) { + if (any(zapsmall(axis.points[i,3] - markers) == 0)) { + axis.points[i, 4] <- 1 + } + } + + axis.points[,3] <- axis.points[,3] * sd[j] + means[j] + + # Return with attributes + attr(axis.points, "loss") <- min(outBestValues, na.rm=TRUE) + attr(axis.points, "bvec") <- outBestSolutions[,which.min(outBestValues)] + attr(axis.points, "total_time") <- sum(outTimeTaken, na.rm=TRUE) + attr(axis.points, "distinct_solutions") <- DistinctSolutions + + axis.points +} + +# ======================================== +# EXAMPLE 1: CALCULATE LOSS FUNCTION +# ======================================== + +set.seed(123) + +n <- 150 +p <- 2 +Y <- matrix(rnorm(n * 2), ncol = 2) +Y[, 2] <- Y[, 1] + rnorm(n, 0, 0.5) + +means <- colMeans(Y) +sd <- apply(Y, 2, sd) +X <- Y[, 1:2] + matrix(rnorm(n * p, 0, 0.3), ncol = p) + +spline.control <- list( + tau = 0.01, nmu = 10, u = 3, v = 0, lambda = 0.1, + smallsigma = 0.1, bigsigma = 1.0, gamma = 3, bigsigmaactivate = 2, + eps = 1e-6, tiny = 1e-4, itmax = 500, ftol = 1e-4 +) + +j <- 1 +y <- Y[, j] +rangey <- max(y) - min(y) +mu <- seq(from=min(y)-.3*rangey, to=max(y)+.3*rangey, length.out=10) +M <- splines::bs(mu, df=3, degree=3, intercept=FALSE) +M <- scale(M, scale=FALSE, center=M[which.min(abs(mu)),]) + +const1 <- sum(y^2) +const2 <- sum(X^2) / (n*p) +lambda <- 0.1 +u <- 3 +v <- 0 + +# Test Bvec +Bvec_test <- rnorm((u+v)*p) + + +loss_value <- alfunc(Bvec_test, X, y, M, mu, lambda, const1, const2, u, v) +cat("Test Bvec:", round(Bvec_test, 4), "\n") +cat("Loss value:", loss_value, "\n\n") + +# ======================================== +# EXAMPLE 2: COMPARE RCPP VS FORTRAN +# ======================================== + + +# WITH FIXED SEED FOR REPRODUCIBILITY + +COMPARISON_SEED <- 999 # Use same seed for both methods +SAMPLING_SEED <- 777 # NEW: Separate seed for data sampling + +# Method 1: Rcpp +cat("1. Running Rcpp...\n") +time_rcpp <- system.time({ + result_rcpp <- biplot.spline.axis( + j = 1, X = X, Y = Y, means = means, sd = sd, + n.int = NULL, spline.control = spline.control, + dmeth = 0, optim_method = "rcpp", + fix_seed = TRUE, base_seed = COMPARISON_SEED, + sampling_seed = SAMPLING_SEED # NEW: Control sampling + ) +}) + +# Method 2: FORTRAN +cat("\n2. Running FORTRAN...\n") +time_fortran <- system.time({ + result_fortran <- biplot.spline.axis( + j = 1, X = X, Y = Y, means = means, sd = sd, + n.int = NULL, spline.control = spline.control, + dmeth = 0, optim_method = "fortran", + fix_seed = TRUE, base_seed = COMPARISON_SEED, + sampling_seed = SAMPLING_SEED # NEW: Control sampling + ) +}) + +# ======================================== +# COMPARE RESULTS +# ======================================== + +cat("\n========================================\n") +cat("COMPARISON RESULTS\n") +cat("========================================\n\n") + +cat("TIMING (seconds):\n") +cat(sprintf(" %-15s: %7.3f\n", "Rcpp", time_rcpp[3])) +cat(sprintf(" %-15s: %7.3f\n", "FORTRAN", time_fortran[3])) +cat(sprintf(" %-15s: %7.3f\n", "Speedup", time_fortran[3] / time_rcpp[3])) +cat("\n") + +cat("LOSS VALUES:\n") +loss_rcpp <- attr(result_rcpp, "loss") +loss_fortran <- attr(result_fortran, "loss") + +cat(sprintf(" %-15s: %.12f\n", "Rcpp", loss_rcpp)) +cat(sprintf(" %-15s: %.12f\n", "FORTRAN", loss_fortran)) +cat("\n") + +cat("COEFFICIENT VECTORS (bvec):\n") +bvec_rcpp <- attr(result_rcpp, "bvec") +bvec_fortran <- attr(result_fortran, "bvec") + +cat(" Rcpp: ", round(bvec_rcpp, 6), "\n") +cat(" FORTRAN: ", round(bvec_fortran, 6), "\n") +cat("\n") diff --git a/tests/temp/fortran.R b/tests/temp/fortran.R new file mode 100644 index 0000000..0f68db6 --- /dev/null +++ b/tests/temp/fortran.R @@ -0,0 +1,527 @@ + +library(splines) + +# Load FORTRAN code +dyn.load("C:\\Users\\29827094\\Documents\\GitHub\\biplotEZ\\src\\biplotEZ.dll") + + +biplot.spline.axis <- function(j, X, Y, means, sd, n.int, + spline.control, dmeth=0, ...) { + + n <- nrow(X) + p <- ncol(X) + if (n > 103) { + my.sample <- sample(1:n, size=103, replace=F) + X <- X[my.sample,] + Y <- Y[my.sample,] + n <- nrow(X) + } + + # Extract control parameters + tau <- spline.control$tau + nmu <- spline.control$nmu + u <- spline.control$u + v <- spline.control$v + lambda <- spline.control$lambda + smallsigma <- spline.control$smallsigma + bigsigma <- spline.control$bigsigma + gamma <- spline.control$gamma + bigsigmaactivate <- spline.control$bigsigmaactivate + eps <- spline.control$eps + tiny <- spline.control$tiny + itmax <- spline.control$itmax + ftol <- spline.control$ftol + +# +# if (v > 0) { +# itmax <- itmax * (1 + v) +# ftol <- ftol * 10 +# } + + cat("Calculating spline axis for variable", j) + + if(dmeth==1) stop("dmeth should be equal to zero or integer greater than 1 \n") + + Ytilde <- scale(scale(Y, center=FALSE, scale=1/sd), + center=-1*means, scale=FALSE) + + ytilde <- Ytilde[,j] + mutilde <- seq(from=min(ytilde), to=max(ytilde), length.out=nmu) + y <- Y[,j] + rangey <- max(y) - min(y) + mu <- seq(from=min(y)-.3*rangey, to=max(y)+.3*rangey, length.out=nmu) + markers <- (pretty(ytilde) - means[j]) / sd[j] + mu <- sort(c(mu, markers)) + mu <- unique(mu) + nmu <- length(mu) + + if (v > 0) { + knots <- seq.int(from=0, to=1, length.out=v+2)[-c(1, v+2)] + knots <- stats::quantile(y, knots) + M <- splines::bs(mu, knots=knots, degree=u, intercept=FALSE) + } else { + M <- splines::bs(mu, df=u+v, degree=u, intercept=FALSE) + } + + M <- scale(M, scale=FALSE, center=M[which.min(abs(mu)),]) + Breg <- t(solve(t(X)%*%X)%*%t(X)%*%y) + Zreg <- mu%*%Breg / sum(Breg^2) + Bvec <- as.vector(solve(t(M)%*%M)%*%t(M)%*%Zreg) + + const1 <- sum(y^2) + const2 <- sum(X^2) / (n*p) + + # ======================================== + #OPTIMIZATION FUNCTION + # ======================================== + + optimtouse <- function(Bvec) { + timetemp <- proc.time()[3] + + + if (length(Bvec) != 2*(u+v)) { + if (length(Bvec) < 2*(u+v)) { + Bvec <- c(Bvec, rep(0, 2*(u+v) - length(Bvec))) + } else { + Bvec <- Bvec[1:(2*(u+v))] + } + } + + # Check for NA/Inf + if (any(is.na(Bvec)) || any(is.infinite(Bvec))) { + Bvec[is.na(Bvec) | is.infinite(Bvec)] <- rnorm(sum(is.na(Bvec) | is.infinite(Bvec)), 0, 0.01) + } + + returned_data <- .Fortran("l", + LOSS = as.double(0), + X = as.double(X), + N = as.integer(n), + P = as.integer(p), + NMU = as.integer(nmu), + IND = as.integer(rep(0, n)), + MU = as.double(mu), + PRED = as.double(rep(0, n)), + LAMBDA = as.double(lambda), + Y = as.double(y), + CONST1 = as.double(const1), + CONST2 = as.double(const2), + U = as.integer(u), + V = as.integer(v), + M = as.double(M), + BVEC = as.double(Bvec), + TAU = as.double(tau), + AY = as.double(rep(0, (u+v)*p+1)), + TEMPVK = as.double(rep(0, (u+v)*p)), + ITER = as.integer(0), + FTOL = as.double(ftol), + LOSS1 = as.double(0), + ITER1 = as.integer(0), + ERRO = as.integer(0), + CONST3 = as.double(tiny), + ITMAX = as.integer(itmax) + ) + + aa <- list( + BestValue = returned_data$LOSS, + BestSolution = returned_data$BVEC, + ConvergenceCode = returned_data$ERRO, + iter1 = returned_data$ITER1, + iter = returned_data$ITER, + TimeTaken = proc.time()[3] - timetemp + ) + + aa + } + + EuclidDist2 <- function(X, Y) { + n <- nrow(X) + m <- nrow(Y) + bx <- rowSums(X^2) + by <- rowSums(Y^2) + outer(bx, by, FUN = "+") - 2 * X %*% t(Y) + } + + # Variable initialization + outBestValues <- rep(NA, gamma+1) + outBestSolutions <- matrix(nrow=2*(u+v), ncol=gamma+1) + outTimeTaken <- rep(NA, gamma+1) + BestSolutionsFrequency <- rep(NA, gamma+1) + BestSolutionsIndices <- rep(NA, gamma+1) + +# # Initial coefficients - ensure correct dimensions +# Bvec_initial <- rep(Bvec, length.out=2*(u+v)) +# if (length(Bvec) < 2*(u+v)) { +# Bvec_initial[(length(Bvec)+1):(2*(u+v))] <- rnorm(2*(u+v) - length(Bvec), 0, 0.01) +# } + +# # Scale initial values to avoid numerical issues +# if (max(abs(Bvec_initial)) > 5) { +# Bvec_initial <- Bvec_initial / max(abs(Bvec_initial)) * 1 +# } + +# # For v > 0, try starting from smaller values +# if (v > 0 && max(abs(Bvec_initial)) > 2) { +# Bvec_initial <- Bvec_initial * 0.5 +# } + + temp <- optimtouse(Bvec) + outBestValues[1] <- temp$BestValue + outBestSolutions[,1] <- temp$BestSolution + outTimeTaken[1] <- temp$TimeTaken + BestSolutionsFrequency[1] <- 1 + BestSolutionsIndices[1] <- 1 + DistinctSolutions <- 1 + PreviousBestSolution <- NA + nSameSolutionConsecutively <- 0 + + # Multi-start optimization + for (gammacounter in 2:(gamma+1)) { + if (nSameSolutionConsecutively >= bigsigmaactivate) { + temp <- optimtouse(outBestSolutions[,which.min(outBestValues)] + + stats::rnorm((u+v)*2, mean=0, sd=bigsigma)) + } else { + temp <- optimtouse(outBestSolutions[,which.min(outBestValues)] + + stats::rnorm((u+v)*2, mean=0, sd=smallsigma)) + } + + outTimeTaken[gammacounter] <- temp$TimeTaken + tempSquaredDistances <- EuclidDist2(matrix(temp$BestSolution, nrow=1), + t(outBestSolutions[,1:DistinctSolutions])) + + if (any(tempSquaredDistances < eps)) { + BestSolutionsIndices[gammacounter] <- tempAA <- which.min(tempSquaredDistances) + BestSolutionsFrequency[tempAA] <- BestSolutionsFrequency[tempAA] + 1 + if (!is.na(PreviousBestSolution) && tempAA == PreviousBestSolution) { + nSameSolutionConsecutively <- nSameSolutionConsecutively + 1 + } else { + PreviousBestSolution <- tempAA + nSameSolutionConsecutively <- 0 + } + } else { + DistinctSolutions <- DistinctSolutions + 1 + outBestValues[DistinctSolutions] <- temp$BestValue + outBestSolutions[,DistinctSolutions] <- temp$BestSolution + BestSolutionsFrequency[DistinctSolutions] <- 1 + BestSolutionsIndices[gammacounter] <- DistinctSolutions + nSameSolutionConsecutively <- 0 + } + } + + axis.points <- cbind(M %*% matrix(outBestSolutions[,which.min(outBestValues)], ncol=2), + mu, 0) + + for (i in 1:nrow(axis.points)) { + if (any(zapsmall(axis.points[i,3] - markers) == 0)) { + axis.points[i, 4] <- 1 + } + } + + axis.points[,3] <- axis.points[,3] * sd[j] + means[j] + + # Return with attributes + attr(axis.points, "loss") <- min(outBestValues, na.rm=TRUE) + attr(axis.points, "bvec") <- outBestSolutions[,which.min(outBestValues)] + attr(axis.points, "total_time") <- sum(outTimeTaken, na.rm=TRUE) + attr(axis.points, "distinct_solutions") <- DistinctSolutions + + axis.points +} + + + +biplot.spline.axis <- function(j, X, Y, means, sd, + n.int, spline.control, dmeth=0, ... ){ + n <- nrow(X) + p <- ncol(X) + if (n > 103){ + my.sample <- sample (1:n, size=103, replace=F) + X <- X[my.sample,] + Y <- Y[my.sample,] + n <- nrow(X) + } + + tau <- spline.control$tau + nmu <- spline.control$nmu + u <- spline.control$u + v <- spline.control$v + lambda <- spline.control$lambda + smallsigma <- spline.control$smallsigma + bigsigma <- spline.control$bigsigma + gamma <- spline.control$gamma + bigsigmaactivate <- spline.control$bigsigmaactivate + eps <- spline.control$eps + tiny <- spline.control$tiny + itmax <- spline.control$itmax + ftol <- spline.control$ftol + + cat ("Calculating spline axis for variable", j, "\n") + if(dmeth==1) stop("dmeth should be equal to zero or integer greater than 1 \n") + Ytilde <- scale(scale(Y, center=FALSE, scale=1/sd), center=-1*means, scale=FALSE) + + ytilde <- Ytilde[,j] + mutilde <- seq(from=min(ytilde),to=max(ytilde),length.out=nmu) + y <- Y[,j] + rangey <- max(y)-min(y) + mu <- seq(from=min(y)-.3*rangey,to=max(y)+.3*rangey,length.out=nmu) + markers <- (pretty(ytilde)-means[j])/sd[j] + mu <- sort(c(mu,markers)) + mu <- unique(mu) + nmu <- length(mu) + + if (v>0){ + knots <- seq.int(from=0,to=1,length.out=v+2)[-c(1,v+2)] + knots <- stats::quantile(y,knots) + M <- splines::bs(mu,knots=knots,degree=u,intercept=FALSE) + } else M <- splines::bs(mu,df=u+v,degree=u,intercept=FALSE) + M <- scale(M,scale=FALSE,center=M[which.min(abs(mu)),]) # To ensure that the spline passes through the origin at the calibration which represents the mean of the variable + Breg <- t(solve(t(X)%*%X)%*%t(X)%*%y) + Zreg <- mu%*%Breg/sum(Breg^2) + Bvec <- as.vector(solve(t(M)%*%M)%*%t(M)%*%Zreg) # Closest to regression biplot + + const1 <- sum(y^2) + const2 <- sum(X^2)/(n*p) + TotalNumberOfLossFunctionCalls <- 0 + + optimtouse <- function(Bvec){ + timetemp <- proc.time()[3] + LOSS <- 1.0 + LOSS1 <- 1.0 + Ind <- rep(1,n) + pred <- rep(0,nmu) + deltmp <- 0 + tau <- tau + #.5 # the choice of tau seems to affect perfomance quite substantially. + # tau is used to specify the points on the inital simplex. + Ay <- rep(0,(u+v)*p+1) + TEMPVK <- rep(0,(u+v)*p) + iter1 <- 0 + iter <- 0 + ERRO <- 0 + + # Prepare for Fortran subroutine + storage.mode(X) <- "double" + storage.mode(Ind) <- "integer" + storage.mode(mu) <- "double" + storage.mode(pred) <- "double" + storage.mode(y) <- "double" + storage.mode(M) <- "double" + storage.mode(Bvec) <- "double" + storage.mode(Ay) <- "double" + storage.mode(TEMPVK) <- "double" + + returned_data <- .Fortran("l", + LOSS = as.double(0), + X = as.double(X), + N = as.integer(n), + P = as.integer(p), + NMU = as.integer(nmu), + IND = as.integer(rep(0, n)), + MU = as.double(mu), + PRED = as.double(rep(0, n)), + LAMBDA = as.double(lambda), + Y = as.double(y), + CONST1 = as.double(const1), + CONST2 = as.double(const2), + U = as.integer(u), + V = as.integer(v), + M = as.double(M), + BVEC = as.double(Bvec), + TAU = as.double(tau), + AY = as.double(rep(0, (u+v)*p+1)), + TEMPVK = as.double(rep(0, (u+v)*p)), + ITER = as.integer(0), + FTOL = as.double(ftol), + LOSS1 = as.double(0), + ITER1 = as.integer(0), + ERRO = as.integer(0), + CONST3 = as.double(tiny), + ITMAX = as.integer(itmax) + ) + + aa <- list( + BestValue = returned_data$LOSS, + BestSolution = returned_data$BVEC, + ConvergenceCode = returned_data$ERRO, + iter1 = returned_data$ITER1, + iter = returned_data$ITER, + TimeTaken = proc.time()[3] - timetemp + ) + + aa + } + EuclidDist2 <- function (X, Y){ + n <- nrow(X) + m <- nrow(Y) + bx <- rowSums(X^2) + by <- rowSums(Y^2) + outer(bx, by, FUN = "+") - 2 * X %*% t(Y) + } + + ### Variable initialisation + outBestValues <- rep(NA,gamma+1) + outBestSolutions <- matrix(nrow=2*(u+v),ncol=gamma+1) + outTimeTaken <- rep(NA,gamma+1) # Is made one element longer at each iteration. + BestSolutionsFrequency <- rep(NA,gamma+1) + BestSolutionsIndices <- rep(NA,gamma+1) # Is made one element longer at each iteration. + SquaredDistancesBetweenBestSolutions <- matrix(nrow=gamma+1,ncol=gamma+1) + + ### Initial coefficients closest to regression biplot + temp <- optimtouse(Bvec) + outBestValues[1] <- temp$BestValue + outBestSolutions[,1] <- temp$BestSolution + outTimeTaken[1] <- temp$TimeTaken + BestSolutionsFrequency[1] <- 1 + BestSolutionsIndices[1] <- 1 + DistinctSolutions <- 1 + PreviousBestSolution <- NA + nSameSolutionConsecutively <- 0 + BigSigmaActivations <- NULL + + test.iter <- temp$iter + test.iter1 <- temp$iter1 + + ### Last best coefficients perturbed + for (gammacounter in 2:(gamma+1)){ + if (nSameSolutionConsecutively>=bigsigmaactivate){ + temp <- optimtouse(outBestSolutions[,which.min(outBestValues)]+stats::rnorm((u+v)*2,mean=0,sd=bigsigma)) + BigSigmaActivations <- c(BigSigmaActivations,gammacounter) + } + else temp <- optimtouse(outBestSolutions[,which.min(outBestValues)]+stats::rnorm((u+v)*2,mean=0,sd=smallsigma)) + outTimeTaken[gammacounter] <- temp$TimeTaken + tempSquaredDistances <- EuclidDist2(matrix(temp$BestSolution,nrow=1),t(outBestSolutions[,1:DistinctSolutions])) + if (any(tempSquaredDistances 103) { +# my.sample <- sample(1:n, size=103, replace=F) +# X <- X[my.sample,] +# Y <- Y[my.sample,] +# n <- nrow(X) +# } + +# tau <- spline.control$tau +# nmu <- spline.control$nmu +# u <- spline.control$u +# v <- spline.control$v +# lambda <- spline.control$lambda +# smallsigma <- spline.control$smallsigma +# bigsigma <- spline.control$bigsigma +# gamma <- spline.control$gamma +# bigsigmaactivate <- spline.control$bigsigmaactivate +# eps <- spline.control$eps +# tiny <- spline.control$tiny +# itmax <- spline.control$itmax +# ftol <- spline.control$ftol + +# cat("Calculating spline axis for variable", j) + +# if(dmeth==1) stop("dmeth should be equal to zero or integer greater than 1 \n") + +# Ytilde <- scale(scale(Y, center=FALSE, scale=1/sd), +# center=-1*means, scale=FALSE) + +# ytilde <- Ytilde[,j] +# mutilde <- seq(from=min(ytilde), to=max(ytilde), length.out=nmu) +# y <- Y[,j] +# rangey <- max(y) - min(y) +# mu <- seq(from=min(y)-.3*rangey, to=max(y)+.3*rangey, length.out=nmu) +# markers <- (pretty(ytilde) - means[j]) / sd[j] +# mu <- sort(c(mu, markers)) +# mu <- unique(mu) +# nmu <- length(mu) + +# if (v > 0) { +# knots <- seq.int(from=0, to=1, length.out=v+2)[-c(1, v+2)] +# knots <- stats::quantile(y, knots) +# M <- splines::bs(mu, knots=knots, degree=u, intercept=FALSE) +# } else { +# M <- splines::bs(mu, df=u+v, degree=u, intercept=FALSE) +# } + +# M <- scale(M, scale=FALSE, center=M[which.min(abs(mu)),]) +# Breg <- t(solve(t(X)%*%X)%*%t(X)%*%y) +# Zreg <- mu%*%Breg / sum(Breg^2) +# Bvec <- as.vector(solve(t(M)%*%M)%*%t(M)%*%Zreg) + +# const1 <- sum(y^2) +# const2 <- sum(X^2) / (n*p) + +# # ======================================== +# #The Fortran call has been replaced by the following Rcpp function +# #========================================= +# optimtouse <- function(Bvec) { +# timetemp <- proc.time()[3] + +# # Use Rcpp optimize_spline function +# returned_data <- optimize_spline( +# BVEC = Bvec, +# X = X, +# Y = y, +# M = M, +# MU = mu, +# LAMBDA = lambda, +# CONST1 = const1, +# CONST2 = const2, +# U = u, +# V = v, +# TAU = tau, +# FTOL = ftol, +# TINY = tiny, +# ITMAX = itmax +# ) + +# if(returned_data$ERRO > 0) { +# cat(" Warning: Error code =", returned_data$ERRO, "\n") +# } + +# aa <- list( +# BestValue = returned_data$LOSS, +# BestSolution = returned_data$BVEC, +# ConvergenceCode = returned_data$ERRO, +# iter1 = returned_data$ITER1, +# iter = returned_data$ITER, +# TimeTaken = proc.time()[3] - timetemp +# ) + +# aa +# } + +# EuclidDist2 <- function(X, Y) { +# n <- nrow(X) +# m <- nrow(Y) +# bx <- rowSums(X^2) +# by <- rowSums(Y^2) +# outer(bx, by, FUN = "+") - 2 * X %*% t(Y) +# } + +# # Variable initialization +# outBestValues <- rep(NA, gamma+1) +# outBestSolutions <- matrix(nrow=2*(u+v), ncol=gamma+1) +# outTimeTaken <- rep(NA, gamma+1) +# BestSolutionsFrequency <- rep(NA, gamma+1) +# BestSolutionsIndices <- rep(NA, gamma+1) + +# # Initial coefficients +# temp <- optimtouse(Bvec) +# outBestValues[1] <- temp$BestValue +# outBestSolutions[,1] <- temp$BestSolution +# outTimeTaken[1] <- temp$TimeTaken +# BestSolutionsFrequency[1] <- 1 +# BestSolutionsIndices[1] <- 1 +# DistinctSolutions <- 1 +# PreviousBestSolution <- NA +# nSameSolutionConsecutively <- 0 + +# # Multi-start optimization +# for (gammacounter in 2:(gamma+1)) { +# if (nSameSolutionConsecutively >= bigsigmaactivate) { +# temp <- optimtouse(outBestSolutions[,which.min(outBestValues)] + +# stats::rnorm((u+v)*2, mean=0, sd=bigsigma)) +# } else { +# temp <- optimtouse(outBestSolutions[,which.min(outBestValues)] + +# stats::rnorm((u+v)*2, mean=0, sd=smallsigma)) +# } + +# outTimeTaken[gammacounter] <- temp$TimeTaken +# tempSquaredDistances <- EuclidDist2(matrix(temp$BestSolution, nrow=1), +# t(outBestSolutions[,1:DistinctSolutions])) + +# if (any(tempSquaredDistances < eps)) { +# BestSolutionsIndices[gammacounter] <- tempAA <- which.min(tempSquaredDistances) +# BestSolutionsFrequency[tempAA] <- BestSolutionsFrequency[tempAA] + 1 +# if (!is.na(PreviousBestSolution) && tempAA == PreviousBestSolution) { +# nSameSolutionConsecutively <- nSameSolutionConsecutively + 1 +# } else { +# PreviousBestSolution <- tempAA +# nSameSolutionConsecutively <- 0 +# } +# } else { +# DistinctSolutions <- DistinctSolutions + 1 +# outBestValues[DistinctSolutions] <- temp$BestValue +# outBestSolutions[,DistinctSolutions] <- temp$BestSolution +# BestSolutionsFrequency[DistinctSolutions] <- 1 +# BestSolutionsIndices[gammacounter] <- DistinctSolutions +# nSameSolutionConsecutively <- 0 +# } +# } + +# axis.points <- cbind(M %*% matrix(outBestSolutions[,which.min(outBestValues)], ncol=2), +# mu, 0) + +# for (i in 1:nrow(axis.points)) { +# if (any(zapsmall(axis.points[i,3] - markers) == 0)) { +# axis.points[i, 4] <- 1 +# } +# } + +# axis.points[,3] <- axis.points[,3] * sd[j] + means[j] + +# # Return with attributes +# attr(axis.points, "loss") <- min(outBestValues, na.rm=TRUE) +# attr(axis.points, "bvec") <- outBestSolutions[,which.min(outBestValues)] +# attr(axis.points, "total_time") <- sum(outTimeTaken, na.rm=TRUE) +# attr(axis.points, "distinct_solutions") <- DistinctSolutions + +# axis.points +# } + + + +biplot.spline.axis <- function(j, X, Y, means, sd, + n.int, spline.control, dmeth=0, ... ){ + n <- nrow(X) + p <- ncol(X) + if (n > 103){ + my.sample <- sample (1:n, size=103, replace=F) + X <- X[my.sample,] + Y <- Y[my.sample,] + n <- nrow(X) + } + + tau <- spline.control$tau + nmu <- spline.control$nmu + u <- spline.control$u + v <- spline.control$v + lambda <- spline.control$lambda + smallsigma <- spline.control$smallsigma + bigsigma <- spline.control$bigsigma + gamma <- spline.control$gamma + bigsigmaactivate <- spline.control$bigsigmaactivate + eps <- spline.control$eps + tiny <- spline.control$tiny + itmax <- spline.control$itmax + ftol <- spline.control$ftol + + cat ("Calculating spline axis for variable", j, "\n") + if(dmeth==1) stop("dmeth should be equal to zero or integer greater than 1 \n") + Ytilde <- scale(scale(Y, center=FALSE, scale=1/sd), center=-1*means, scale=FALSE) + + ytilde <- Ytilde[,j] + mutilde <- seq(from=min(ytilde),to=max(ytilde),length.out=nmu) + y <- Y[,j] + rangey <- max(y)-min(y) + mu <- seq(from=min(y)-.3*rangey,to=max(y)+.3*rangey,length.out=nmu) + markers <- (pretty(ytilde)-means[j])/sd[j] + mu <- sort(c(mu,markers)) + mu <- unique(mu) + nmu <- length(mu) + + if (v>0){ + knots <- seq.int(from=0,to=1,length.out=v+2)[-c(1,v+2)] + knots <- stats::quantile(y,knots) + M <- splines::bs(mu,knots=knots,degree=u,intercept=FALSE) + } else M <- splines::bs(mu,df=u+v,degree=u,intercept=FALSE) + M <- scale(M,scale=FALSE,center=M[which.min(abs(mu)),]) # To ensure that the spline passes through the origin at the calibration which represents the mean of the variable + Breg <- t(solve(t(X)%*%X)%*%t(X)%*%y) + Zreg <- mu%*%Breg/sum(Breg^2) + Bvec <- as.vector(solve(t(M)%*%M)%*%t(M)%*%Zreg) # Closest to regression biplot + + const1 <- sum(y^2) + const2 <- sum(X^2)/(n*p) + TotalNumberOfLossFunctionCalls <- 0 + optimtouse <- function(Bvec) { + timetemp <- proc.time()[3] + + # Use Rcpp optimize_spline function + returned_data <- optimize_spline( + BVEC = Bvec, + X = X, + Y = y, + M = M, + MU = mu, + LAMBDA = lambda, + CONST1 = const1, + CONST2 = const2, + U = u, + V = v, + TAU = tau, + FTOL = ftol, + TINY = tiny, + ITMAX = itmax + ) + + if(returned_data$ERRO > 0) { + cat(" Warning: Error code =", returned_data$ERRO, "\n") + } + + aa <- list( + BestValue = returned_data$LOSS, + BestSolution = returned_data$BVEC, + ConvergenceCode = returned_data$ERRO, + iter1 = returned_data$ITER1, + iter = returned_data$ITER, + TimeTaken = proc.time()[3] - timetemp + ) + + aa + } + + EuclidDist2 <- function (X, Y) { + n <- nrow(X) + m <- nrow(Y) + bx <- rowSums(X^2) + by <- rowSums(Y^2) + outer(bx, by, FUN = "+") - 2 * X %*% t(Y) + } + + ### Variable initialisation + outBestValues <- rep(NA,gamma+1) + outBestSolutions <- matrix(nrow=2*(u+v),ncol=gamma+1) + outTimeTaken <- rep(NA,gamma+1) # Is made one element longer at each iteration. + BestSolutionsFrequency <- rep(NA,gamma+1) + BestSolutionsIndices <- rep(NA,gamma+1) # Is made one element longer at each iteration. + SquaredDistancesBetweenBestSolutions <- matrix(nrow=gamma+1,ncol=gamma+1) + + ### Initial coefficients closest to regression biplot + temp <- optimtouse(Bvec) + outBestValues[1] <- temp$BestValue + outBestSolutions[,1] <- temp$BestSolution + outTimeTaken[1] <- temp$TimeTaken + BestSolutionsFrequency[1] <- 1 + BestSolutionsIndices[1] <- 1 + DistinctSolutions <- 1 + PreviousBestSolution <- NA + nSameSolutionConsecutively <- 0 + BigSigmaActivations <- NULL + + test.iter <- temp$iter + test.iter1 <- temp$iter1 + + ### Last best coefficients perturbed + for (gammacounter in 2:(gamma+1)){ + if (nSameSolutionConsecutively>=bigsigmaactivate){ + temp <- optimtouse(outBestSolutions[,which.min(outBestValues)]+stats::rnorm((u+v)*2,mean=0,sd=bigsigma)) + BigSigmaActivations <- c(BigSigmaActivations,gammacounter) + } + else temp <- optimtouse(outBestSolutions[,which.min(outBestValues)]+stats::rnorm((u+v)*2,mean=0,sd=smallsigma)) + outTimeTaken[gammacounter] <- temp$TimeTaken + tempSquaredDistances <- EuclidDist2(matrix(temp$BestSolution,nrow=1),t(outBestSolutions[,1:DistinctSolutions])) + if (any(tempSquaredDistances