diff --git a/NAMESPACE b/NAMESPACE index 3b4e71c..a3ee9b9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(ChenFang2019BetaRankTest) export(FGXFactorsTest) export(FRP) export(GKRFactorScreening) +export(GiglioXiu2021RiskPremia) export(HACcovariance) export(HJMisspecificationDistance) export(IterativeKleibergenPaap2006BetaRankTest) diff --git a/R/RcppExports.R b/R/RcppExports.R index 3c07eeb..9b8adbb 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -13,6 +13,10 @@ FRPCpp <- function(returns, factors, misspecification_robust = TRUE, include_sta .Call(`_intrinsicFRP_FRPCpp`, returns, factors, misspecification_robust, include_standard_errors, hac_prewhite, target_level_gkr2014_screening) } +GiglioXiu2021RiskPremiaCpp <- function(returns, factors, which_n_pca = 0L) { + .Call(`_intrinsicFRP_GiglioXiu2021RiskPremiaCpp`, returns, factors, which_n_pca) +} + GKRFactorScreeningCpp <- function(returns, factors, target_level = 0.05, hac_prewhite = FALSE) { .Call(`_intrinsicFRP_GKRFactorScreeningCpp`, returns, factors, target_level, hac_prewhite) } diff --git a/R/frp.R b/R/frp.R index 1e133ae..a87e9fd 100644 --- a/R/frp.R +++ b/R/frp.R @@ -109,3 +109,65 @@ FRP = function( )) } + +#' @title Compute Factor Risk Premia using Giglio and Xiu (2021) Method +#' +#' @description Computes the factor risk premia using the 3-step procedure of +#' Giglio and Xiu (2021) . The procedure consists in +#' 1) extracting p PCA from returns, where p is a consistent estimator of the +#' number of strong latent factors, 2) compute a cross-sectional regression of +#' average returns on the estimated betas of the p latent factors, and 3) compute +#' a time-series regression of observed factors on the p latent factors to obtain +#' their mimicking portfolio risk premia. +#' +#' @param returns A `n_observations x n_returns` matrix of asset returns. +#' @param factors A `n_observations x n_factors` matrix of risk factors. +#' @param which_n_pca Method to determine the number of PCA components: +#' `0` for Giglio Xiu (2021) , +#' `-1` for Ahn Horenstein (2013) , +#' or any positive integer for a specific number of PCs. +#' @param check_arguments A boolean: `TRUE` for internal check of all function +#' arguments; `FALSE` otherwise. Default is `TRUE`. +#' +#' @return A list containing: +#' \describe{ +#' \item{risk_premia}{A matrix of factor risk premia.} +#' \item{n_pca}{The number of principal components used.} +#' } +#' @examples +#' \dontrun{ +#' returns <- matrix(rnorm(200), nrow=20, ncol=10) +#' factors <- matrix(rnorm(60), nrow=20, ncol=3) +#' which_n_pca <- 0 +#' result <- GiglioXiu2021RiskPremia(returns, factors, which_n_pca) +#' print(result) +#' } +#' @export +GiglioXiu2021RiskPremia = function( + returns, + factors, + which_n_pca = 0, + check_arguments = TRUE +) { + + # check function arguments + if (check_arguments) { + + stopifnot("`returns` must contain numeric values" = is.numeric(returns)) + stopifnot("`factors` must contain numeric values" = is.numeric(factors)) + stopifnot("`returns` and `factors` must have the same number of rows" = nrow(returns) == nrow(factors)) + stopifnot("`returns` must not contain missing values (NA/NaN)" = !anyNA(returns)) + stopifnot("`factors` must not contain missing values (NA/NaN)" = !anyNA(factors)) + + stopifnot("`which_n_pca` must be numeric" = is.numeric(which_n_pca)) + + } + + # Ensure the function is defined in the C++ file and compiled + return(.Call(`_intrinsicFRP_GiglioXiu2021RiskPremiaCpp`, + returns, + factors, + which_n_pca + )) + +} diff --git a/man/GiglioXiu2021RiskPremia.Rd b/man/GiglioXiu2021RiskPremia.Rd new file mode 100644 index 0000000..b3aa160 --- /dev/null +++ b/man/GiglioXiu2021RiskPremia.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/frp.R +\name{GiglioXiu2021RiskPremia} +\alias{GiglioXiu2021RiskPremia} +\title{Compute Factor Risk Premia using Giglio and Xiu (2021) Method} +\usage{ +GiglioXiu2021RiskPremia( + returns, + factors, + which_n_pca = 0, + check_arguments = TRUE +) +} +\arguments{ +\item{returns}{A \verb{n_observations x n_returns} matrix of asset returns.} + +\item{factors}{A \verb{n_observations x n_factors} matrix of risk factors.} + +\item{which_n_pca}{Method to determine the number of PCA components: +\code{0} for Giglio Xiu (2021) \url{doi:10.1086/714090}, +\code{-1} for Ahn Horenstein (2013) \url{doi:10.3982/ECTA8968}, +or any positive integer for a specific number of PCs.} + +\item{check_arguments}{A boolean: \code{TRUE} for internal check of all function +arguments; \code{FALSE} otherwise. Default is \code{TRUE}.} +} +\value{ +A list containing: +\describe{ +\item{risk_premia}{A matrix of factor risk premia.} +\item{n_pca}{The number of principal components used.} +} +} +\description{ +Computes the factor risk premia using the 3-step procedure of +Giglio and Xiu (2021) \url{doi:10.1086/714090}. The procedure consists in +\enumerate{ +\item extracting p PCA from returns, where p is a consistent estimator of the +number of strong latent factors, 2) compute a cross-sectional regression of +average returns on the estimated betas of the p latent factors, and 3) compute +a time-series regression of observed factors on the p latent factors to obtain +their mimicking portfolio risk premia. +} +} +\examples{ +\dontrun{ +returns <- matrix(rnorm(200), nrow=20, ncol=10) +factors <- matrix(rnorm(60), nrow=20, ncol=3) +which_n_pca <- 0 +result <- GiglioXiu2021RiskPremia(returns, factors, which_n_pca) +print(result) +} +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index d281397..576f388 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -54,6 +54,19 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// GiglioXiu2021RiskPremiaCpp +Rcpp::List GiglioXiu2021RiskPremiaCpp(const arma::mat& returns, const arma::mat& factors, const int which_n_pca); +RcppExport SEXP _intrinsicFRP_GiglioXiu2021RiskPremiaCpp(SEXP returnsSEXP, SEXP factorsSEXP, SEXP which_n_pcaSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type returns(returnsSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type factors(factorsSEXP); + Rcpp::traits::input_parameter< const int >::type which_n_pca(which_n_pcaSEXP); + rcpp_result_gen = Rcpp::wrap(GiglioXiu2021RiskPremiaCpp(returns, factors, which_n_pca)); + return rcpp_result_gen; +END_RCPP +} // GKRFactorScreeningCpp Rcpp::List GKRFactorScreeningCpp(const arma::mat& returns, const arma::mat& factors, const double target_level, const bool hac_prewhite); RcppExport SEXP _intrinsicFRP_GKRFactorScreeningCpp(SEXP returnsSEXP, SEXP factorsSEXP, SEXP target_levelSEXP, SEXP hac_prewhiteSEXP) { @@ -194,6 +207,7 @@ static const R_CallMethodDef CallEntries[] = { {"_intrinsicFRP_FGXThreePassCovarianceCpp", (DL_FUNC) &_intrinsicFRP_FGXThreePassCovarianceCpp, 4}, {"_intrinsicFRP_FGXThreePassCovarianceNoControlsCpp", (DL_FUNC) &_intrinsicFRP_FGXThreePassCovarianceNoControlsCpp, 3}, {"_intrinsicFRP_FRPCpp", (DL_FUNC) &_intrinsicFRP_FRPCpp, 6}, + {"_intrinsicFRP_GiglioXiu2021RiskPremiaCpp", (DL_FUNC) &_intrinsicFRP_GiglioXiu2021RiskPremiaCpp, 3}, {"_intrinsicFRP_GKRFactorScreeningCpp", (DL_FUNC) &_intrinsicFRP_GKRFactorScreeningCpp, 4}, {"_intrinsicFRP_HACCovarianceMatrixCpp", (DL_FUNC) &_intrinsicFRP_HACCovarianceMatrixCpp, 2}, {"_intrinsicFRP_HACVarianceCpp", (DL_FUNC) &_intrinsicFRP_HACVarianceCpp, 2}, diff --git a/src/frp.cpp b/src/frp.cpp index 7da1521..1b7d533 100644 --- a/src/frp.cpp +++ b/src/frp.cpp @@ -4,6 +4,7 @@ #include "hac_covariance.h" #include "gkr_factor_screening.h" #include "utils.h" +#include "n_pca.h" ///////////////// ///// FRPCpp //// @@ -289,3 +290,78 @@ arma::vec StandardErrorsKRSFRPCpp( std::sqrt(static_cast(returns.n_rows)); } + +////////////////////////////////////// +///// GiglioXiu2021RiskPremiaCpp //// + +Rcpp::List GiglioXiu2021RiskPremiaCpp( + const arma::mat& returns, + const arma::mat& factors, + const int which_n_pca +) { + + // parameters + const unsigned int n_assets = returns.n_cols; + const unsigned int n_observations = returns.n_rows; + + // center returns + const arma::rowvec mean_returns = arma::mean(returns, 0); + const arma::mat centered_returns = returns.each_row() - mean_returns; + const arma::mat centered_factors = factors.each_row() - arma::mean(factors, 0); + + // returns decomposition + arma::vec e_vals; + arma::mat U; + arma::mat V; + arma::svd(U, e_vals, V, centered_returns.t() / (n_assets * n_observations)); + e_vals = arma::square(e_vals); + + + // compute the number of PCA + unsigned int n_pca; + + if (which_n_pca == 0) { + + // use the method in Giglio Xiu 2021 + unsigned int n_max_pca = std::floor(2 * n_assets / 3); + n_pca = NPCA_GiglioXiu2021Cpp(e_vals, n_assets, n_observations, n_max_pca); + + } else if (which_n_pca < 0) { + + // use the method in Ahn Horenstein 2013 + unsigned int n_max_pca = std::floor(2 * n_assets / 3); + n_pca = NPCA_AhnHorenstein2013Cpp(e_vals, n_max_pca)["er"]; + + } else { + + // set the number of PCA to the supplied value + n_pca = which_n_pca; + + } + + // latent factors and corresponding beta + const arma::mat pca_factors = std::sqrt(n_observations) * V.head_cols(n_pca); + const arma::mat beta_hat = centered_returns.t() * pca_factors / n_observations; + + // cross-sectional regression of average returns on estimated betas + const arma::vec gamma = SolveSympd( + beta_hat.t() * beta_hat, + beta_hat.t() * mean_returns.t() + ); + + // time-series regression of factors on latent factors + const arma::mat eta = SolveSympd( + pca_factors.t() * pca_factors, + pca_factors.t() * centered_factors + ).t(); + + // factor risk premia + const arma::vec risk_premia = eta * gamma; + + // output list containing factor risk premia and the number of pca used + return Rcpp::List::create( + Rcpp::Named("risk_premia") = risk_premia, + Rcpp::Named("n_pca") = n_pca + ); + +} diff --git a/src/frp.h b/src/frp.h index f906f61..cda1f90 100644 --- a/src/frp.h +++ b/src/frp.h @@ -133,4 +133,12 @@ arma::vec StandardErrorsKRSFRPCpp( const bool hac_prewhite = false ); +// Compute the factor risk premia estimates of Giglio Xiu 2021. +// [[Rcpp::export]] +Rcpp::List GiglioXiu2021RiskPremiaCpp( + const arma::mat& returns, + const arma::mat& factors, + const int which_n_pca = 0 +); + #endif diff --git a/src/n_pca.cpp b/src/n_pca.cpp new file mode 100644 index 0000000..94cd18b --- /dev/null +++ b/src/n_pca.cpp @@ -0,0 +1,78 @@ +// Author: Alberto Quaini + +#include "utils.h" + +//////////////////////////////////////////// +/////// NPCA_GiglioXiu2021Cpp ////////////// + +unsigned int NPCA_GiglioXiu2021Cpp( + const arma::vec& e_vals, + const unsigned int n_assets, + const unsigned int n_observations, + unsigned int p_max +) { + + // minimum between n_assets and n_observations + const unsigned int min_NT = std::min(n_assets, n_observations); + + // adjust p_max if it is too large + if (p_max > min_NT) { + p_max = min_NT - 1; + } + + // compute the penalty term + const double scaling = 0.5 * arma::median(e_vals); + const double penalty = scaling * (std::log(n_assets) + std::log(n_observations)) * + (std::pow(n_assets, -0.5) + std::pow(n_observations, -0.5)); + + // compute the criterion for each index + arma::vec criterion(p_max); + for (unsigned int i = 0; i < p_max; ++i) { + criterion(i) = e_vals(i) + (i + 1) * penalty; + } + + // return the index that minimizes the criterion + return criterion.index_min(); + +} + +//////////////////////////////////////////////// +/////// NPCA_AhnHorenstein2013Cpp ////////////// + +Rcpp::List NPCA_AhnHorenstein2013Cpp( + const arma::vec& evals, + unsigned int n_max = 0 +) { + + // set the number of eigenvalues + const unsigned int n_evals = evals.n_elem; + + // if n_max <= zero or >= n_evals, set it to n_evals - 1 + if (n_max <= 0 || n_max >= n_evals) { + n_max = n_evals - 1; + } + + const arma::uvec top = arma::regspace(0, n_max - 1); + const arma::uvec bottom = arma::regspace(1, n_max); + + // calculate the eigenvalue ratios and the index where the ratio is maximized + const arma::vec evals_ratio = evals(top) / evals(bottom); + const unsigned int er = evals_ratio.index_max() + 1; // +1 to match R's 1-based index + + // calculate the sum of the eigenvalues after each index + arma::vec evals_revcumsum = arma::reverse(arma::cumsum(arma::reverse(evals))); + evals_revcumsum.shed_row(n_evals - 1); // remove the last element + evals_revcumsum.insert_rows(n_evals - 1, arma::zeros(1)); // add zero at the end + + // divide the eigenvalues by the sum + const arma::vec evals_tr = evals / evals_revcumsum; + const arma::vec growth_ratio = arma::log( + 1 + evals_tr(top)) / arma::log(1 + evals_tr(bottom) + ); + const unsigned int gr = growth_ratio.index_max() + 1; // +1 to match R's 1-based index + + return Rcpp::List::create( + Rcpp::Named("er") = er, + Rcpp::Named("gr") = gr + ); +} diff --git a/src/n_pca.h b/src/n_pca.h new file mode 100644 index 0000000..c67bbdb --- /dev/null +++ b/src/n_pca.h @@ -0,0 +1,23 @@ +// Author: Alberto Quaini + +#ifndef N_PCA_H +#define N_PCA_H + +#include + +// Function for internal use. +// Implement the procedure to determine the number of PCA +// that summarize the risk in returns of Giglio Xiu (2021). +unsigned int NPCA_GiglioXiu2021Cpp( + const arma::vec& e_vals, + const unsigned int n_assets, + const unsigned int n_observations, + unsigned int p_max +); + +Rcpp::List NPCA_AhnHorenstein2013Cpp( + const arma::vec& evals, + unsigned int n_max = 0 +); + +#endif diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf index 9a14792..766b901 100644 Binary files a/tests/testthat/Rplots.pdf and b/tests/testthat/Rplots.pdf differ diff --git a/tests/testthat/test-frp.R b/tests/testthat/test-frp.R index 799268e..0d03105 100644 --- a/tests/testthat/test-frp.R +++ b/tests/testthat/test-frp.R @@ -1,4 +1,4 @@ -test_that("Test FRP", { +test_that("Test FRP, GiglioXiu2021RiskPremia", { factors = factors[,-1] returns = returns[,-1] @@ -83,4 +83,88 @@ test_that("Test FRP", { expect_equal(frp$risk_premia, unname(risk_premia)) expect_equal(krs_frp$risk_premia, unname(krs_risk_premia)) + ##################################################### + # Test GiglioXiu2021RiskPremia + returns = matrix(rnorm(200), nrow=20, ncol=10) + factors = matrix(rnorm(60), nrow=20, ncol=3) + n_returns = ncol(returns) + n_factors = ncol(factors) + n_obs = nrow(returns) + + # Test basic functionality with default which_n_pca (0) + result = GiglioXiu2021RiskPremia(returns, factors, which_n_pca = 0) + expect_no_error(result) + expect_true(is.list(result)) + expect_true("risk_premia" %in% names(result)) + expect_true("n_pca" %in% names(result)) + expect_true(is.matrix(result$risk_premia)) + expect_true(is.numeric(result$n_pca)) + + # Test functionality with which_n_pca set to -1 (Ahn Horenstein 2013 method) + result = GiglioXiu2021RiskPremia(returns, factors, which_n_pca = -1) + expect_no_error(result) + expect_true(is.list(result)) + expect_true("risk_premia" %in% names(result)) + expect_true("n_pca" %in% names(result)) + expect_true(is.matrix(result$risk_premia)) + expect_true(is.numeric(result$n_pca)) + + # Test functionality with which_n_pca set to a positive integer (e.g., 2) + result = GiglioXiu2021RiskPremia(returns, factors, which_n_pca = 2) + expect_no_error(result) + expect_true(is.list(result)) + expect_true("risk_premia" %in% names(result)) + expect_true("n_pca" %in% names(result)) + expect_true(is.matrix(result$risk_premia)) + expect_true(is.numeric(result$n_pca)) + expect_equal(result$n_pca, 2) + + # Test error handling for incorrect dimensions (transposed matrices) + expect_error(GiglioXiu2021RiskPremia(t(returns), factors, which_n_pca = 0)) + expect_error(GiglioXiu2021RiskPremia(returns, t(factors), which_n_pca = 0)) + expect_error(GiglioXiu2021RiskPremia(t(returns), t(factors), which_n_pca = 0)) + + # Testing errors for wrong input types + expect_error(GiglioXiu2021RiskPremia(c(), factors, which_n_pca = 0)) + expect_error(GiglioXiu2021RiskPremia(returns, c(), which_n_pca = 0)) + expect_error(GiglioXiu2021RiskPremia(returns, factors, which_n_pca = "c")) + + # Testing missing values + returns_with_na <- returns + returns_with_na[1, 1] <- NA + expect_error(GiglioXiu2021RiskPremia(returns_with_na, factors, which_n_pca = 0)) + + factors_with_na <- factors + factors_with_na[1, 1] <- NA + expect_error(GiglioXiu2021RiskPremia(returns, factors_with_na, which_n_pca = 0)) + + + # Test if the function correctly throws an error when 'returns' has fewer rows than 'factors' + expect_error(GiglioXiu2021RiskPremia(returns[1:(nrow(returns)-5),], factors, which_n_pca = 0)) + + # Test if the function correctly throws an error when 'factors' has fewer rows than 'returns' + expect_error(GiglioXiu2021RiskPremia(returns, factors[1:(nrow(factors)-5),], which_n_pca = 0)) + + # Test if the function can handle a small number of observations correctly + small_returns = matrix(rnorm(60), nrow=5, ncol=12) + small_factors = matrix(rnorm(15), nrow=5, ncol=3) + result = GiglioXiu2021RiskPremia(small_returns, small_factors, which_n_pca = 0) + expect_no_error(result) + expect_true(is.list(result)) + expect_true("risk_premia" %in% names(result)) + expect_true("n_pca" %in% names(result)) + expect_true(is.matrix(result$risk_premia)) + expect_true(is.numeric(result$n_pca)) + + # Test if the function can handle a large number of observations correctly + large_returns = matrix(rnorm(20000), nrow=2000, ncol=10) + large_factors = matrix(rnorm(6000), nrow=2000, ncol=3) + result = GiglioXiu2021RiskPremia(large_returns, large_factors, which_n_pca = 0) + expect_no_error(result) + expect_true(is.list(result)) + expect_true("risk_premia" %in% names(result)) + expect_true("n_pca" %in% names(result)) + expect_true(is.matrix(result$risk_premia)) + expect_true(is.numeric(result$n_pca)) + })