The analysis of high-dimensional data sets is often forced to rely upon well-chosen summary statistics. A systematic approach to choosing such statistics, which is based upon a sound theoretical framework, is currently lacking. In this paper we develop a sequential scheme for scoring statistics according to whether their inclusion in the analysis will substantially improve the quality of inference. Our method can be applied to high-dimensional data sets for which exact likelihood equations are not possible. We illustrate the potential of our approach with a series of examples drawn from genetics. In summary, in a context in which well-chosen summary statistics are of high importance, we attempt to put the `well' into `chosen.'
%0 Journal Article
%1 joyce2008approximately
%A Joyce, Paul
%A Marjoram, Paul
%D 2008
%J Statistical Applications in Genetics and Molecular Biology
%K ABC methods sufficient_statistics
%N 1
%R 10.2202/1544-6115.1389
%T Approximately Sufficient Statistics and Bayesian Computation
%U https://doi.org/10.2202/1544-6115.1389
%V 7
%X The analysis of high-dimensional data sets is often forced to rely upon well-chosen summary statistics. A systematic approach to choosing such statistics, which is based upon a sound theoretical framework, is currently lacking. In this paper we develop a sequential scheme for scoring statistics according to whether their inclusion in the analysis will substantially improve the quality of inference. Our method can be applied to high-dimensional data sets for which exact likelihood equations are not possible. We illustrate the potential of our approach with a series of examples drawn from genetics. In summary, in a context in which well-chosen summary statistics are of high importance, we attempt to put the `well' into `chosen.'
@comment{Journal article by Joyce and Marjoram (2008); the biburl field points at the BibSonomy record this entry came from. The doi field holds the bare DOI with no scheme or resolver prefix, so that bibliography styles can build the link themselves. The added-at, biburl, interhash, intrahash and timestamp fields are bookkeeping fields that standard styles ignore. NOTE(review): no pages or article number is recorded here; confirm against the publisher record.}
@article{joyce2008approximately,
  abstract  = {The analysis of high-dimensional data sets is often forced to rely upon well-chosen summary statistics. A systematic approach to choosing such statistics, which is based upon a sound theoretical framework, is currently lacking. In this paper we develop a sequential scheme for scoring statistics according to whether their inclusion in the analysis will substantially improve the quality of inference. Our method can be applied to high-dimensional data sets for which exact likelihood equations are not possible. We illustrate the potential of our approach with a series of examples drawn from genetics. In summary, in a context in which well-chosen summary statistics are of high importance, we attempt to put the `well' into `chosen.'},
  added-at  = {2022-02-26T20:08:58.000+0100},
  author    = {Joyce, Paul and Marjoram, Paul},
  biburl    = {https://www.bibsonomy.org/bibtex/21f43d614f9fb2ac4e2dac17c25dfe642/peter.ralph},
  doi       = {10.2202/1544-6115.1389},
  interhash = {4910f62306d6017f08a1b105d08f2e52},
  intrahash = {1f43d614f9fb2ac4e2dac17c25dfe642},
  journal   = {Statistical Applications in Genetics and Molecular Biology},
  keywords  = {ABC methods sufficient_statistics},
  number    = {1},
  timestamp = {2022-02-26T20:08:58.000+0100},
  title     = {Approximately Sufficient Statistics and {Bayesian} Computation},
  url       = {https://doi.org/10.2202/1544-6115.1389},
  volume    = {7},
  year      = {2008},
}