mirror of
https://gitlab.com/c3d/db48x.git
synced 2024-09-29 05:36:58 +02:00
stats: Add linear regression
Add computation of linear regression. Update documentation Related to #569 Signed-off-by: Christophe de Dinechin <christophe@dinechin.org>
This commit is contained in:
parent
e0e2d67e7a
commit
3a5282d9b3
7 changed files with 371 additions and 55 deletions
|
@ -138,19 +138,117 @@ Calculates the sample standard deviation of each of the columns of coordinate va
|
|||
|
||||
The standard deviation is the square root of the `Variance`.
|
||||
|
||||
CMD(StandardDeviation) ALIAS(StandardDeviation, "SDev")
|
||||
CMD(Bins)
|
||||
CMD(PopulationVariance) ALIAS(PopulationVariance, "PVar")
|
||||
CMD(PopulationStandardDeviation) ALIAS(PopulationStandardDeviation, "PSDev")
|
||||
CMD(PopulationCovariance) ALIAS(PopulationCovariance, "PCov")
|
||||
CMD(IndependentColumn) ALIAS(IndependentColumn, "XCol")
|
||||
CMD(DependentColumn) ALIAS(DependentColumn, "YCol")
|
||||
NAMED(DataColumns, "ColΣ")
|
||||
CMD(Intercept)
|
||||
CMD(Slope)
|
||||
CMD(LinearRegression) ALIAS(LinearRegression, "LR")
|
||||
CMD(BestFit)
|
||||
CMD(LinearFit) ALIAS(LinearFit, "LinFit")
|
||||
CMD(ExponentialFit) ALIAS(ExponentialFit, "ExpFit")
|
||||
CMD(PowerFit) ALIAS(PowerFit, "PwrFit")
|
||||
CMD(LogarithmicFit) ALIAS(LogarithmicFit, "LogFit")
|
||||
## LinearRegression (LR)
|
||||
|
||||
Uses the currently selected statistical model to calculate the linear regression
|
||||
coefficients (intercept and slope) for the selected dependent and independent
|
||||
variables in the current statistics matrix (reserved variable `ΣData`).
|
||||
|
||||
The columns of independent and dependent data are specified by the first two
|
||||
elements in the reserved variable `ΣParameters`, set by `XCol` and `YCol`, respectively.
|
||||
The default independent and dependent columns are 1 and 2.
|
||||
|
||||
The selected statistical model is the fifth element in `ΣParameters`.
|
||||
LR stores the intercept and slope (untagged) as the third and fourth elements,
|
||||
respectively, in `ΣParameters`.
|
||||
|
||||
The coefficients of the exponential (`ExpFit`), logarithmic (`LogFit`),
|
||||
and power (`PwrFit`) models are calculated using transformations that allow
|
||||
the data to be fitted by standard linear regression.
|
||||
|
||||
The equations for these transformations are:
|
||||
|
||||
* `LinFit`: `y = slope * x + intercept`
|
||||
* `LogFit`: `y = slope * ln(x) + intercept`
|
||||
* `ExpFit`: `y = intercept * exp(slope * x)`
|
||||
* `PwrFit`: `y = intercept * x ^ slope`
|
||||
|
||||
where `intercept` and `slope` are the coefficients computed by `LR`. The logarithmic model requires
|
||||
positive x-values (XCOL), the exponential model requires positive y-values
|
||||
(YCOL), and the power model requires positive x- and y-values.
|
||||
CMD(LinearRegression) ALIAS(LinearRegression, "LR")
|
||||
|
||||
## Intercept
|
||||
|
||||
Return the intercept value last computed by `LinearRegression`
|
||||
|
||||
This is a DB48X extension, not present on HP calculators
|
||||
|
||||
## Slope
|
||||
|
||||
Return the slope value last computed by `LinearRegression`
|
||||
|
||||
This is a DB48X extension, not present on HP calculators
|
||||
|
||||
## BestFit
|
||||
|
||||
Select the best linear regression mode based on current data, i.e. the
|
||||
regression mode where the correlation value is the highest.
|
||||
|
||||
## LinearFit (LINFIT)
|
||||
|
||||
Select linear fit, i.e. try to model data with a linear equation `y = a*x+b`.
|
||||
|
||||
## ExponentialFit (EXPFIT)
|
||||
|
||||
Select exponential fit, i.e. try to model data with an equation `y = b*exp(a*x)`.
|
||||
|
||||
## LogarithmicFit (LOGFIT)
|
||||
|
||||
Select logarithmic fit, i.e. try to model data with an equation `y = a*ln(x)+b`.
|
||||
|
||||
## PowerFit (PWRFIT)
|
||||
|
||||
Select power fit, i.e. try to model data with an equation `y = x^a * b`.
|
||||
|
||||
|
||||
## FrequencyBins (BINS)
|
||||
|
||||
Sorts the elements of the independent column (XCOL) of the current statistics matrix (the reserved variable ΣDAT) into (nbins + 2) bins, where the left edge of bin 1 starts at value xmin and each bin has width xwidth.
|
||||
BINS returns a matrix containing the frequency of occurrences in each bin, and a 2-element array containing the frequency of occurrences falling below or above the defined range of x-values. The array can be stored into the reserved variable ΣDAT and used to plot a bar histogram of the bin data (for example, by executing BARPLOT).
|
||||
|
||||
|
||||
## PopulationVariance (PVAR)
|
||||
|
||||
Calculates the population variance of the coordinate values in each of the m
|
||||
columns in the current statistics matrix (`ΣData`).
|
||||
|
||||
The population variance (equal to the square of the population standard
|
||||
deviation) is returned as a vector of m real numbers, or as a single real number
|
||||
if there is a single column of data.
|
||||
|
||||
## PopulationStandardDeviation (PSDEV)
|
||||
|
||||
Calculates the population standard deviation of each of the m columns of
|
||||
coordinate values in the current statistics matrix (reserved variable `ΣData`).
|
||||
|
||||
The command returns a vector of m real numbers, or a single real number if there
|
||||
is a single column of data.
|
||||
|
||||
## PopulationCovariance (PCOV)
|
||||
|
||||
Computes the population covariance of the independent and dependent data columns
|
||||
in the current statistics matrix (reserved variable `ΣData`).
|
||||
|
||||
The columns are specified by the first two elements in reserved variable
|
||||
`ΣParameters`, set by `XCol` and `YCol` respectively. If `ΣParameters` does not
|
||||
exist, `PCOV` creates it and sets the elements to their default values, 1 and 2.
|
||||
|
||||
## IndependentColumn (XCOL)
|
||||
|
||||
Set the independent variable column in the reserved variable `ΣParameters`.
|
||||
|
||||
`XCol` ▶ (Update `ΣParameters`)
|
||||
|
||||
## DependentColumn (YCOL)
|
||||
|
||||
Set the dependent variable column in the reserved variable `ΣParameters`.
|
||||
|
||||
`YCol` ▶ (Update `ΣParameters`)
|
||||
|
||||
## DataColumns (COLΣ)
|
||||
|
||||
Set both the independent and dependent data columns in the reserved variable
|
||||
`ΣParameters`.
|
||||
|
||||
`XCol` `YCol` ▶ (Update `ΣParameters`)
|
||||
|
|
130
help/db48x.md
130
help/db48x.md
|
@ -5625,22 +5625,120 @@ Calculates the sample standard deviation of each of the columns of coordinate va
|
|||
|
||||
The standard deviation is the square root of the `Variance`.
|
||||
|
||||
CMD(StandardDeviation) ALIAS(StandardDeviation, "SDev")
|
||||
CMD(Bins)
|
||||
CMD(PopulationVariance) ALIAS(PopulationVariance, "PVar")
|
||||
CMD(PopulationStandardDeviation) ALIAS(PopulationStandardDeviation, "PSDev")
|
||||
CMD(PopulationCovariance) ALIAS(PopulationCovariance, "PCov")
|
||||
CMD(IndependentColumn) ALIAS(IndependentColumn, "XCol")
|
||||
CMD(DependentColumn) ALIAS(DependentColumn, "YCol")
|
||||
NAMED(DataColumns, "ColΣ")
|
||||
CMD(Intercept)
|
||||
CMD(Slope)
|
||||
CMD(LinearRegression) ALIAS(LinearRegression, "LR")
|
||||
CMD(BestFit)
|
||||
CMD(LinearFit) ALIAS(LinearFit, "LinFit")
|
||||
CMD(ExponentialFit) ALIAS(ExponentialFit, "ExpFit")
|
||||
CMD(PowerFit) ALIAS(PowerFit, "PwrFit")
|
||||
CMD(LogarithmicFit) ALIAS(LogarithmicFit, "LogFit")
|
||||
## LinearRegression (LR)
|
||||
|
||||
Uses the currently selected statistical model to calculate the linear regression
|
||||
coefficients (intercept and slope) for the selected dependent and independent
|
||||
variables in the current statistics matrix (reserved variable `ΣData`).
|
||||
|
||||
The columns of independent and dependent data are specified by the first two
|
||||
elements in the reserved variable `ΣParameters`, set by `XCol` and `YCol`, respectively.
|
||||
The default independent and dependent columns are 1 and 2.
|
||||
|
||||
The selected statistical model is the fifth element in `ΣParameters`.
|
||||
LR stores the intercept and slope (untagged) as the third and fourth elements,
|
||||
respectively, in `ΣParameters`.
|
||||
|
||||
The coefficients of the exponential (`ExpFit`), logarithmic (`LogFit`),
|
||||
and power (`PwrFit`) models are calculated using transformations that allow
|
||||
the data to be fitted by standard linear regression.
|
||||
|
||||
The equations for these transformations are:
|
||||
|
||||
* `LinFit`: `y = slope * x + intercept`
|
||||
* `LogFit`: `y = slope * ln(x) + intercept`
|
||||
* `ExpFit`: `y = intercept * exp(slope * x)`
|
||||
* `PwrFit`: `y = intercept * x ^ slope`
|
||||
|
||||
where `intercept` and `slope` are the coefficients computed by `LR`. The logarithmic model requires
|
||||
positive x-values (XCOL), the exponential model requires positive y-values
|
||||
(YCOL), and the power model requires positive x- and y-values.
|
||||
CMD(LinearRegression) ALIAS(LinearRegression, "LR")
|
||||
|
||||
## Intercept
|
||||
|
||||
Return the intercept value last computed by `LinearRegression`
|
||||
|
||||
This is a DB48X extension, not present on HP calculators
|
||||
|
||||
## Slope
|
||||
|
||||
Return the slope value last computed by `LinearRegression`
|
||||
|
||||
This is a DB48X extension, not present on HP calculators
|
||||
|
||||
## BestFit
|
||||
|
||||
Select the best linear regression mode based on current data, i.e. the
|
||||
regression mode where the correlation value is the highest.
|
||||
|
||||
## LinearFit (LINFIT)
|
||||
|
||||
Select linear fit, i.e. try to model data with a linear equation `y = a*x+b`.
|
||||
|
||||
## ExponentialFit (EXPFIT)
|
||||
|
||||
Select exponential fit, i.e. try to model data with an equation `y = b*exp(a*x)`.
|
||||
|
||||
## LogarithmicFit (LOGFIT)
|
||||
|
||||
Select logarithmic fit, i.e. try to model data with an equation `y = a*ln(x)+b`.
|
||||
|
||||
## PowerFit (PWRFIT)
|
||||
|
||||
Select power fit, i.e. try to model data with an equation `y = x^a * b`.
|
||||
|
||||
|
||||
## FrequencyBins (BINS)
|
||||
|
||||
Sorts the elements of the independent column (XCOL) of the current statistics matrix (the reserved variable ΣDAT) into (nbins + 2) bins, where the left edge of bin 1 starts at value xmin and each bin has width xwidth.
|
||||
BINS returns a matrix containing the frequency of occurrences in each bin, and a 2-element array containing the frequency of occurrences falling below or above the defined range of x-values. The array can be stored into the reserved variable ΣDAT and used to plot a bar histogram of the bin data (for example, by executing BARPLOT).
|
||||
|
||||
|
||||
## PopulationVariance (PVAR)
|
||||
|
||||
Calculates the population variance of the coordinate values in each of the m
|
||||
columns in the current statistics matrix (`ΣData`).
|
||||
|
||||
The population variance (equal to the square of the population standard
|
||||
deviation) is returned as a vector of m real numbers, or as a single real number
|
||||
if there is a single column of data.
|
||||
|
||||
## PopulationStandardDeviation (PSDEV)
|
||||
|
||||
Calculates the population standard deviation of each of the m columns of
|
||||
coordinate values in the current statistics matrix (reserved variable `ΣData`).
|
||||
|
||||
The command returns a vector of m real numbers, or a single real number if there
|
||||
is a single column of data.
|
||||
|
||||
## PopulationCovariance (PCOV)
|
||||
|
||||
Computes the population covariance of the independent and dependent data columns
|
||||
in the current statistics matrix (reserved variable `ΣData`).
|
||||
|
||||
The columns are specified by the first two elements in reserved variable
|
||||
`ΣParameters`, set by `XCol` and `YCol` respectively. If `ΣParameters` does not
|
||||
exist, `PCOV` creates it and sets the elements to their default values, 1 and 2.
|
||||
|
||||
## IndependentColumn (XCOL)
|
||||
|
||||
Set the independent variable column in the reserved variable `ΣParameters`.
|
||||
|
||||
`XCol` ▶ (Update `ΣParameters`)
|
||||
|
||||
## DependentColumn (YCOL)
|
||||
|
||||
Set the dependent variable column in the reserved variable `ΣParameters`.
|
||||
|
||||
`YCol` ▶ (Update `ΣParameters`)
|
||||
|
||||
## DataColumns (COLΣ)
|
||||
|
||||
Set both the independent and dependent data columns in the reserved variable
|
||||
`ΣParameters`.
|
||||
|
||||
`XCol` `YCol` ▶ (Update `ΣParameters`)
|
||||
# Operations with Strings
|
||||
|
||||
## TOUTF
|
||||
|
|
130
help/db50x.md
130
help/db50x.md
|
@ -5625,22 +5625,120 @@ Calculates the sample standard deviation of each of the columns of coordinate va
|
|||
|
||||
The standard deviation is the square root of the `Variance`.
|
||||
|
||||
CMD(StandardDeviation) ALIAS(StandardDeviation, "SDev")
|
||||
CMD(Bins)
|
||||
CMD(PopulationVariance) ALIAS(PopulationVariance, "PVar")
|
||||
CMD(PopulationStandardDeviation) ALIAS(PopulationStandardDeviation, "PSDev")
|
||||
CMD(PopulationCovariance) ALIAS(PopulationCovariance, "PCov")
|
||||
CMD(IndependentColumn) ALIAS(IndependentColumn, "XCol")
|
||||
CMD(DependentColumn) ALIAS(DependentColumn, "YCol")
|
||||
NAMED(DataColumns, "ColΣ")
|
||||
CMD(Intercept)
|
||||
CMD(Slope)
|
||||
CMD(LinearRegression) ALIAS(LinearRegression, "LR")
|
||||
CMD(BestFit)
|
||||
CMD(LinearFit) ALIAS(LinearFit, "LinFit")
|
||||
CMD(ExponentialFit) ALIAS(ExponentialFit, "ExpFit")
|
||||
CMD(PowerFit) ALIAS(PowerFit, "PwrFit")
|
||||
CMD(LogarithmicFit) ALIAS(LogarithmicFit, "LogFit")
|
||||
## LinearRegression (LR)
|
||||
|
||||
Uses the currently selected statistical model to calculate the linear regression
|
||||
coefficients (intercept and slope) for the selected dependent and independent
|
||||
variables in the current statistics matrix (reserved variable `ΣData`).
|
||||
|
||||
The columns of independent and dependent data are specified by the first two
|
||||
elements in the reserved variable `ΣParameters`, set by `XCol` and `YCol`, respectively.
|
||||
The default independent and dependent columns are 1 and 2.
|
||||
|
||||
The selected statistical model is the fifth element in `ΣParameters`.
|
||||
LR stores the intercept and slope (untagged) as the third and fourth elements,
|
||||
respectively, in `ΣParameters`.
|
||||
|
||||
The coefficients of the exponential (`ExpFit`), logarithmic (`LogFit`),
|
||||
and power (`PwrFit`) models are calculated using transformations that allow
|
||||
the data to be fitted by standard linear regression.
|
||||
|
||||
The equations for these transformations are:
|
||||
|
||||
* `LinFit`: `y = slope * x + intercept`
|
||||
* `LogFit`: `y = slope * ln(x) + intercept`
|
||||
* `ExpFit`: `y = intercept * exp(slope * x)`
|
||||
* `PwrFit`: `y = intercept * x ^ slope`
|
||||
|
||||
where `intercept` and `slope` are the coefficients computed by `LR`. The logarithmic model requires
|
||||
positive x-values (XCOL), the exponential model requires positive y-values
|
||||
(YCOL), and the power model requires positive x- and y-values.
|
||||
CMD(LinearRegression) ALIAS(LinearRegression, "LR")
|
||||
|
||||
## Intercept
|
||||
|
||||
Return the intercept value last computed by `LinearRegression`
|
||||
|
||||
This is a DB50X extension, not present on HP calculators
|
||||
|
||||
## Slope
|
||||
|
||||
Return the slope value last computed by `LinearRegression`
|
||||
|
||||
This is a DB50X extension, not present on HP calculators
|
||||
|
||||
## BestFit
|
||||
|
||||
Select the best linear regression mode based on current data, i.e. the
|
||||
regression mode where the correlation value is the highest.
|
||||
|
||||
## LinearFit (LINFIT)
|
||||
|
||||
Select linear fit, i.e. try to model data with a linear equation `y = a*x+b`.
|
||||
|
||||
## ExponentialFit (EXPFIT)
|
||||
|
||||
Select exponential fit, i.e. try to model data with an equation `y = b*exp(a*x)`.
|
||||
|
||||
## LogarithmicFit (LOGFIT)
|
||||
|
||||
Select logarithmic fit, i.e. try to model data with an equation `y = a*ln(x)+b`.
|
||||
|
||||
## PowerFit (PWRFIT)
|
||||
|
||||
Select power fit, i.e. try to model data with an equation `y = x^a * b`.
|
||||
|
||||
|
||||
## FrequencyBins (BINS)
|
||||
|
||||
Sorts the elements of the independent column (XCOL) of the current statistics matrix (the reserved variable ΣDAT) into (nbins + 2) bins, where the left edge of bin 1 starts at value xmin and each bin has width xwidth.
|
||||
BINS returns a matrix containing the frequency of occurrences in each bin, and a 2-element array containing the frequency of occurrences falling below or above the defined range of x-values. The array can be stored into the reserved variable ΣDAT and used to plot a bar histogram of the bin data (for example, by executing BARPLOT).
|
||||
|
||||
|
||||
## PopulationVariance (PVAR)
|
||||
|
||||
Calculates the population variance of the coordinate values in each of the m
|
||||
columns in the current statistics matrix (`ΣData`).
|
||||
|
||||
The population variance (equal to the square of the population standard
|
||||
deviation) is returned as a vector of m real numbers, or as a single real number
|
||||
if there is a single column of data.
|
||||
|
||||
## PopulationStandardDeviation (PSDEV)
|
||||
|
||||
Calculates the population standard deviation of each of the m columns of
|
||||
coordinate values in the current statistics matrix (reserved variable `ΣData`).
|
||||
|
||||
The command returns a vector of m real numbers, or a single real number if there
|
||||
is a single column of data.
|
||||
|
||||
## PopulationCovariance (PCOV)
|
||||
|
||||
Computes the population covariance of the independent and dependent data columns
|
||||
in the current statistics matrix (reserved variable `ΣData`).
|
||||
|
||||
The columns are specified by the first two elements in reserved variable
|
||||
`ΣParameters`, set by `XCol` and `YCol` respectively. If `ΣParameters` does not
|
||||
exist, `PCOV` creates it and sets the elements to their default values, 1 and 2.
|
||||
|
||||
## IndependentColumn (XCOL)
|
||||
|
||||
Set the independent variable column in the reserved variable `ΣParameters`.
|
||||
|
||||
`XCol` ▶ (Update `ΣParameters`)
|
||||
|
||||
## DependentColumn (YCOL)
|
||||
|
||||
Set the dependent variable column in the reserved variable `ΣParameters`.
|
||||
|
||||
`YCol` ▶ (Update `ΣParameters`)
|
||||
|
||||
## DataColumns (COLΣ)
|
||||
|
||||
Set both the independent and dependent data columns in the reserved variable
|
||||
`ΣParameters`.
|
||||
|
||||
`XCol` `YCol` ▶ (Update `ΣParameters`)
|
||||
# Operations with Strings
|
||||
|
||||
## TOUTF
|
||||
|
|
|
@ -500,7 +500,7 @@ CMD(Variance) ALIAS(Variance, "Var")
|
|||
CMD(Correlation) ALIAS(Correlation, "Corr")
|
||||
CMD(Covariance) ALIAS(Covariance, "Cov")
|
||||
CMD(StandardDeviation) ALIAS(StandardDeviation, "SDev")
|
||||
CMD(Bins)
|
||||
CMD(FrequencyBins) ALIAS(FrequencyBins, "Bins")
|
||||
CMD(PopulationVariance) ALIAS(PopulationVariance, "PVar")
|
||||
CMD(PopulationStandardDeviation) ALIAS(PopulationStandardDeviation, "PSDev")
|
||||
CMD(PopulationCovariance) ALIAS(PopulationCovariance, "PCov")
|
||||
|
|
|
@ -579,7 +579,7 @@ MENU(PopulationMenu,
|
|||
// ----------------------------------------------------------------------------
|
||||
"XCol", ID_IndependentColumn,
|
||||
"YCol", ID_DependentColumn,
|
||||
"Bins", ID_Bins,
|
||||
"Bins", ID_FrequencyBins,
|
||||
"PopVar", ID_PopulationVariance,
|
||||
"PopSDev", ID_PopulationStandardDeviation,
|
||||
"PCovar", ID_PopulationCovariance);
|
||||
|
|
30
src/stats.cc
30
src/stats.cc
|
@ -32,6 +32,7 @@
|
|||
#include "arithmetic.h"
|
||||
#include "compare.h"
|
||||
#include "integer.h"
|
||||
#include "tag.h"
|
||||
#include "variables.h"
|
||||
|
||||
|
||||
|
@ -1194,7 +1195,7 @@ COMMAND_BODY(PopulationCovariance)
|
|||
|
||||
|
||||
|
||||
COMMAND_BODY(Bins)
|
||||
COMMAND_BODY(FrequencyBins)
|
||||
// ----------------------------------------------------------------------------
|
||||
//
|
||||
// ----------------------------------------------------------------------------
|
||||
|
@ -1261,11 +1262,32 @@ COMMAND_BODY(Slope)
|
|||
|
||||
COMMAND_BODY(LinearRegression)
|
||||
// ----------------------------------------------------------------------------
|
||||
//
|
||||
// Compute linear regression
|
||||
// ----------------------------------------------------------------------------
|
||||
{
|
||||
rt.unimplemented_error();
|
||||
return ERROR;
|
||||
StatsAccess stats;
|
||||
if (!stats)
|
||||
return ERROR;
|
||||
algebraic_g n = stats.num_rows();
|
||||
algebraic_g sx2 = stats.sum_x2();
|
||||
algebraic_g sx = stats.sum_x();
|
||||
algebraic_g sy = stats.sum_y();
|
||||
algebraic_g sxy = stats.sum_xy();
|
||||
algebraic_g ssxx = sx2 - sx * sx / n;
|
||||
algebraic_g ssxy = sxy - sx * sy / n;
|
||||
algebraic_g slope = ssxy / ssxx;
|
||||
algebraic_g intercept = (sy - slope * sx) / n;
|
||||
if (!intercept || !slope)
|
||||
return ERROR;
|
||||
stats.intercept = intercept;
|
||||
stats.slope = slope;
|
||||
tag_g itag = tag::make("Intercept", intercept.Safe());
|
||||
tag_g stag = tag::make("Slope", slope.Safe());
|
||||
if (!itag || !stag)
|
||||
return ERROR;
|
||||
if (!rt.push(itag.Safe()) || !rt.push(stag.Safe()))
|
||||
return ERROR;
|
||||
return OK;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -166,7 +166,7 @@ COMMAND_DECLARE(StandardDeviation);
|
|||
COMMAND_DECLARE(PopulationVariance);
|
||||
COMMAND_DECLARE(PopulationStandardDeviation);
|
||||
COMMAND_DECLARE(PopulationCovariance);
|
||||
COMMAND_DECLARE(Bins);
|
||||
COMMAND_DECLARE(FrequencyBins);
|
||||
COMMAND_DECLARE(Total);
|
||||
COMMAND_DECLARE(IndependentColumn);
|
||||
COMMAND_DECLARE(DependentColumn);
|
||||
|
|
Loading…
Reference in a new issue