From cebc33633f63351ef802ccc539df4ff56a3d1f61 Mon Sep 17 00:00:00 2001 From: HubertRonald Date: Tue, 9 Jun 2026 22:34:52 -0500 Subject: [PATCH 1/7] Add Student t distribution and regression helpers --- src/luasf.lua | 5 +- src/luasf/distributions.lua | 15 ++- src/luasf/regression.lua | 209 ++++++++++++++++++++++++++++++++++++ 3 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 src/luasf/regression.lua diff --git a/src/luasf.lua b/src/luasf.lua index 9fc63b5..8dd3abc 100644 --- a/src/luasf.lua +++ b/src/luasf.lua @@ -1,7 +1,9 @@ --[[ LuaSF : Lua Statistics Functions -A lightweight, pure-Lua library for basic statistics and random variables. +LuaSF is a small, lightweight, pure-Lua library for descriptive statistics, +shape statistics, bivariate statistics, probability helpers, sampling utilities, +pseudo-random variable generation, and simple formula-based regression summaries. MIT License @@ -39,5 +41,6 @@ merge(M, require(prefix .. "sampling")) merge(M, require(prefix .. "distributions")) merge(M, require(prefix .. "bivariate")) merge(M, require(prefix .. "probability")) +merge(M, require(prefix .. 
"regression")) return M diff --git a/src/luasf/distributions.lua b/src/luasf/distributions.lua index 0282d81..84aee8d 100644 --- a/src/luasf/distributions.lua +++ b/src/luasf/distributions.lua @@ -189,6 +189,16 @@ local function chiSquareVA(n) return value end +local function studentTVA(df) + df = df or 1 + validation.assert_positive_integer(df, "df") + + local z = normalVA(0, 1) + local v = chiSquareVA(df) + + return z / sqrt(v / df) +end + local function gamVA(alpha, lambda) alpha = alpha or 1 lambda = lambda or 1 @@ -254,6 +264,7 @@ M.binomialVA = binomialVA M.geometricVA = geometricVA M.poissonVA = poissonVA M.chiSquareVA = chiSquareVA +M.studentTVA = studentTVA M.gamVA = gamVA M.lognoVA = lognoVA M.lognoRandVA = lognoVA @@ -270,7 +281,9 @@ M.binomial = binomialVA M.geometric = geometricVA M.poisson = poissonVA M.chi_square = chiSquareVA +M.student_t = studentTVA +M.t_student = studentTVA M.gamma = gamVA M.lognormal = lognoVA -return M \ No newline at end of file +return M diff --git a/src/luasf/regression.lua b/src/luasf/regression.lua new file mode 100644 index 0000000..9961205 --- /dev/null +++ b/src/luasf/regression.lua @@ -0,0 +1,209 @@ +local module_name = ... or "luasf.regression" +local prefix = module_name:match("^src%.") and "src.luasf." or "luasf." + +local validation = require(prefix .. "validation") +local descriptive = require(prefix .. "descriptive") +local core = require(prefix .. 
"core") + +local sqrt = math.sqrt + +local M = {} + +local function clamp_near_zero(value) + if value < 0 and value > -1e-12 then + return 0 + end + + if value > 0 and value < 1e-12 then + return 0 + end + + return value +end + +local function divide_or_nil(numerator, denominator) + if denominator == nil or denominator == 0 then + return nil + end + + return numerator / denominator +end + +local function validate_model(model) + assert(type(model) == "table", "model must be a table") + validation.assert_number(model.slope, "model.slope") + validation.assert_number(model.intercept, "model.intercept") +end + +local function simple_linear_regression(x, y) + validation.assert_same_length_numeric_arrays(x, y, "x", "y") + assert(#x >= 3, "x and y must contain at least three values") + + local n = #x + local mean_x = descriptive.mean(x) + local mean_y = descriptive.mean(y) + local sxx = 0 + local syy = 0 + local sxy = 0 + + for i = 1, n do + local dx = x[i] - mean_x + local dy = y[i] - mean_y + + sxx = sxx + dx * dx + syy = syy + dy * dy + sxy = sxy + dx * dy + end + + assert(sxx > 0, "x values must not be constant") + + local slope = sxy / sxx + local intercept = mean_y - slope * mean_x + local fitted = {} + local residual_values = {} + local sse = 0 + local ssr = 0 + + for i = 1, n do + local y_hat = intercept + slope * x[i] + local residual = y[i] - y_hat + + fitted[i] = y_hat + residual_values[i] = residual + + sse = sse + residual * residual + ssr = ssr + (y_hat - mean_y) * (y_hat - mean_y) + end + + local sst = syy + + sse = clamp_near_zero(sse) + ssr = clamp_near_zero(ssr) + sst = clamp_near_zero(sst) + + local degrees_freedom = n - 2 + local mse = sse / degrees_freedom + local rmse = sqrt(mse) + local residual_standard_error = rmse + + local r + local r_squared + local adjusted_r_squared + + if sst > 0 then + r = sxy / sqrt(sxx * syy) + r_squared = 1 - (sse / sst) + r_squared = clamp_near_zero(r_squared) + adjusted_r_squared = 1 - ((1 - r_squared) * (n - 1) / 
degrees_freedom) + else + r = nil + r_squared = nil + adjusted_r_squared = nil + end + + local standard_error_slope = sqrt(mse / sxx) + local standard_error_intercept = sqrt(mse * ((1 / n) + ((mean_x * mean_x) / sxx))) + + local t_slope = divide_or_nil(slope, standard_error_slope) + local t_intercept = divide_or_nil(intercept, standard_error_intercept) + + local ms_regression = ssr + local ms_residual = mse + local f_statistic = divide_or_nil(ms_regression, ms_residual) + + return { + n = n, + degrees_freedom = degrees_freedom, + + slope = slope, + intercept = intercept, + coefficients = { + intercept = intercept, + slope = slope + }, + + mean_x = mean_x, + mean_y = mean_y, + + r = r, + r_squared = r_squared, + adjusted_r_squared = adjusted_r_squared, + + sxx = sxx, + syy = syy, + sxy = sxy, + + sst = sst, + ssr = ssr, + sse = sse, + mse = mse, + rmse = rmse, + residual_standard_error = residual_standard_error, + + standard_error_slope = standard_error_slope, + standard_error_intercept = standard_error_intercept, + t_slope = t_slope, + t_intercept = t_intercept, + + fitted_values = fitted, + residuals = residual_values, + + anova = { + regression = { + df = 1, + ss = ssr, + ms = ms_regression, + f = f_statistic + }, + residual = { + df = degrees_freedom, + ss = sse, + ms = ms_residual + }, + total = { + df = n - 1, + ss = sst + } + } + } +end + +local function predict(model, x) + validate_model(model) + + if type(x) == "table" then + local result = {} + + for i = 1, #x do + validation.assert_number(x[i], "x value") + result[i] = model.intercept + model.slope * x[i] + end + + return result + end + + validation.assert_number(x, "x") + + return model.intercept + model.slope * x +end + +local function fitted_values(model) + assert(type(model) == "table", "model must be a table") + assert(type(model.fitted_values) == "table", "model.fitted_values must be a table") + + return core.copy_array(model.fitted_values) +end + +local function residuals(model) + 
assert(type(model) == "table", "model must be a table") + assert(type(model.residuals) == "table", "model.residuals must be a table") + + return core.copy_array(model.residuals) +end + +M.simple_linear_regression = simple_linear_regression +M.predict = predict +M.fitted_values = fitted_values +M.residuals = residuals + +return M From 94619d81935d77a57317614cd46b892055bccb33 Mon Sep 17 00:00:00 2001 From: HubertRonald Date: Tue, 9 Jun 2026 22:34:52 -0500 Subject: [PATCH 2/7] Add Student t and regression tests --- spec/test_regression.lua | 88 ++++++++++++++++++++++++++++++++++++++++ spec/test_student_t.lua | 46 +++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 spec/test_regression.lua create mode 100644 spec/test_student_t.lua diff --git a/spec/test_regression.lua b/spec/test_regression.lua new file mode 100644 index 0000000..4634cb0 --- /dev/null +++ b/spec/test_regression.lua @@ -0,0 +1,88 @@ +local luaunit = require("luaunit") +local stats = require("luasf") + +TestRegression = {} + +function TestRegression:test_simple_linear_regression_perfect_line() + local x = {1, 2, 3, 4, 5} + local y = {2, 4, 6, 8, 10} + + local model = stats.simple_linear_regression(x, y) + + luaunit.assertAlmostEquals(model.slope, 2, 0.000001) + luaunit.assertAlmostEquals(model.intercept, 0, 0.000001) + luaunit.assertAlmostEquals(model.r, 1, 0.000001) + luaunit.assertAlmostEquals(model.r_squared, 1, 0.000001) + luaunit.assertAlmostEquals(model.sse, 0, 0.000001) + luaunit.assertAlmostEquals(model.ssr, 40, 0.000001) + luaunit.assertAlmostEquals(model.sst, 40, 0.000001) + luaunit.assertEquals(model.n, 5) + luaunit.assertEquals(model.degrees_freedom, 3) +end + +function TestRegression:test_simple_linear_regression_with_intercept() + local x = {1, 2, 3, 4, 5} + local y = {3, 5, 7, 9, 11} + + local model = stats.simple_linear_regression(x, y) + + luaunit.assertAlmostEquals(model.slope, 2, 0.000001) + luaunit.assertAlmostEquals(model.intercept, 1, 0.000001) + 
luaunit.assertAlmostEquals(stats.predict(model, 6), 13, 0.000001) +end + +function TestRegression:test_predict_accepts_array() + local x = {1, 2, 3, 4, 5} + local y = {3, 5, 7, 9, 11} + + local model = stats.simple_linear_regression(x, y) + local predictions = stats.predict(model, {6, 7}) + + luaunit.assertAlmostEquals(predictions[1], 13, 0.000001) + luaunit.assertAlmostEquals(predictions[2], 15, 0.000001) +end + +function TestRegression:test_non_perfect_regression_summary() + local x = {1, 2, 3, 4, 5} + local y = {2, 5, 5, 9, 10} + + local model = stats.simple_linear_regression(x, y) + local fitted = stats.fitted_values(model) + local residuals = stats.residuals(model) + + luaunit.assertEquals(#fitted, 5) + luaunit.assertEquals(#residuals, 5) + luaunit.assertTrue(model.r_squared > 0) + luaunit.assertTrue(model.r_squared <= 1) + luaunit.assertTrue(model.mse >= 0) + luaunit.assertTrue(model.rmse >= 0) + luaunit.assertEquals(model.anova.regression.df, 1) + luaunit.assertEquals(model.anova.residual.df, 3) + luaunit.assertEquals(model.anova.total.df, 4) +end + +function TestRegression:test_requires_same_length_arrays() + luaunit.assertError(function() + stats.simple_linear_regression({1, 2, 3}, {1, 2}) + end) +end + +function TestRegression:test_requires_at_least_three_values() + luaunit.assertError(function() + stats.simple_linear_regression({1, 2}, {2, 4}) + end) +end + +function TestRegression:test_rejects_constant_x_values() + luaunit.assertError(function() + stats.simple_linear_regression({1, 1, 1}, {2, 3, 4}) + end) +end + +function TestRegression:test_rejects_non_numeric_values() + luaunit.assertError(function() + stats.simple_linear_regression({1, 2, "x"}, {2, 4, 6}) + end) +end + +os.exit(luaunit.LuaUnit.run()) diff --git a/spec/test_student_t.lua b/spec/test_student_t.lua new file mode 100644 index 0000000..c4e4d40 --- /dev/null +++ b/spec/test_student_t.lua @@ -0,0 +1,46 @@ +local luaunit = require("luaunit") +local stats = require("luasf") + +TestStudentT = 
{} + +function TestStudentT:tearDown() + stats.reset_rng() +end + +function TestStudentT:test_student_t_returns_number() + stats.seed(1234) + + local value = stats.student_t(5) + + luaunit.assertEquals(type(value), "number") +end + +function TestStudentT:test_student_t_legacy_name_returns_number() + stats.seed(1234) + + local value = stats.studentTVA(5) + + luaunit.assertEquals(type(value), "number") +end + +function TestStudentT:test_t_student_alias_returns_number() + stats.seed(1234) + + local value = stats.t_student(5) + + luaunit.assertEquals(type(value), "number") +end + +function TestStudentT:test_student_t_rejects_zero_degrees_of_freedom() + luaunit.assertError(function() + stats.student_t(0) + end) +end + +function TestStudentT:test_student_t_rejects_non_integer_degrees_of_freedom() + luaunit.assertError(function() + stats.student_t(2.5) + end) +end + +os.exit(luaunit.LuaUnit.run()) From 058eba67c785bf2abc1181485d38013fb34c180f Mon Sep 17 00:00:00 2001 From: HubertRonald Date: Tue, 9 Jun 2026 22:34:52 -0500 Subject: [PATCH 3/7] Add Student t and simple regression examples --- examples/simple_linear_regression.lua | 27 +++++++++++++++++++++++++++ examples/student_t_distribution.lua | 15 +++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 examples/simple_linear_regression.lua create mode 100644 examples/student_t_distribution.lua diff --git a/examples/simple_linear_regression.lua b/examples/simple_linear_regression.lua new file mode 100644 index 0000000..2b7bad5 --- /dev/null +++ b/examples/simple_linear_regression.lua @@ -0,0 +1,27 @@ +local stats = require("luasf") + +local study_hours = {1, 2, 3, 4, 5, 6} +local exam_scores = {52, 55, 61, 66, 72, 75} + +local model = stats.simple_linear_regression(study_hours, exam_scores) +local prediction = stats.predict(model, 7) + +print("Simple linear regression example") +print("Formula: y = intercept + slope * x") +print("Intercept:", model.intercept) +print("Slope:", model.slope) +print("R:", 
model.r) +print("R squared:", model.r_squared) +print("Adjusted R squared:", model.adjusted_r_squared) +print("SSE:", model.sse) +print("SSR:", model.ssr) +print("SST:", model.sst) +print("MSE:", model.mse) +print("RMSE:", model.rmse) +print("Residual standard error:", model.residual_standard_error) +print("Standard error slope:", model.standard_error_slope) +print("Standard error intercept:", model.standard_error_intercept) +print("T slope:", model.t_slope) +print("T intercept:", model.t_intercept) +print("ANOVA F statistic:", model.anova.regression.f) +print("Prediction for 7 study hours:", prediction) diff --git a/examples/student_t_distribution.lua b/examples/student_t_distribution.lua new file mode 100644 index 0000000..d621e41 --- /dev/null +++ b/examples/student_t_distribution.lua @@ -0,0 +1,15 @@ +local stats = require("luasf") + +local degrees_of_freedom = 10 +local draws = {} + +for i = 1, 10 do + draws[i] = stats.student_t(degrees_of_freedom) +end + +print("Student's t random values") +print("Degrees of freedom:", degrees_of_freedom) + +for i = 1, #draws do + print(i, draws[i]) +end From 759a7915f4bdd4aede9f89e9a273cfc28023376d Mon Sep 17 00:00:00 2001 From: HubertRonald Date: Tue, 9 Jun 2026 22:34:52 -0500 Subject: [PATCH 4/7] Add LuaRocks rockspec for v0.8.0 --- rockspec/luasf-0.8.0-1.rockspec | 43 +++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 rockspec/luasf-0.8.0-1.rockspec diff --git a/rockspec/luasf-0.8.0-1.rockspec b/rockspec/luasf-0.8.0-1.rockspec new file mode 100644 index 0000000..52a7602 --- /dev/null +++ b/rockspec/luasf-0.8.0-1.rockspec @@ -0,0 +1,43 @@ +package = "luasf" +version = "0.8.0-1" + +source = { + url = "git://github.com/HubertRonald/LuaSF.git", + tag = "v0.8.0" +} + +description = { + summary = "Lua Statistics Functions", + detailed = [[ +LuaSF is a lightweight, pure-Lua library for descriptive statistics, +shape statistics, bivariate statistics, probability helpers, sampling +utilities, 
simulation examples, random variable generation, and simple +formula-based regression summaries. + ]], + homepage = "https://github.com/HubertRonald/LuaSF", + license = "MIT", + maintainer = "Hubert Ronald" +} + +dependencies = { + "lua >= 5.1" +} + +build = { + type = "builtin", + modules = { + luasf = "src/luasf.lua", + ["luasf.core"] = "src/luasf/core.lua", + ["luasf.validation"] = "src/luasf/validation.lua", + ["luasf.rng"] = "src/luasf/rng.lua", + ["luasf.descriptive"] = "src/luasf/descriptive.lua", + ["luasf.shape"] = "src/luasf/shape.lua", + ["luasf.sampling"] = "src/luasf/sampling.lua", + ["luasf.distributions"] = "src/luasf/distributions.lua", + ["luasf.bivariate"] = "src/luasf/bivariate.lua", + ["luasf.probability"] = "src/luasf/probability.lua", + ["luasf.regression"] = "src/luasf/regression.lua", + LuaSF = "LuaSF.lua", + LuaStat = "LuaStat.lua" + } +} From 0a21b48ac26f422eea37feeac23b472736d69e78 Mon Sep 17 00:00:00 2001 From: HubertRonald Date: Tue, 9 Jun 2026 22:34:52 -0500 Subject: [PATCH 5/7] Add regression release checks to workflows --- .github/workflows/ci.yml | 4 ++++ .github/workflows/publish-luarocks.yml | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 930d22e..73d2b4d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,6 +60,8 @@ jobs: lua spec/test_bivariate.lua lua spec/test_shape.lua lua spec/test_probability.lua + lua spec/test_student_t.lua + lua spec/test_regression.lua - name: Run examples run: | @@ -74,3 +76,5 @@ jobs: lua examples/covariance_correlation.lua lua examples/skewness_kurtosis.lua lua examples/probability_helpers.lua + lua examples/student_t_distribution.lua + lua examples/simple_linear_regression.lua diff --git a/.github/workflows/publish-luarocks.yml b/.github/workflows/publish-luarocks.yml index 8c03841..5e342fd 100644 --- a/.github/workflows/publish-luarocks.yml +++ b/.github/workflows/publish-luarocks.yml @@ -6,7 
+6,7 @@ on: rockspec: description: "Rockspec file to validate or publish" required: true - default: "rockspec/luasf-0.7.0-1.rockspec" + default: "rockspec/luasf-0.8.0-1.rockspec" type: string publish: description: "Publish to LuaRocks after validation" @@ -52,7 +52,10 @@ jobs: - name: Test probability helpers entry point run: lua -e 'local stats = require("luasf"); print(stats.factorial(5)); print(stats.combinations(5,2)); print(stats.permutations_with_repetition(10,4))' - + + - name: Test LuaRocks package entry point + run: lua -e 'local stats = require("luasf"); print(stats.simple_linear_regression({1,2,3}, {2,4,6}).slope)' + - name: Test LuaSF compatibility entry point run: lua -e 'local stats = require("LuaSF"); print(stats.sumF({1,2,3}))' From 61d2abc0809a9cff5cf69179a588b404e9112ddc Mon Sep 17 00:00:00 2001 From: HubertRonald Date: Tue, 9 Jun 2026 22:34:52 -0500 Subject: [PATCH 6/7] Document simple regression and Student t helpers --- CHANGELOG.md | 39 ++++++++++++++-- CONTRIBUTING.md | 30 ++++++++++--- README.md | 85 ++++++++++++++++++++++++++++------- docs/api.md | 117 +++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 244 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a1a800..2e3116f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,13 +10,46 @@ This project follows a lightweight changelog format inspired by [Keep a Changelo ### Planned +* Add more distribution and simulation-oriented examples. * Explore a lightweight cross-reference with LuaHMF as a related pure-Lua math helper project. -* Add more distribution and simulation examples. -* Evaluate optional formula-based simple regression summaries while keeping ML workflows outside the current scope. +* Explore carefully scoped confidence interval or critical value helpers. --- -## [0.7.0] - 2026-06-07 +## [0.8.0] - 2026-06-09 + +### Added + +* Added Student's t random variable generator `studentTVA(df)`. 
+* Added modern aliases `student_t(df)` and `t_student(df)`. +* Added `src/luasf/regression.lua` for formula-based simple linear regression summaries. +* Added `simple_linear_regression(x, y)`. +* Added `predict(model, x)`. +* Added `fitted_values(model)`. +* Added `residuals(model)`. +* Added regression summary fields for coefficients, R, R², adjusted R², SSE, SSR, SST, MSE, RMSE, residual standard error, standard errors, and t statistics. +* Added ANOVA-style regression summary without p-values. +* Added `spec/test_student_t.lua`. +* Added `spec/test_regression.lua`. +* Added `examples/student_t_distribution.lua`. +* Added `examples/simple_linear_regression.lua`. +* Added `rockspec/luasf-0.8.0-1.rockspec`. + +### Changed + +* Updated the public facade to expose regression helpers. +* Updated GitHub Actions workflows to run Student's t and regression tests/examples. +* Updated LuaRocks workflow default rockspec path to `rockspec/luasf-0.8.0-1.rockspec`. +* Updated README, API documentation, and contributing notes. + +### Scope + +* LuaSF now includes formula-based simple regression summaries, but it does not compute p-values or confidence intervals. +* Multiple regression, non-linear regression, optimization-based modeling, and machine learning workflows remain outside the current scope. 
+ +--- + +## [0.7.0] - 2026-06-07 ### Added diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 04bf184..8888fe3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,6 +17,7 @@ LuaSF aims to provide: * Probability and combinatorics helpers * Pseudo-random variable generation * Sampling utilities +* Formula-based simple regression summaries * A small and readable Lua codebase * Compatibility with the existing public API * Useful examples for simulations, teaching, small scripts, and game/mod scripting @@ -36,17 +37,20 @@ Good fits for LuaSF: * Random variable generation * Sampling and simulation utilities * Formula-based statistical summaries +* Small examples and tests Currently out of scope: -* Machine learning pipelines +* Large machine learning workflows * Optimization-based model training -* Non-linear regression fitting -* Deep learning +* Non-linear regression engines +* Multiple regression requiring a full matrix algebra subsystem * Native dependencies -* Large framework-style APIs +* Big integer dependencies +* Full statistical inference engines with p-value/CDF approximations unless carefully scoped and tested + +For simple regression, LuaSF may report coefficients, R and R², sums of squares, standard errors, t statistics, and ANOVA-style summaries. Full p-values and confidence intervals are intentionally outside the current scope. -Formula-based simple regression summaries may be considered in the future, but ML-style model training is intentionally outside the current scope. 
--- @@ -74,6 +78,7 @@ binomialVA geometricVA poissonVA chiSquareVA +studentTVA gamVA lognoVA lognoRandVA @@ -103,6 +108,7 @@ src/ distributions.lua bivariate.lua probability.lua + regression.lua shape.lua validation.lua rng.lua @@ -116,7 +122,8 @@ Recommended module ownership: * `bivariate.lua`: two-variable statistics such as covariance and correlation * `sampling.lua`: sampling helpers * `distributions.lua`: random variable generators -* `probability.lua`: future probability/combinatorics helpers +* `probability.lua`: probability/combinatorics helpers +* `regression.lua`: simple regression * `validation.lua`: reusable input validation helpers * `shape.lua`: skewness and kurtosis helpers * `rng.lua`: random generator and seed helpers @@ -155,6 +162,8 @@ lua spec/test_sampling.lua lua spec/test_bivariate.lua lua spec/test_shape.lua lua spec/test_probability.lua +lua spec/test_student_t.lua +lua spec/test_regression.lua ``` Run examples: @@ -171,6 +180,8 @@ lua examples/bootstrap_mean.lua lua examples/covariance_correlation.lua lua examples/skewness_kurtosis.lua lua examples/probability_helpers.lua +lua examples/student_t_distribution.lua +lua examples/simple_linear_regression.lua ``` --- @@ -284,6 +295,7 @@ Before opening a pull request, please check: * New modules are included in the rockspec draft when needed. * Code remains readable and dependency-light. * LuaRocks rockspec files are updated when preparing a package release. +* Rockspec modules are updated when new source files are added. * CI workflows are updated when new tests or examples are added. 
@@ -299,6 +311,7 @@ Prefer: * Minimal dependencies * Compatibility with Lua 5.1+ * Explicit validation for public helpers +* Small, focused modules * Formula-based helpers when appropriate Avoid: @@ -308,12 +321,15 @@ Avoid: * Adding native dependencies * Overcomplicating the API * Turning LuaSF into a machine learning framework +* Hidden behavior that makes examples harder to understand --- ## Future scope -Simple formula-based regression summaries may be considered later, but optimization-based models and ML workflows are outside the current scope. +* Explore carefully scoped confidence interval or critical value helpers. +* Explore a lightweight cross-reference with LuaHMF as a related pure-Lua math helper project. +* Add more distribution and simulation-oriented examples. --- diff --git a/README.md b/README.md index 583db29..addce95 100644 --- a/README.md +++ b/README.md @@ -31,9 +31,9 @@ **LuaSF** stands for **Lua Statistics Functions**. -LuaSF is a small, lightweight, pure-Lua library for descriptive statistics, shape statistics, bivariate statistics, probability helpers, sampling utilities, and pseudo-random variable generation. +LuaSF is a small, lightweight, pure-Lua library for descriptive statistics, shape statistics, bivariate statistics, probability helpers, sampling utilities, pseudo-random variable generation, and simple formula-based regression summaries. -The project started around 2014 and was later published under the MIT License. It has now been revived with compatibility improvements, tests, examples, documentation, a cleaner modular source structure, additional statistics helpers, sampling utilities, probability helpers, and LuaRocks packaging while preserving the existing public API. +The project started around 2014 and was later published under the MIT License. 
It has now been revived with compatibility improvements, tests, examples, documentation, a cleaner modular source structure, additional statistics helpers, sampling utilities, probability helpers, LuaRocks packaging, and a compatibility-safe public API. --- @@ -46,13 +46,16 @@ The project started around 2014 and was later published under the MIT License. I * Modular internal source layout * Basic descriptive statistics * Summary statistics helpers +* Shape statistics helpers * Bivariate statistics helpers * Probability and combinatorics helpers * Sampling utilities * Discrete and continuous pseudo-random variables +* Simple formula-based regression summaries * Compatible with the existing public LuaSF API * Useful for simulations, teaching, small scripts, game/mod scripting, and lightweight statistical utilities + --- ## Installation @@ -220,12 +223,24 @@ print(stats.stvF(values)) -- sample standard deviation | `geometricVA(p)` | `geometric(p)` | Geometric random variable | | `poissonVA(lambda)` | `poisson(lambda)` | Poisson random variable | | `chiSquareVA(n)` | `chi_square(n)` | Chi-square random variable | +| `studentTVA(df)` | `student_t(df)` | Student's t random variable | | `gamVA(alpha, lambda)` | `gamma(alpha, lambda)` | Gamma random variable | | `lognoVA(m, s)` | `lognormal(m, s)` | Log-normal random variable | | `lognoRandVA(m, s)` | `lognormal(m, s)` | Log-normal random variable | > `nomalVA` and `lognoRandVA` are preserved as compatibility aliases. 
+### Simple regression summaries + +| Function | Description | +| -------------------------------- | ------------------------------------------------ | +| `simple_linear_regression(x, y)` | Formula-based simple linear regression summary | +| `predict(model, x)` | Predicts one value or a list of values | +| `fitted_values(model)` | Returns fitted values from a regression model | +| `residuals(model)` | Returns residuals from a regression model | + +LuaSF reports coefficients, R and R², sums of squares, mean squared error, residual standard error, standard errors, t statistics, and an ANOVA-style summary for simple regression. It does not compute p-values or confidence intervals. + --- ## Examples @@ -337,6 +352,30 @@ print(stats.choice({"first", "second", "third"})) -- first stats.reset_rng() ``` +### Student's t random variable + +```lua +local stats = require("luasf") + +print(stats.student_t(10)) +``` + +### Simple linear regression + +```lua +local stats = require("luasf") + +local x = {1, 2, 3, 4, 5} +local y = {3, 5, 7, 9, 11} + +local model = stats.simple_linear_regression(x, y) + +print(model.intercept) -- 1 +print(model.slope) -- 2 +print(model.r_squared) -- 1 +print(stats.predict(model, 6)) -- 13 +``` + --- ## Project structure @@ -353,6 +392,7 @@ LuaSF/ distributions.lua bivariate.lua probability.lua + regression.lua validation.lua rng.lua spec/ @@ -362,6 +402,8 @@ LuaSF/ test_bivariate.lua test_shape.lua test_probability.lua + test_student_t.lua + test_regression.lua examples/ dice_simulation.lua normal_quality_control.lua @@ -374,6 +416,8 @@ LuaSF/ covariance_correlation.lua skewness_kurtosis.lua probability_helpers.lua + student_t_distribution.lua + simple_linear_regression.lua docs/ api.md .github/ @@ -387,6 +431,7 @@ LuaSF/ luasf-0.5.0-1.rockspec luasf-0.6.0-1.rockspec luasf-0.7.0-1.rockspec + luasf-0.8.0-1.rockspec LuaSF.lua LuaStat.lua README.md @@ -409,11 +454,14 @@ eval "$(luarocks path --local)" Run tests: ```bash -lua spec/test_stats.lua -lua 
spec/test_distributions.lua -lua spec/test_sampling.lua lua spec/test_bivariate.lua +lua spec/test_distributions.lua lua spec/test_probability.lua +lua spec/test_regression.lua +lua spec/test_sampling.lua +lua spec/test_shape.lua +lua spec/test_stats.lua +lua spec/test_student_t.lua ``` --- @@ -421,16 +469,19 @@ lua spec/test_probability.lua ## Running examples ```bash +lua examples/binomial_coin_flips.lua +lua examples/bootstrap_mean.lua +lua examples/covariance_correlation.lua lua examples/dice_simulation.lua -lua examples/normal_quality_control.lua lua examples/gamma_distribution.lua -lua examples/weighted_loot_drop.lua lua examples/monte_carlo_pi.lua +lua examples/normal_quality_control.lua lua examples/poisson_arrivals.lua -lua examples/binomial_coin_flips.lua -lua examples/bootstrap_mean.lua -lua examples/covariance_correlation.lua lua examples/probability_helpers.lua +lua examples/simple_linear_regression.lua +lua examples/skewness_kurtosis.lua +lua examples/student_t_distribution.lua +lua examples/weighted_loot_drop.lua ``` --- @@ -450,24 +501,26 @@ lua examples/probability_helpers.lua * Summary statistics helpers * Shape statistics helpers * Bivariate statistics helpers -* Probability helpers +* Probability and combinatorics helpers * Sampling utilities * Deterministic simulation support +* Student's t random variable generator +* Formula-based simple linear regression summaries * LuaRocks publishing -### Planned +### Possible future work -* Lightweight cross-reference with LuaHMF * More distribution and simulation examples -* Optional simple formula-based regression summaries, without turning LuaSF into a machine learning framework +* Lightweight cross-reference with LuaHMF +* Carefully scoped confidence interval or critical value helpers --- ## Scope -LuaSF is focused on lightweight statistics, probability, random variables, and simulation helpers. 
+LuaSF is focused on lightweight statistics, probability, random variables, regression summaries, and simulation helpers. -Optimization-based modeling, machine learning workflows, model training pipelines, and non-linear regression are intentionally outside the current scope of LuaSF. +Optimization-based modeling, machine learning workflows, model training pipelines, non-linear regression, and full statistical inference engines are intentionally outside the current scope of LuaSF. --- diff --git a/docs/api.md b/docs/api.md index fd9561c..712984b 100644 --- a/docs/api.md +++ b/docs/api.md @@ -2,7 +2,7 @@ LuaSF stands for **Lua Statistics Functions**. -This document describes the public LuaSF API after the compatibility revival, Phase 3 statistics/sampling additions, and LuaRocks publishing. +This document describes the public LuaSF API after the compatibility revival, modularization, statistics/sampling additions, probability helpers, and simple regression summaries. LuaSF keeps the legacy public API available while adding clearer modern aliases. @@ -56,6 +56,7 @@ src/ distributions.lua bivariate.lua probability.lua + regression.lua validation.lua rng.lua ``` @@ -715,6 +716,7 @@ LuaSF provides functions for discrete and continuous pseudo-random variables. | `geometricVA(p)` | `geometric(p)` | Geometric random variable | | `poissonVA(lambda)` | `poisson(lambda)` | Poisson random variable | | `chiSquareVA(n)` | `chi_square(n)` | Chi-square random variable | +| `studentTVA(df)` | `student_t(df)` or `t_student(df)` | Student's t-distributed random variable | | `gamVA(alpha, lambda)` | `gamma(alpha, lambda)` | Gamma random variable | | `lognoVA(m, s)` | `lognormal(m, s)` | Log-normal random variable | | `lognoRandVA(m, s)` | `lognormal(m, s)` | Log-normal random variable | @@ -964,6 +966,27 @@ stats.chi_square(n) --- +### `studentTVA(df)` + +Returns a Student's t-distributed random value with `df` degrees of freedom. 
+ +LuaSF generates it from a standard normal random variable and an independent chi-square random variable: + +```text +T = Z / sqrt(V / df) +``` + +where `Z` is approximately standard normal and `V` is chi-square with `df` degrees of freedom. + +Modern aliases: + +```lua +stats.student_t(df) +stats.t_student(df) +``` + +--- + ### `gamVA(alpha, lambda)` Returns a gamma-distributed random value. @@ -1011,6 +1034,95 @@ stats.lognormal(m, s) --- +## Simple regression summaries + +LuaSF provides formula-based simple linear regression summaries. + +These helpers are intended for lightweight statistics, teaching, small scripts, and simulation-style analysis. They are not a machine learning framework and do not perform iterative optimization. + +### `simple_linear_regression(x, y)` + +Returns a table with a formula-based simple linear regression summary. + +```lua +local stats = require("luasf") + +local x = {1, 2, 3, 4, 5} +local y = {3, 5, 7, 9, 11} + +local model = stats.simple_linear_regression(x, y) + +print(model.intercept) -- 1 +print(model.slope) -- 2 +print(model.r_squared) -- 1 +``` + +The returned table includes: + +```lua +{ + n = number, + degrees_freedom = number, + + slope = number, + intercept = number, + coefficients = { + intercept = number, + slope = number + }, + + r = number or nil, + r_squared = number or nil, + adjusted_r_squared = number or nil, + + sst = number, + ssr = number, + sse = number, + mse = number, + rmse = number, + residual_standard_error = number, + + standard_error_slope = number, + standard_error_intercept = number, + t_slope = number or nil, + t_intercept = number or nil, + + fitted_values = table, + residuals = table, + + anova = { + regression = { df = 1, ss = number, ms = number, f = number or nil }, + residual = { df = number, ss = number, ms = number }, + total = { df = number, ss = number } + } +} +``` + +### `predict(model, x)` + +Predicts one value or a list of values using a regression model. 
+ +```lua +local prediction = stats.predict(model, 6) +local predictions = stats.predict(model, {6, 7, 8}) +``` + +### `fitted_values(model)` + +Returns a copy of the model fitted values. + +### `residuals(model)` + +Returns a copy of the model residuals. + +### Scope note + +LuaSF reports simple regression coefficients, R and R², sums of squares, MSE, RMSE, residual standard error, standard errors, t statistics, and an ANOVA-style summary. + +LuaSF does not compute regression p-values, confidence intervals, critical values, multiple regression, non-linear regression, optimization-based modeling, or machine learning training pipelines. + +--- + ## Utility functions ### `rand()` @@ -1070,6 +1182,7 @@ stats.binomialVA stats.geometricVA stats.poissonVA stats.chiSquareVA +stats.studentTVA stats.gamVA stats.lognoVA stats.lognoRandVA @@ -1133,6 +1246,8 @@ stats.binomial stats.geometric stats.poisson stats.chi_square +stats.student_t +stats.t_student stats.gamma stats.lognormal ``` From 3b55c2ec8389628ab4024e4443cf82687bbede3b Mon Sep 17 00:00:00 2001 From: HubertRonald Date: Tue, 9 Jun 2026 22:35:42 -0500 Subject: [PATCH 7/7] Add Student t and regression tests --- spec/test_stats.lua | 2 +- src/luasf/validation.lua | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/spec/test_stats.lua b/spec/test_stats.lua index d8c016f..fd95b6c 100644 --- a/spec/test_stats.lua +++ b/spec/test_stats.lua @@ -43,7 +43,7 @@ end function TestStats:test_module_entry_points() local luasf_root = require("LuaSF") local luastat_root = require("LuaStat") - local luasf_src = require("src.luasf") + local luasf_src = require("luasf") luaunit.assertEquals(type(luasf_root), "table") luaunit.assertEquals(type(luastat_root), "table") diff --git a/src/luasf/validation.lua b/src/luasf/validation.lua index e2af90e..f807688 100644 --- a/src/luasf/validation.lua +++ b/src/luasf/validation.lua @@ -67,6 +67,7 @@ end function M.assert_positive_integer(value, name) M.assert_integer(value, name) 
assert(value >= 1, name .. " must be greater than or equal to 1") + assert(value == floor(value), name .. " must be an integer") end -return M \ No newline at end of file +return M