diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 005dcb8..2312782 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
     rev: v0.8.8
     hooks:
       - id: licenseheaders
-        args: ["-t", ".copyright.tmpl", "-cy", "-f", "-d", "statapp"]
+        args: ["-t", ".copyright.tmpl", "-cy", "-f", "-d", "statapp", "-x", "statapp/_vendor/*.py"]
         pass_filenames: false
   - repo: local
     hooks:
diff --git a/poetry.lock b/poetry.lock
index 1c709bf..1893a7a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -210,6 +210,23 @@ files = [
     {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
 ]
 
+[[package]]
+name = "mpmath"
+version = "1.3.0"
+description = "Python library for arbitrary-precision floating-point arithmetic"
+optional = false
+python-versions = "*"
+files = [
+    {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
+    {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"},
+]
+
+[package.extras]
+develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"]
+docs = ["sphinx"]
+gmpy = ["gmpy2 (>=2.1.0a4)"]
+tests = ["pytest (>=4.6)"]
+
 [[package]]
 name = "nodeenv"
 version = "1.8.0"
@@ -605,6 +622,20 @@ files = [
     {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
 ]
 
+[[package]]
+name = "sympy"
+version = "1.12"
+description = "Computer algebra system (CAS) in Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
+    {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"},
+]
+
+[package.dependencies]
+mpmath = ">=0.19"
+
 [[package]]
 name = "tomli"
 version = "2.0.1"
@@ -771,4 +802,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8,<3.9"
-content-hash = "ff683c2a3f778cd6ad946d6aa4b1f567514f36026e07fccffdb2cd7e86778e0a"
+content-hash = "21505ce00b560ff75b732d5cb5ee983d1aff7b6e5dc919a306ac2147ea14f3ea"
diff --git a/pyproject.toml b/pyproject.toml
index 5a89242..7a9c9a2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ pandas = { version = "^2", markers = "python_version < '3.9'" }
 pylint = { version = "^2", markers = "python_version < '3.9'" }
 # scipy = { version = "^1", markers = "python_version < '3.9'" }
 # openpyxl = "^3.1.2"
+sympy = "^1.12"
 
 
 [build-system]
diff --git a/statapp/__main__.py b/statapp/__main__.py
index 5e760de..2abf198 100644
--- a/statapp/__main__.py
+++ b/statapp/__main__.py
@@ -19,9 +19,12 @@
 #
 import sys
 
+import numpy as np
 from PySide2 import QtCore
 from PySide2.QtWidgets import QApplication
 
+from statapp import calculations
+from statapp.calculations import generateXValues, generateYValues
 from statapp.main_window import MainWindow
 
 
@@ -40,4 +43,18 @@ def main():
     return app.exec_()
 
 if __name__ == "__main__":
+    # For quick debugging
+    N = 10
+    y = generateYValues(100, 5, N)
+    x1 = generateXValues(20, 2, 0, y)
+    x2 = generateXValues(10, 1, 0, y)
+
+    data = np.concatenate([y, x1, x2], axis=1)
+
+    out = calculations.squaredPolynom(data)
+
+    coef = []
+
+    print()
+
     sys.exit(main())
diff --git a/statapp/_vendor/multipolyfit.py b/statapp/_vendor/multipolyfit.py
new file mode 100644
index 0000000..7302051
--- /dev/null
+++ b/statapp/_vendor/multipolyfit.py
@@ -0,0 +1,116 @@
+# Copyright (c) 2023 Matthew Rocklin
+# All rights reserved.
+
+# This source code is distributed under the terms of the BSD license,
+# which allows you to use, modify, and distribute it
+# as long as you comply with the license terms.
+
+# In addition, this code has been modified by Maxim Slipenko and
+# is now also licensed under the GPL-3.0.
+# See the GPL-3.0 license for details.
+
+# TODO: remove
+# pylint: skip-file
+
+from numpy import linalg, zeros, ones, hstack, asarray
+import itertools
+
+def basis_vector(n, i):
+    """ Return an array like [0, 0, ..., 1, ..., 0, 0]
+
+    >>> from statapp._vendor.multipolyfit import basis_vector
+    >>> basis_vector(3, 1)
+    array([0, 1, 0])
+    >>> basis_vector(5, 4)
+    array([0, 0, 0, 0, 1])
+    """
+    x = zeros(n, dtype=int)
+    x[i] = 1
+    return x
+
+def as_tall(x):
+    """ Turns a row vector into a column vector """
+    return x.reshape(x.shape + (1,))
+
+def multipolyfit(xs, y, deg, full=False, model_out=False, powers_out=False):
+    """
+    Least squares multivariate polynomial fit
+
+    Fit a polynomial like ``y = a**2 + 3a - 2ab + 4b**2 - 1``
+    with many covariates a, b, c, ...
+
+    Parameters
+    ----------
+
+    xs : array_like, shape (M, k)
+         x-coordinates of the k covariates over the M sample points
+    y :  array_like, shape(M,)
+         y-coordinates of the sample points.
+    deg : int
+         Degree of the fitting polynomial
+    model_out : bool (defaults to False)
+         If True return a callable function
+         If False return an array of coefficients
+    powers_out : bool (defaults to False)
+         Returns the meaning of each of the coefficients in the form of an
+         iterator that gives the powers over the inputs and 1
+         For example if xs corresponds to the covariates a,b,c then the array
+         [1, 2, 1, 0] corresponds to 1**1 * a**2 * b**1 * c**0
+
+    See Also
+    --------
+        numpy.polyfit
+
+    """
+    y = asarray(y).squeeze()
+    rows = y.shape[0]
+    xs = asarray(xs)
+    num_covariates = xs.shape[1]
+    xs = hstack((ones((xs.shape[0], 1), dtype=xs.dtype), xs))
+
+    generators = [basis_vector(num_covariates+1, i)
+                  for i in range(num_covariates+1)]
+
+    # All combinations of degrees
+    powers = [sum(x) for x in itertools.combinations_with_replacement(generators, deg)]
+
+    # Raise data to specified degree pattern, stack in order
+    A = hstack(asarray([as_tall((xs**p).prod(1)) for p in powers]))
+
+    beta = linalg.lstsq(A, y, rcond=None)[0]
+
+    if model_out:
+        return mk_model(beta, powers)
+
+    if powers_out:
+        return beta, powers
+    return beta
+
+def mk_model(beta, powers):
+    """ Create a callable python function out of beta/powers from multipolyfit
+
+    This function is callable from within multipolyfit using the model_out flag
+    """
+    # Create a function that takes in many x values
+    # and returns an approximate y value
+    def model(*args):
+        num_covariates = len(powers[0]) - 1
+        if len(args)!=(num_covariates):
+            raise ValueError("Expected %d inputs"%num_covariates)
+        xs = asarray((1,) + args)
+        return sum([coeff * (xs**p).prod()
+                             for p, coeff in zip(powers, beta)])
+    return model
+
+def mk_sympy_function(beta, powers):
+    """ Build a sympy expression: the sum of each coefficient times its term """
+    from sympy import Add
+    return Add(*[coeff * term for term, coeff in zip(get_terms(powers), beta)])
+
+def get_terms(powers):
+    """ Return one sympy monomial per entry of powers, over 1, x0, x1, ... """
+    from sympy import symbols, Mul, S
+    num_covariates = len(powers[0]) - 1
+    xs = (S.One,) + symbols('x0:%d' % num_covariates)
+
+    return [Mul(*[x ** deg for x, deg in zip(xs, power)]) for power in powers]
diff --git a/statapp/calculations.py b/statapp/calculations.py
index 25381ea..8242f4d 100644
--- a/statapp/calculations.py
+++ b/statapp/calculations.py
@@ -21,6 +21,7 @@ from dataclasses import dataclass
 
 import numpy as np
 import pandas as pd
+from statapp._vendor.multipolyfit import multipolyfit, mk_sympy_function
 
 DIRECT_LINK = 0
 INDIRECT_LINK = 1
@@ -92,3 +93,19 @@ def linearPolynom(inputData) -> LinearPolynomResult:
         out.to_numpy(),
         np.float64(mse[0])
     )
+
+def squaredPolynom(inputData) -> list:
+    """Fit a second-degree polynomial to inputData and return its term powers.
+
+    Column 0 of inputData is taken as y; the remaining columns as x.
+    The x frame and the fitted sympy expression are printed (debug output).
+    """
+    x = inputData[:, 1:]
+    y = inputData[:, 0]
+    data = pd.DataFrame(x)
+    betas, powers = multipolyfit(x, y, 2, powers_out=True)
+    res = mk_sympy_function(betas, powers)
+    print(data)
+    print(res)
+
+    return powers