From e4c44014e83a5c75fcf30455eb61375bf65503d9 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 14:23:02 -0500 Subject: [PATCH 01/75] Touch init.py --- paddleboat/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 paddleboat/__init__.py diff --git a/paddleboat/__init__.py b/paddleboat/__init__.py new file mode 100644 index 0000000..e69de29 From dde17609f90346ba0a989ecf488ce11cb720f482 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 14:24:15 -0500 Subject: [PATCH 02/75] Add lisence --- paddleboat/LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 paddleboat/LICENSE diff --git a/paddleboat/LICENSE b/paddleboat/LICENSE new file mode 100644 index 0000000..97c8925 --- /dev/null +++ b/paddleboat/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) [2019] [NYUPredocs] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. From 16bc4f56e56f6382814968fa0487a2b12de67544 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 14:26:25 -0500 Subject: [PATCH 03/75] Add readme --- paddleboat/README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 paddleboat/README.md diff --git a/paddleboat/README.md b/paddleboat/README.md new file mode 100644 index 0000000..e69de29 From 423468b2edade05c6e67d752ea294bfef52ea61c Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 14:30:37 -0500 Subject: [PATCH 04/75] Readme title/subtitle --- paddleboat/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddleboat/README.md b/paddleboat/README.md index e69de29..9f52d0c 100644 --- a/paddleboat/README.md +++ b/paddleboat/README.md @@ -0,0 +1,3 @@ +# paddleboat + +paddling together since 2019. From 9571401ee3b507fc1fcb9c54f21351d7ba90c3d0 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 14:32:49 -0500 Subject: [PATCH 05/75] Team members and function add --- paddleboat/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/paddleboat/README.md b/paddleboat/README.md index 9f52d0c..a82e07f 100644 --- a/paddleboat/README.md +++ b/paddleboat/README.md @@ -1,3 +1,11 @@ # paddleboat paddling together since 2019. + +## Function + +Implements OLS. + +## Team members + +Nadav, Harriet, Casey and Joel. From 2039b463987b8d867cf925a935191c6d1f704455 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 14:35:09 -0500 Subject: [PATCH 06/75] Move to project 2 folder --- .../project_2_packages/paddleboat/LICENSE | 21 +++++++++++++++++++ .../project_2_packages/paddleboat/README.md | 11 ++++++++++ .../project_2_packages/paddleboat/__init__.py | 0 3 files changed, 32 insertions(+) create mode 100644 Projects/project_2_packages/paddleboat/LICENSE create mode 100644 Projects/project_2_packages/paddleboat/README.md create mode 100644 Projects/project_2_packages/paddleboat/__init__.py diff --git a/Projects/project_2_packages/paddleboat/LICENSE b/Projects/project_2_packages/paddleboat/LICENSE new file mode 100644 index 0000000..97c8925 --- /dev/null +++ b/Projects/project_2_packages/paddleboat/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) [2019] [NYUPredocs] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Projects/project_2_packages/paddleboat/README.md b/Projects/project_2_packages/paddleboat/README.md new file mode 100644 index 0000000..a82e07f --- /dev/null +++ b/Projects/project_2_packages/paddleboat/README.md @@ -0,0 +1,11 @@ +# paddleboat + +paddling together since 2019. + +## Function + +Implements OLS. + +## Team members + +Nadav, Harriet, Casey and Joel. diff --git a/Projects/project_2_packages/paddleboat/__init__.py b/Projects/project_2_packages/paddleboat/__init__.py new file mode 100644 index 0000000..e69de29 From d2ddc754df2a86fc980ed70bd1ca90ed7f92b65b Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 14:37:34 -0500 Subject: [PATCH 07/75] Remove old paddleboat folder --- paddleboat/LICENSE | 21 --------------------- paddleboat/README.md | 11 ----------- paddleboat/__init__.py | 0 3 files changed, 32 deletions(-) delete mode 100644 paddleboat/LICENSE delete mode 100644 paddleboat/README.md delete mode 100644 paddleboat/__init__.py diff --git a/paddleboat/LICENSE b/paddleboat/LICENSE deleted file mode 100644 index 97c8925..0000000 --- a/paddleboat/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) [2019] [NYUPredocs] - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/paddleboat/README.md b/paddleboat/README.md deleted file mode 100644 index a82e07f..0000000 --- a/paddleboat/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# paddleboat - -paddling together since 2019. - -## Function - -Implements OLS. - -## Team members - -Nadav, Harriet, Casey and Joel. diff --git a/paddleboat/__init__.py b/paddleboat/__init__.py deleted file mode 100644 index e69de29..0000000 From 9bbc68e11fd814fdc991daab80a5c8ce41db1c2a Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 14:41:57 -0500 Subject: [PATCH 08/75] Add OLS.py --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 Projects/project_2_packages/paddleboat/OLS/OLS.py diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py new file mode 100644 index 0000000..e69de29 From 276e45a4ca0c8ed57925291e0e8c187011bc616b Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 14:51:05 -0500 Subject: [PATCH 09/75] Name functions --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index e69de29..04fe513 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -0,0 +1,11 @@ +import pandas as pd +import numpy as np + +def get_betas(): + print("Working!") + +def get_ses(): + print("Working!") + +def main(): + print("Working!") From ecd2e7bff9350e804312896ad77125b223713f7d Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 14:59:36 -0500 Subject: [PATCH 10/75] get_betas pseudocode --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 04fe513..113a9b6 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -1,7 +1,9 @@ import pandas as pd import numpy as np -def get_betas(): +def get_betas(X, Y): + betas = (transpose(X) * X)^(-1) * (transpose(X) * Y) + return betas print("Working!") def get_ses(): From 2b1b005a9302e65ef96477d170490a264f0b5ae0 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:01:47 -0500 Subject: [PATCH 11/75] Get residuals function --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 113a9b6..e766369 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -6,6 +6,9 @@ def get_betas(X, Y): return betas print("Working!") +def get_residuals(betas, X): + print("Working!") + def get_ses(): print("Working!") From d989cef2b8da9321c31eb10ddabdba886a179c5b Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:02:27 -0500 Subject: [PATCH 12/75] Residuals pseudo-code --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index e766369..c295d8b 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -7,6 +7,8 @@ def get_betas(X, Y): print("Working!") def get_residuals(betas, X): + residuals = betas * X + return residuals print("Working!") def get_ses(): From f352cd5f41f0067ce3c8d6e1a7e16544b548da58 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:04:17 -0500 Subject: [PATCH 13/75] Get SEs pseudocode --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index c295d8b..e67002d 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -12,6 +12,9 @@ def get_residuals(betas, X): print("Working!") def get_ses(): + residuals2 = residuals^2 + XX = (transpose(X) * X)^(-1) + ses = (residuals2 / (N-1)) * XX print("Working!") def main(): From 349564a7bc03b40f535b73a56583f3a0db7181a5 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:07:18 -0500 Subject: [PATCH 14/75] get_n pseudocode --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index e67002d..5fdfd6e 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -11,6 +11,15 @@ def get_residuals(betas, X): return residuals print("Working!") +def get_n(X, Y): + n_X = length(X) + n_Y = length(Y) + if n_X == n_Y: + n = n_X + else: + print("Error!") + return n + def get_ses(): residuals2 = residuals^2 XX = (transpose(X) * X)^(-1) From 16e9cfebdc9109b11485d42ae0cea994acacefd1 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:10:11 -0500 Subject: [PATCH 15/75] Reorder working messages --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 5fdfd6e..6bc103f 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -3,13 +3,13 @@ def get_betas(X, Y): betas = (transpose(X) * X)^(-1) * (transpose(X) * Y) - return betas print("Working!") + return betas def get_residuals(betas, X): residuals = betas * X - return residuals print("Working!") + return residuals def get_n(X, Y): n_X = length(X) @@ -18,6 +18,7 @@ def get_n(X, Y): n = n_X else: print("Error!") + print("Working!") return n def get_ses(): @@ -25,6 +26,7 @@ def get_ses(): XX = (transpose(X) * X)^(-1) ses = (residuals2 / (N-1)) * XX print("Working!") + return ses def main(): print("Working!") From 6b54a09e5502e18b565e687e29d0858108115e32 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:10:54 -0500 Subject: [PATCH 16/75] Space functions better --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 6bc103f..c2bfb0c 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -3,29 +3,38 @@ def get_betas(X, Y): betas = (transpose(X) * X)^(-1) * (transpose(X) * Y) + print("Working!") + return betas def get_residuals(betas, X): residuals = betas * X + print("Working!") + return residuals def get_n(X, Y): n_X = length(X) n_Y = length(Y) + if n_X == n_Y: n = n_X else: print("Error!") + print("Working!") + return n def get_ses(): residuals2 = residuals^2 XX = (transpose(X) * X)^(-1) ses = (residuals2 / (N-1)) * XX + print("Working!") + return ses def main(): From c248d109259594ddcf4c9cbe72dbe75c7f52e509 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Fri, 6 Dec 2019 15:15:08 -0500 Subject: [PATCH 17/75] merging for README and setup.py --- .../project_2_packages/paddleboat/README.md | 2 +- .../project_2_packages/paddleboat/setup.py | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 Projects/project_2_packages/paddleboat/setup.py diff --git a/Projects/project_2_packages/paddleboat/README.md b/Projects/project_2_packages/paddleboat/README.md index a82e07f..2823ef8 100644 --- a/Projects/project_2_packages/paddleboat/README.md +++ b/Projects/project_2_packages/paddleboat/README.md @@ -8,4 +8,4 @@ Implements OLS. ## Team members -Nadav, Harriet, Casey and Joel. +Nadav Tadelis, Harriet Jeon, Casey McQuillan and Joel Becker. diff --git a/Projects/project_2_packages/paddleboat/setup.py b/Projects/project_2_packages/paddleboat/setup.py new file mode 100644 index 0000000..8af6353 --- /dev/null +++ b/Projects/project_2_packages/paddleboat/setup.py @@ -0,0 +1,22 @@ +import setuptools + +with open("README.md", "r") as fh: + long_description = fh.read() + +setuptools.setup( + name="paddleboat2sls", + version="0.0.1", + author="Harriet Jeon, Nadav Tadelis, Casey McQuillan, Joel Becker", + author_email="joelhbkr@gmail.com", + description="Practice publishing an econometric python package", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/nyupredocs/modularizationandtesting", + packages=setuptools.find_packages(), + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires='>=3.6', +) \ No newline at end of file From cba6c0db5bf7ca06030db17bc486f90a8b302d5e Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:15:44 -0500 Subject: [PATCH 18/75] Some docstrings and change to get_residuals --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index c2bfb0c..998c5ba 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -2,17 +2,18 @@ import numpy as np def get_betas(X, Y): + """Get betas (according to OLS formula)""" betas = (transpose(X) * X)^(-1) * (transpose(X) * Y) - - print("Working!") + print("Working!") return betas -def get_residuals(betas, X): - residuals = betas * X +def get_residuals(betas, X, Y): + """Get residuals (according to OLS formula)""" + y_hat = betas * X + residuals = Y - y_hat print("Working!") - return residuals def get_n(X, Y): @@ -25,7 +26,6 @@ def get_n(X, Y): print("Error!") print("Working!") - return n def get_ses(): @@ -34,7 +34,6 @@ def get_ses(): ses = (residuals2 / (N-1)) * XX print("Working!") - return ses def main(): From 80196c73965ac115c34812adfc982369d7a15672 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:16:54 -0500 Subject: [PATCH 19/75] Remaining SEs --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 998c5ba..4346e8d 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -17,6 +17,7 @@ def get_residuals(betas, X, Y): return residuals def get_n(X, Y): + """Get N, check independent vs dependent variables""" n_X = length(X) n_Y = length(Y) @@ -29,6 +30,7 @@ def get_n(X, Y): return n def get_ses(): + """Get SEs (according to OLS formula)""" residuals2 = residuals^2 XX = (transpose(X) * X)^(-1) ses = (residuals2 / (N-1)) * XX From 3d50c95760102705dfef8db6adc4ef76b55f5e25 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:21:14 -0500 Subject: [PATCH 20/75] Docstring main, comment fake transpose --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 4346e8d..9984484 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -3,7 +3,7 @@ def get_betas(X, Y): """Get betas (according to OLS formula)""" - betas = (transpose(X) * X)^(-1) * (transpose(X) * Y) + betas = (transpose(X) * X)^(-1) * (transpose(X) * Y) # transpose is not a real function print("Working!") return betas @@ -32,11 +32,12 @@ def get_n(X, Y): def get_ses(): """Get SEs (according to OLS formula)""" residuals2 = residuals^2 - XX = (transpose(X) * X)^(-1) + XX = (transpose(X) * X)^(-1) # transpose is not a real function ses = (residuals2 / (N-1)) * XX print("Working!") return ses def main(): + """Performs OLS, prints output to table""" print("Working!") From d39121fc4ece863f20f4c6a721bb55706103eb56 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:22:59 -0500 Subject: [PATCH 21/75] Parameterise get_ses --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 9984484..496fa5b 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -29,15 +29,17 @@ def get_n(X, Y): print("Working!") return n -def get_ses(): +def get_ses(residuals, X, Y): """Get SEs (according to OLS formula)""" residuals2 = residuals^2 XX = (transpose(X) * X)^(-1) # transpose is not a real function + N = get_n(X, Y) ses = (residuals2 / (N-1)) * XX print("Working!") return ses + def main(): """Performs OLS, prints output to table""" print("Working!") From 08c785d9c1d6ba3b1f63323986e3d3114791d6e6 Mon Sep 17 00:00:00 2001 From: Casey McQuillan Date: Fri, 6 Dec 2019 15:23:50 -0500 Subject: [PATCH 22/75] Function to return degrees of freedom when given np.array. --- .../project_2_packages/paddleboat/OLS/OLS_casey.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 Projects/project_2_packages/paddleboat/OLS/OLS_casey.py diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS_casey.py b/Projects/project_2_packages/paddleboat/OLS/OLS_casey.py new file mode 100644 index 0000000..46da91d --- /dev/null +++ b/Projects/project_2_packages/paddleboat/OLS/OLS_casey.py @@ -0,0 +1,13 @@ +import numpy as np +import pandas as pd + + +def deg_freedom(dependent_variable_data) + if isinstance(dependent_variable_data, np.array): + dimensions = np.shape(dependent_variable_data) + deg_freedom = dimension[0] - dimensions[1] + + return deg_freedom + + + From b5501b92cff2b5d2b3be89c0d1b1fffd183acfb2 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:26:08 -0500 Subject: [PATCH 23/75] r^2 function --- Projects/project_2_packages/paddleboat/OLS/OLS.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 496fa5b..2b8b831 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -39,6 +39,18 @@ def get_ses(residuals, X, Y): print("Working!") return ses +def get_r2(Y, X, betas): + """Get R^2""" + y_hat = X * betas + y_bar = mean(y) + + SSR = sum((y_hat - y_bar)^2) + SST = sum((y - y_bar)^2) + + r2 = SSR / SST + + print("Working!") + return r2 def main(): """Performs OLS, prints output to table""" From 25af723f7e02af049bbc2567cf82f4371d57e331 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Fri, 6 Dec 2019 15:43:56 -0500 Subject: [PATCH 24/75] creating all the ols functions --- .../project_2_packages/paddleboat/OLS/OLS.py | 158 +++++++++++++----- 1 file changed, 117 insertions(+), 41 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 2b8b831..466594e 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -1,57 +1,133 @@ -import pandas as pd import numpy as np +import pandas as pd +import math +import scipy +np.set_printoptions(suppress=True) -def get_betas(X, Y): - """Get betas (according to OLS formula)""" - betas = (transpose(X) * X)^(-1) * (transpose(X) * Y) # transpose is not a real function +# Function computing a least squares fit +def beta_ols(Y, X): + ''' + Estimate OLS coefficients - print("Working!") - return betas + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix -def get_residuals(betas, X, Y): - """Get residuals (according to OLS formula)""" - y_hat = betas * X - residuals = Y - y_hat + Returns + ------- + beta_hat : vector of coefficients + ''' - print("Working!") - return residuals + if len(Y.shape) != 1: + return print('ERROR: Y must be an Nx1 matrix') + elif Y.shape[0] != X.shape[0]: + return print('ERROR: Y and X must have the same number of observations') + else: + left = X.transpose().dot(X).values + right = X.transpose().dot(Y).values + beta_hat = np.linalg.inv(left).dot(right) + return beta_hat -def get_n(X, Y): - """Get N, check independent vs dependent variables""" - n_X = length(X) - n_Y = length(Y) - if n_X == n_Y: - n = n_X - else: - print("Error!") +def resids(Y, X): + ''' + Estimate OLS residuals + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + + Returns + ------- + e : vector of residuals + ''' + e = beta_ols(Y,X).dot(X.values.T)-Y + return e + + +def Sigma(Y,X): + ''' + Estimate OLS variance-covariance matrix + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix - print("Working!") - return n + Returns + ------- + e : var-cov matrix as pandas df + ''' + e = resids(Y,X) + std_hat = e.dot(e.T)/(X.shape[0]-X.shape[1]) + Sigma = std_hat*np.linalg.inv(X.transpose().dot(X).values) + return pd.DataFrame(Sigma) -def get_ses(residuals, X, Y): - """Get SEs (according to OLS formula)""" - residuals2 = residuals^2 - XX = (transpose(X) * X)^(-1) # transpose is not a real function - N = get_n(X, Y) - ses = (residuals2 / (N-1)) * XX +def variance_ols(Y,X): + ''' + Estimate OLS variance-covariance matrix - print("Working!") - return ses + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix -def get_r2(Y, X, betas): - """Get R^2""" - y_hat = X * betas - y_bar = mean(y) + Returns + ------- + var : variance of coefficients + ''' + diags = np.diagonal(Sigma(Y, X)) + var = np.sqrt(diags) + return var + + +def r2_ols(Y, X): + ''' + Estimate R^2 for OLS + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + + Returns + ------- + R2 : value of R^2 + ''' + + y_hat = beta_ols(Y,X).dot(X.values.T) + y_bar = np.mean(y) - SSR = sum((y_hat - y_bar)^2) - SST = sum((y - y_bar)^2) + SSR = np.sum((y_hat - y_bar)**2) + SST = np.sum((y - y_bar)**2) r2 = SSR / SST - - print("Working!") return r2 -def main(): - """Performs OLS, prints output to table""" - print("Working!") +def least_sq(Y, X): + ''' + Output nicely OLS results + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + + Returns + ------- + R2 : value of R^2 + ''' + + print('Coefficients = ', beta_ols(Y, X)) + print('Coeff. SErrs = ', np.sqrt(np.diagonal(Sigma(Y,X)))) + print('') + print('95% Confidence Interval for Coefficients') + print(' Lower Bound:', beta_ols(Y, X) - 1.96*np.sqrt(np.diagonal(Sigma(Y,X)))) + print(' Upper Bound:', beta_ols(Y, X) + 1.96*np.sqrt(np.diagonal(Sigma(Y,X)))) + print('') + print('R-Squared:', r2_ols(Y,X)) + print('') + print("Variance-Covariance Matrix:") + return Sigma(Y,X) From 0c1fe8cd677f98d1c1ee476020baef5baa10df8c Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 15:54:34 -0500 Subject: [PATCH 25/75] Add LASSO.py --- .../paddleboat/LASSO/LASSO.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 Projects/project_2_packages/paddleboat/LASSO/LASSO.py diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py new file mode 100644 index 0000000..51b58a7 --- /dev/null +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -0,0 +1,57 @@ +import pandas as pd +import numpy as np + +def get_betas(X, Y): + """Get betas (according to OLS formula)""" + betas = (transpose(X) * X)^(-1) * (transpose(X) * Y) # transpose is not a real function + + print("Working!") + return betas + +def get_residuals(betas, X, Y): + """Get residuals (according to OLS formula)""" + y_hat = betas * X + residuals = Y - y_hat + + print("Working!") + return residuals + +def get_n(X, Y): + """Get N, check independent vs dependent variables""" + n_X = length(X) + n_Y = length(Y) + + if n_X == n_Y: + n = n_X + else: + print("Error!") + + print("Working!") + return n + +def get_ses(residuals, X, Y): + """Get SEs (according to OLS formula)""" + residuals2 = residuals^2 + XX = (transpose(X) * X)^(-1) # transpose is not a real function + N = get_n(X, Y) + ses = (residuals2 / (N-1)) * XX + + print("Working!") + return ses + +def get_r2(Y, X, betas): + """Get R^2""" + y_hat = X * betas + y_bar = mean(y) + + SSR = sum((y_hat - y_bar)^2) + SST = sum((y - y_bar)^2) + + r2 = SSR / SST + + print("Working!") + return r2 + +def main(): + """Performs OLS, prints output to table""" + print("Working!") From 543e35a088cb02cbd0613515ccc2ea0c43c33d47 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 16:18:20 -0500 Subject: [PATCH 26/75] Loss function --- Projects/project_2_packages/paddleboat/LASSO/LASSO.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 51b58a7..0ba684f 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -52,6 +52,14 @@ def get_r2(Y, X, betas): print("Working!") return r2 +def get_loss_function(SSE, lambda, betas): + """Get loss function""" + betas_without_intercept = betas[1:length(betas)] + loss_function = SSE + lambda * betas_without_intercept + + print("Working!") + return r2 + def main(): """Performs OLS, prints output to table""" print("Working!") From a78f739c6d3139066ff2b4d48355bae5c4da5403 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 16:19:29 -0500 Subject: [PATCH 27/75] Rename loss function return --- Projects/project_2_packages/paddleboat/LASSO/LASSO.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 0ba684f..330e2cb 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -58,7 +58,7 @@ def get_loss_function(SSE, lambda, betas): loss_function = SSE + lambda * betas_without_intercept print("Working!") - return r2 + return loss_function def main(): """Performs OLS, prints output to table""" From 2c9c1e60fe9c35e97ba650f5c626bfbdae5f2acc Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 16:20:52 -0500 Subject: [PATCH 28/75] Get SSE function --- Projects/project_2_packages/paddleboat/LASSO/LASSO.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 330e2cb..80416b2 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -52,6 +52,14 @@ def get_r2(Y, X, betas): print("Working!") return r2 +def get_sse(Y, X, betas): + """Get sum of squared errors""" + y_hat = X * beta + sse = (Y - y_hat) ** 2 + + print("Working!") + return sse + def get_loss_function(SSE, lambda, betas): """Get loss function""" betas_without_intercept = betas[1:length(betas)] From 206303f92620a8e518eba267056192d8ecfddeb7 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 16:23:56 -0500 Subject: [PATCH 29/75] Correct loss function --- Projects/project_2_packages/paddleboat/LASSO/LASSO.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 80416b2..9166c0b 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -63,7 +63,7 @@ def get_sse(Y, X, betas): def get_loss_function(SSE, lambda, betas): """Get loss function""" betas_without_intercept = betas[1:length(betas)] - loss_function = SSE + lambda * betas_without_intercept + loss_function = SSE + lambda * sum(abs(betas_without_intercept)) print("Working!") return loss_function From 87945fb5b7c487ef1352ec7a4b24736169a3a193 Mon Sep 17 00:00:00 2001 From: Casey McQuillan Date: Fri, 6 Dec 2019 16:32:04 -0500 Subject: [PATCH 30/75] Fixed transpose function --- Projects/project_2_packages/paddleboat/LASSO/LASSO.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 9166c0b..7c8f087 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -3,7 +3,7 @@ def get_betas(X, Y): """Get betas (according to OLS formula)""" - betas = (transpose(X) * X)^(-1) * (transpose(X) * Y) # transpose is not a real function + betas = (np.transpose(X) * X)^(-1) * (np.transpose(X) * Y) # transpose is a numpy function print("Working!") return betas @@ -32,7 +32,7 @@ def get_n(X, Y): def get_ses(residuals, X, Y): """Get SEs (according to OLS formula)""" residuals2 = residuals^2 - XX = (transpose(X) * X)^(-1) # transpose is not a real function + XX = (np.transpose(X) * X)^(-1) # transpose is not a real function N = get_n(X, Y) ses = (residuals2 / (N-1)) * XX From 4ac9bc9cfcfb84ab2774485e0d90b5be0a85ef6b Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 16:35:54 -0500 Subject: [PATCH 31/75] Barebones grid search function --- .../project_2_packages/paddleboat/LASSO/LASSO.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 9166c0b..f81ed2b 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -68,6 +68,22 @@ def get_loss_function(SSE, lambda, betas): print("Working!") return loss_function +def get_coefficients_given_lambda(lambda): + return(coefficients) + +def pick_lowest_lamda(): + lambdas = [1,10] + losses = list(length(lambda)) + for lambda in lambdas: + loss = loss_function(lambda) + list.append(loss) + min_loss = min(losses) + lowest_lambda = loss(min_loss_position_in_list) + return(lowest_lambda) + + + loss_values = loss_function(lambda) + def main(): """Performs OLS, prints output to table""" print("Working!") From 4d35d4c15423eba9b1fe6072ee5b55b2f1fedd9e Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 16:37:57 -0500 Subject: [PATCH 32/75] Improve presentation of grid search func --- Projects/project_2_packages/paddleboat/LASSO/LASSO.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index f81ed2b..b2f6869 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -72,13 +72,18 @@ def get_coefficients_given_lambda(lambda): return(coefficients) def pick_lowest_lamda(): + """Pick lowest lambda""" lambdas = [1,10] losses = list(length(lambda)) + for lambda in lambdas: loss = loss_function(lambda) list.append(loss) + min_loss = min(losses) lowest_lambda = loss(min_loss_position_in_list) + + print("Working!") return(lowest_lambda) From e40b0ac421bca1ea2133f76e307f4f0aff85df48 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 16:39:01 -0500 Subject: [PATCH 33/75] Remove stray line from grid func --- Projects/project_2_packages/paddleboat/LASSO/LASSO.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 75b3832..fbcd792 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -86,9 +86,6 @@ def pick_lowest_lamda(): print("Working!") return(lowest_lambda) - - loss_values = loss_function(lambda) - def main(): """Performs OLS, prints output to table""" print("Working!") From 45d7022bf956ff9d79f40e543bada4a1ec35f1fd Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 16:44:37 -0500 Subject: [PATCH 34/75] Correctly format docstring quotes --- .../paddleboat/LASSO/LASSO.py | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index fbcd792..be9c045 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -2,14 +2,18 @@ import numpy as np def get_betas(X, Y): - """Get betas (according to OLS formula)""" + ''' + Get betas (according to OLS formula) + ''' betas = (np.transpose(X) * X)^(-1) * (np.transpose(X) * Y) # transpose is a numpy function print("Working!") return betas def get_residuals(betas, X, Y): - """Get residuals (according to OLS formula)""" + ''' + Get residuals (according to OLS formula) + ''' y_hat = betas * X residuals = Y - y_hat @@ -17,7 +21,9 @@ def get_residuals(betas, X, Y): return residuals def get_n(X, Y): - """Get N, check independent vs dependent variables""" + ''' + Get N, check independent vs dependent variables + ''' n_X = length(X) n_Y = length(Y) @@ -30,7 +36,9 @@ def get_n(X, Y): return n def get_ses(residuals, X, Y): - """Get SEs (according to OLS formula)""" + ''' + Get SEs (according to OLS formula) + ''' residuals2 = residuals^2 XX = (np.transpose(X) * X)^(-1) # transpose is not a real function N = get_n(X, Y) @@ -40,7 +48,9 @@ def get_ses(residuals, X, Y): return ses def get_r2(Y, X, betas): - """Get R^2""" + ''' + Get R^2 + ''' y_hat = X * betas y_bar = mean(y) @@ -53,7 +63,8 @@ def get_r2(Y, X, betas): return r2 def get_sse(Y, X, betas): - """Get sum of squared errors""" + ''' + Get sum of squared errors''' y_hat = X * beta sse = (Y - y_hat) ** 2 @@ -61,7 +72,9 @@ def get_sse(Y, X, betas): return sse def get_loss_function(SSE, lambda, betas): - """Get loss function""" + ''' + Get loss function + ''' betas_without_intercept = betas[1:length(betas)] loss_function = SSE + lambda * sum(abs(betas_without_intercept)) @@ -69,10 +82,15 @@ def get_loss_function(SSE, lambda, betas): return loss_function def get_coefficients_given_lambda(lambda): + ''' + Get coefficients + ''' return(coefficients) def pick_lowest_lamda(): - """Pick lowest lambda""" + ''' + Pick lowest lambda + ''' lambdas = [1,10] losses = list(length(lambda)) @@ -87,5 +105,7 @@ def pick_lowest_lamda(): return(lowest_lambda) def main(): - """Performs OLS, prints output to table""" + ''' + Performs LASSO, prints output to table + ''' print("Working!") From e838c523126e26f0fecd0e8f1fe7fa6a9c00b8e7 Mon Sep 17 00:00:00 2001 From: Casey McQuillan Date: Fri, 6 Dec 2019 16:47:47 -0500 Subject: [PATCH 35/75] Lambda -> Lamb --- .../paddleboat/LASSO/LASSO.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 062e63d..e98f764 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -54,35 +54,34 @@ def get_r2(Y, X, betas): def get_sse(Y, X, betas): """Get sum of squared errors""" - y_hat = X * beta + y_hat = X * betas sse = (Y - y_hat) ** 2 print("Working!") return sse -def get_loss_function(SSE, lambda, betas): +def get_loss_function(SSE, lamb, betas): """Get loss function""" betas_without_intercept = betas[1:length(betas)] - loss_function = SSE + lambda * sum(abs(betas_without_intercept)) + loss_function = SSE + lamb * sum(abs(betas_without_intercept)) print("Working!") return loss_function -def get_coefficients_given_lambda(lambda): +def get_coefficients_given_lamb(lamb): return(coefficients) def pick_lowest_lamda(): - lambdas = [1,10] - losses = list(length(lambda)) - for lambda in lambdas: - loss = loss_function(lambda) + lambs = [1,10] + losses = list(length(lamb)) + for lamb in lambs: + loss = loss_function(lamb) list.append(loss) min_loss = min(losses) - lowest_lambda = loss(min_loss_position_in_list) - return(lowest_lambda) - + lowest_lamb = loss(min_loss_position_in_list) + return(lowest_lamb) - loss_values = loss_function(lambda) + loss_values = loss_function(lamb) def main(): """Performs OLS, prints output to table""" From 5404c2d0822d3ff8e6fb1c9ea9cbdeefa55d6e4f Mon Sep 17 00:00:00 2001 From: Harriet Jeon Date: Fri, 6 Dec 2019 16:56:54 -0500 Subject: [PATCH 36/75] made changes to lambda --- Projects/project_2_packages/paddleboat/LASSO/LASSO.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 4b63e69..4dbac93 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -91,10 +91,15 @@ def pick_lowest_lamda(): ''' Pick lowest lambda ''' - lambs = [1,10] + lambs = [0.001, 0.01, 0.1, 0.5, 1, 2, 10] + l_num = length(lam) + pred_num = X.shape[1] losses = list(length(lamb)) - for lamb in lambs: + # prepare data for enumerate + coeff_a = np.zeros((l_num, pred_num)) + + for ind, i in enumerate(lambs): loss = loss_function(lamb) list.append(loss) From 2d69dc4be711118390b08e382f0ac89590ad676d Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Fri, 6 Dec 2019 16:57:08 -0500 Subject: [PATCH 37/75] changing psuedo code to python for lasso --- .../paddleboat/LASSO/LASSO.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index fbcd792..2049513 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -60,31 +60,36 @@ def get_sse(Y, X, betas): print("Working!") return sse -def get_loss_function(SSE, lambda, betas): +def get_loss_function(SSE, lamb, betas): """Get loss function""" - betas_without_intercept = betas[1:length(betas)] - loss_function = SSE + lambda * sum(abs(betas_without_intercept)) + betas_no_intercept = betas[1:len(betas)] + loss_function = SSE + lamb * np.sum(np.abs(betas_no_intercept)) print("Working!") return loss_function -def get_coefficients_given_lambda(lambda): +def get_coeffs_given_lambda(X, Y, lamb): + Z = # STANDARDIZED X + Y_c = # CENTERED Y + coefficients = np.linalg.inv(Z.transpose().dot(Z).values() + lamb * np.identity(X.shape[1])).dot(Z.transpose().dot(Y_c)) return(coefficients) -def pick_lowest_lamda(): +def pick_lowest_lambda(X, Y): """Pick lowest lambda""" - lambdas = [1,10] - losses = list(length(lambda)) + lambs = range(0, 1, 100) + losses = list() - for lambda in lambdas: - loss = loss_function(lambda) - list.append(loss) + for l in lambs: + coeffs = get_coeffs_given_lambda(X, Y, l) + SSE = get_sse(Y, X, coeffs) + loss = loss_function(SSE, l, coeffs) + losses.append(loss) min_loss = min(losses) lowest_lambda = loss(min_loss_position_in_list) print("Working!") - return(lowest_lambda) + return(lowest_lamb) def main(): """Performs OLS, prints output to table""" From 0e32891c2229efc5cbd470914c05693d29774a87 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Fri, 6 Dec 2019 16:59:55 -0500 Subject: [PATCH 38/75] Update functionality in readme --- Projects/project_2_packages/paddleboat/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Projects/project_2_packages/paddleboat/README.md b/Projects/project_2_packages/paddleboat/README.md index 2823ef8..20e3efb 100644 --- a/Projects/project_2_packages/paddleboat/README.md +++ b/Projects/project_2_packages/paddleboat/README.md @@ -4,7 +4,13 @@ paddling together since 2019. ## Function -Implements OLS. +### OLS + +`OLS.py` implements OLS. + +### LASSO + +`LASSO.py` implements LASSO. ## Team members From 9aa7b8fce066047a27ab51cc4ea73675fba9ced7 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 09:53:01 -0500 Subject: [PATCH 39/75] correcting some base functions, computing lasso coeffs --- .../paddleboat/LASSO/LASSO_clean.py | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py new file mode 100644 index 0000000..d1b36ae --- /dev/null +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py @@ -0,0 +1,102 @@ +import pandas as pd +import numpy as np +import OLS as ols +from sklearn import preprocessing + +def get_sse(Y, X, betas): + ''' + Get sum of square errors + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + betas : Vector of estimated coefficients + + Returns + ------- + sse : Sum of square errors + ''' + + e = betas.dot(X.values.T)-Y + sse = np.sum(e**2) + return sse + + +def get_loss_function(Y, X, lamb, betas): + """ + Compute loss function + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix (no intercept column) + lamb : Lambda value to use for L2 + betas : Vector of estimated coefficients + + Returns + ------- + loss : Computed loss + """ + if np.sum(X[1]) == len(X[1]): + return print('ERROR: X should not have an intercept') + + loss = get_sse(Y, X, betas) + lamb * np.sum(np.abs(betas)) + return loss + + +def beta_lasso(X, Y, lamb): + """ + Compute lasso coeffs + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix (no intercept column) + lamb : Lambda value to use for L2 + + Returns + ------- + coefficients : Vector of Lasso coefficients + """ + Z = preprocessing.scale(X_train) + Y_c = Y - np.mean(Y) + + left = np.linalg.inv(Z.transpose().dot(Z).values() + lamb * np.identity(X.shape[1])) + right = Z.transpose().dot(Y_c) + coefficients = left.dot(right) + return(coefficients) + +def pick_lowest_lambda(X, Y): + """ + Compute loss function + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix (no intercept column) + lamb : Lambda value to use for L1 + betas : Vector of estimated coefficients + + Returns + ------- + loss : Computed loss + """ + lambs = range(0, 1, 100) + losses = list() + + for l in lambs: + coeffs = get_coeffs_given_lambda(X, Y, l) + SSE = get_sse(Y, X, coeffs) + loss = loss_function(SSE, l, coeffs) + losses.append(loss) + + min_loss = min(losses) + lowest_lambda = loss(min_loss_position_in_list) + + print("Working!") + return(lowest_lamb) + +def main(): + """Performs OLS, prints output to table""" + print("Working!") \ No newline at end of file From b43016e6cfd6a926db7659b46e9a5fb1dea5efc9 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 09:57:43 -0500 Subject: [PATCH 40/75] added note about inefficiency of inverse vs SVD --- .../project_2_packages/paddleboat/LASSO/LASSO_clean.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py index d1b36ae..df65082 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py @@ -58,6 +58,12 @@ def beta_lasso(X, Y, lamb): Returns ------- coefficients : Vector of Lasso coefficients + + Note + ---- + For simplicity we use matrix inverses, + which are not computationally efficient at O(p^3). + SVD would be a more efficient approach. """ Z = preprocessing.scale(X_train) Y_c = Y - np.mean(Y) @@ -67,6 +73,7 @@ def beta_lasso(X, Y, lamb): coefficients = left.dot(right) return(coefficients) + def pick_lowest_lambda(X, Y): """ Compute loss function From 1c90dcdf864bd5e7492dc8c8533edbbcb90c0262 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 10:08:38 -0500 Subject: [PATCH 41/75] Empty test scripts --- Projects/project_2_packages/paddleboat/test/test_LASSO.py | 0 Projects/project_2_packages/paddleboat/test/test_OLS.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 Projects/project_2_packages/paddleboat/test/test_LASSO.py create mode 100644 Projects/project_2_packages/paddleboat/test/test_OLS.py diff --git a/Projects/project_2_packages/paddleboat/test/test_LASSO.py b/Projects/project_2_packages/paddleboat/test/test_LASSO.py new file mode 100644 index 0000000..e69de29 diff --git a/Projects/project_2_packages/paddleboat/test/test_OLS.py b/Projects/project_2_packages/paddleboat/test/test_OLS.py new file mode 100644 index 0000000..e69de29 From 49c15dd39539002995db6570a715962ae46a6d3f Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 10:12:26 -0500 Subject: [PATCH 42/75] OLS tests --- .../paddleboat/test/test_OLS.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/test/test_OLS.py b/Projects/project_2_packages/paddleboat/test/test_OLS.py index e69de29..7147f97 100644 --- a/Projects/project_2_packages/paddleboat/test/test_OLS.py +++ b/Projects/project_2_packages/paddleboat/test/test_OLS.py @@ -0,0 +1,31 @@ +import numpy as np +import transcripty as tpy +import unittest + + +class TestHPM(unittest.TestCase): + def setUp(self): + self.hpm = tpy.HeterogeneousProbabilityModel( + 0.35, 0.6778, 1.0556, 0.0, 6, 12, 3, 125 + ) + + def test_gpa_irrelevance(self): + self.assertEqual(self.hpm(0.0, 0.0), self.hpm(0.0, 1.0)) + + def test_p_larger_gammamin(self): + """Tests p(a)""" + self.assertGreaterEqual(self.hpm.gamma_min, self.hpm(-100.0, 0.0)) + + def test_p_smaller_one(self): + self.assertLessEqual(1.0, self.hpm(100.0, 0.0)) + + def test_p(self): + gamma_min, gamma_1 = self.hpm.gamma_min, self.hpm.gamma_1 + gamma_2 = self.hpm.gamma_2 + a = 2.5 + p_a = gamma_min + (1 - gamma_min)/(1 + gamma_1*np.exp(-gamma_2*a)) + self.assertAlmostEqual(p_a, self.hpm(a, 0.0)) + + +if __name__ == "__main__": + unittest.main() From e4daf5ee6b566eb54a930c0f15e17cffe512ba74 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 10:12:35 -0500 Subject: [PATCH 43/75] LASSO tests --- .../paddleboat/test/test_LASSO.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/test/test_LASSO.py b/Projects/project_2_packages/paddleboat/test/test_LASSO.py index e69de29..7147f97 100644 --- a/Projects/project_2_packages/paddleboat/test/test_LASSO.py +++ b/Projects/project_2_packages/paddleboat/test/test_LASSO.py @@ -0,0 +1,31 @@ +import numpy as np +import transcripty as tpy +import unittest + + +class TestHPM(unittest.TestCase): + def setUp(self): + self.hpm = tpy.HeterogeneousProbabilityModel( + 0.35, 0.6778, 1.0556, 0.0, 6, 12, 3, 125 + ) + + def test_gpa_irrelevance(self): + self.assertEqual(self.hpm(0.0, 0.0), self.hpm(0.0, 1.0)) + + def test_p_larger_gammamin(self): + """Tests p(a)""" + self.assertGreaterEqual(self.hpm.gamma_min, self.hpm(-100.0, 0.0)) + + def test_p_smaller_one(self): + self.assertLessEqual(1.0, self.hpm(100.0, 0.0)) + + def test_p(self): + gamma_min, gamma_1 = self.hpm.gamma_min, self.hpm.gamma_1 + gamma_2 = self.hpm.gamma_2 + a = 2.5 + p_a = gamma_min + (1 - gamma_min)/(1 + gamma_1*np.exp(-gamma_2*a)) + self.assertAlmostEqual(p_a, self.hpm(a, 0.0)) + + +if __name__ == "__main__": + unittest.main() From 7f2449c9eb787d5872448e58e0817d16824bc2ce Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 10:14:06 -0500 Subject: [PATCH 44/75] Test init --- Projects/project_2_packages/paddleboat/test/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 Projects/project_2_packages/paddleboat/test/__init__.py diff --git a/Projects/project_2_packages/paddleboat/test/__init__.py b/Projects/project_2_packages/paddleboat/test/__init__.py new file mode 100644 index 0000000..e69de29 From bd6a127057bc2a2abdaaa8ee80b88756d0a6fd29 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 10:14:44 -0500 Subject: [PATCH 45/75] started work on bootstrapping SEs --- .../paddleboat/LASSO/LASSO_clean.py | 60 +++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py index df65082..de1478b 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py @@ -2,8 +2,9 @@ import numpy as np import OLS as ols from sklearn import preprocessing +import tqdm -def get_sse(Y, X, betas): +def sse(Y, X, betas): ''' Get sum of square errors @@ -23,7 +24,7 @@ def get_sse(Y, X, betas): return sse -def get_loss_function(Y, X, lamb, betas): +def loss(Y, X, lamb, betas): """ Compute loss function @@ -41,7 +42,7 @@ def get_loss_function(Y, X, lamb, betas): if np.sum(X[1]) == len(X[1]): return print('ERROR: X should not have an intercept') - loss = get_sse(Y, X, betas) + lamb * np.sum(np.abs(betas)) + loss = sse(Y, X, betas) + lamb * np.sum(np.abs(betas)) return loss @@ -65,7 +66,7 @@ def beta_lasso(X, Y, lamb): which are not computationally efficient at O(p^3). SVD would be a more efficient approach. """ - Z = preprocessing.scale(X_train) + Z = preprocessing.scale(X) Y_c = Y - np.mean(Y) left = np.linalg.inv(Z.transpose().dot(Z).values() + lamb * np.identity(X.shape[1])) @@ -74,6 +75,57 @@ def beta_lasso(X, Y, lamb): return(coefficients) +def lasso_se_boot(X, Y, lamb, betas, n_iter = 100, progress_disable = False): + """ + Estimate Lasso standard errors through bootstrapping + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + lamb : Lambda value to use for L2 + betas : Vector of estimated coefficients + n_ter : Integer of number of iterations for bootstrapping + progress_disable : Disable option for tqdm progress bar + + Returns + ------- + boot_se : vector of standard errors + """ + + nobs = X.shape[0] + K = int(X[1] - 1) + + beta_hat_boots = np.zeros((n_iter, K)) + + for b_iter in tqdm(range(0, n_iter), disable=progress_disable): + b_index = np.random.choice(range(0, nobs), nobs, replace = True) + + b_beta_hat = beta_lasso(Y, X, lamb) + + # Saving coefficient estimates + beta_hat_boots[b_iter] = b_beta_hat + + # Estimated coefficients from bootstrapping + beta_hat_boots = pd.DataFrame(beta_hat_boots) + beta_hat_boots.index.name = 'boot_iter' + beta_hat_boots.columns = X.columns.values.tolist() + + # Bootstrapped variance of coefficient estimates + beta_hat_boot_var = beta_hat_boots.var(axis=0) + + # Bootstrapped SE of coefficient estimates + beta_hat_boot_SE = np.sqrt(beta_hat_boot_var) + + # Bootstrapped t-stats for null that coefficient = 0 + ## note that we use the coefficient estimates from the full sample + ## but the variance from the bootstrapping procedure + beta_hat_boot_t = beta_lasso(Y, X) / beta_hat_boot_SE + + # Bootstrapped p values from t test (two-sided) + beta_hat_boot_p = pd.Series(2 * (1- t.cdf(np.abs(beta_hat_boot_t), df = nobs - K)), beta_hat_boot_t.index) + + def pick_lowest_lambda(X, Y): """ Compute loss function From c5811d0c102fac01caa96560f54aa5d0a1448156 Mon Sep 17 00:00:00 2001 From: Casey McQuillan Date: Sat, 7 Dec 2019 10:19:50 -0500 Subject: [PATCH 46/75] Create testing_OLS.py --- .../paddleboat/OLS/testing_OLS.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 Projects/project_2_packages/paddleboat/OLS/testing_OLS.py diff --git a/Projects/project_2_packages/paddleboat/OLS/testing_OLS.py b/Projects/project_2_packages/paddleboat/OLS/testing_OLS.py new file mode 100644 index 0000000..0940cd9 --- /dev/null +++ b/Projects/project_2_packages/paddleboat/OLS/testing_OLS.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sat Dec 7 10:02:42 2019 + +@author: caseymcquillan +""" + +import numpy as np +import pandas as pd +import math +import scipy +from OLS import least_sq + + From b8ef520014313d19da6bdf5daf969846bdadd9f9 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 10:30:16 -0500 Subject: [PATCH 47/75] creating fit function, returns coeffs and bootstrapped errors --- .../paddleboat/LASSO/LASSO_clean.py | 53 ++++++------------- 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py index de1478b..a1b7c47 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py @@ -75,7 +75,7 @@ def beta_lasso(X, Y, lamb): return(coefficients) -def lasso_se_boot(X, Y, lamb, betas, n_iter = 100, progress_disable = False): +def lasso_fit(X, Y, lamb, n_iter = 100, progress_disable = False): """ Estimate Lasso standard errors through bootstrapping @@ -84,13 +84,18 @@ def lasso_se_boot(X, Y, lamb, betas, n_iter = 100, progress_disable = False): Y : Nx1 Matrix X : Matrix lamb : Lambda value to use for L2 - betas : Vector of estimated coefficients n_ter : Integer of number of iterations for bootstrapping progress_disable : Disable option for tqdm progress bar Returns ------- - boot_se : vector of standard errors + results : Results wrapper with lasso results + coefficients = Lasso coefficients from full sample + bootstrap_coeffs = Lasso coefficients from bootstrapping procedure + bootstrap_coeffs_var = Coefficient variance from bootstrapping + bootstrap_coeffs_SE = Coefficient standard errors from bootstrapping + bootstrap_coeffs_t = T-stats (from bootstrapping SE) + bootstrap_coeffs_p = P-values """ nobs = X.shape[0] @@ -100,6 +105,7 @@ def lasso_se_boot(X, Y, lamb, betas, n_iter = 100, progress_disable = False): for b_iter in tqdm(range(0, n_iter), disable=progress_disable): b_index = np.random.choice(range(0, nobs), nobs, replace = True) + Y, X= Y.iloc[b_index], X.iloc[b_index] b_beta_hat = beta_lasso(Y, X, lamb) @@ -120,42 +126,17 @@ def lasso_se_boot(X, Y, lamb, betas, n_iter = 100, progress_disable = False): # Bootstrapped t-stats for null that coefficient = 0 ## note that we use the coefficient estimates from the full sample ## but the variance from the bootstrapping procedure - beta_hat_boot_t = beta_lasso(Y, X) / beta_hat_boot_SE + beta_hat_boot_t = beta_lasso(Y, X, lamb) / beta_hat_boot_SE # Bootstrapped p values from t test (two-sided) beta_hat_boot_p = pd.Series(2 * (1- t.cdf(np.abs(beta_hat_boot_t), df = nobs - K)), beta_hat_boot_t.index) + return Results_wrap(model = print('Lasso, lambda =', lamb), + coefficients = beta_lasso(Y, X, lamb), + bootstrap_coeffs = beta_hat_boots, + bootstrap_coeffs_var = beta_hat_boot_var, + bootstrap_coeffs_SE = beta_hat_boot_SE, + bootstrap_coeffs_t = beta_hat_boot_t, + bootstrap_coeffs_p = beta_hat_boot_p) -def pick_lowest_lambda(X, Y): - """ - Compute loss function - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix (no intercept column) - lamb : Lambda value to use for L1 - betas : Vector of estimated coefficients - - Returns - ------- - loss : Computed loss - """ - lambs = range(0, 1, 100) - losses = list() - - for l in lambs: - coeffs = get_coeffs_given_lambda(X, Y, l) - SSE = get_sse(Y, X, coeffs) - loss = loss_function(SSE, l, coeffs) - losses.append(loss) - - min_loss = min(losses) - lowest_lambda = loss(min_loss_position_in_list) - - print("Working!") - return(lowest_lamb) -def main(): - """Performs OLS, prints output to table""" - print("Working!") \ No newline at end of file From 195a3df9c0c8593a1e77ff3ea616aa6f94314c87 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 10:32:03 -0500 Subject: [PATCH 48/75] renaming for clarity --- .../paddleboat/LASSO/LASSO.py | 304 +++++++----------- .../paddleboat/LASSO/LASSO_clean.py | 142 -------- .../paddleboat/LASSO/LASSO_depreciated.py | 212 ++++++++++++ 3 files changed, 329 insertions(+), 329 deletions(-) delete mode 100644 Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py create mode 100644 Projects/project_2_packages/paddleboat/LASSO/LASSO_depreciated.py diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index aa8d13e..a1b7c47 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -1,212 +1,142 @@ import pandas as pd import numpy as np +import OLS as ols +from sklearn import preprocessing +import tqdm -def get_betas(X, Y): - """Get betas (according to OLS formula)""" - betas = (np.transpose(X) * X)^(-1) * (np.transpose(X) * Y) # transpose is a numpy function - - print("Working!") - return betas - -def get_residuals(betas, X, Y): - """Get residuals (according to OLS formula)""" - y_hat = betas * X - residuals = Y - y_hat - - print("Working!") - return residuals - -def get_n(X, Y): - """Get N, check independent vs dependent variables""" - n_X = length(X) - n_Y = length(Y) - - if n_X == n_Y: - n = n_X - else: - print("Error!") - - print("Working!") - return n - -def get_ses(residuals, X, Y): - """Get SEs (according to OLS formula)""" - residuals2 = residuals^2 - XX = (np.transpose(X) * X)^(-1) # transpose is not a real function - N = get_n(X, Y) - ses = (residuals2 / (N-1)) * XX - - print("Working!") - return ses - -def get_r2(Y, X, betas): - """Get R^2""" - y_hat = X * betas - y_bar = mean(y) - - SSR = sum((y_hat - y_bar)^2) - SST = sum((y - y_bar)^2) - - r2 = SSR / SST - - print("Working!") - return r2 - -def get_sse(Y, X, betas): - """Get sum of squared errors""" - y_hat = X * beta - sse = (Y - y_hat) ** 2 - - print("Working!") - return sse - -def get_loss_function(SSE, lamb, betas): - """Get loss function""" - betas_no_intercept = betas[1:len(betas)] - loss_function = SSE + lamb * np.sum(np.abs(betas_no_intercept)) - - print("Working!") - return loss_function - -def get_coeffs_given_lambda(X, Y, lamb): - Z = # STANDARDIZED X - Y_c = # CENTERED Y - coefficients = np.linalg.inv(Z.transpose().dot(Z).values() + lamb * np.identity(X.shape[1])).dot(Z.transpose().dot(Y_c)) - return(coefficients) - -def pick_lowest_lambda(X, Y): - """Pick lowest lambda""" - lambs = range(0, 1, 100) - losses = list() - - for l in lambs: - coeffs = get_coeffs_given_lambda(X, Y, l) - SSE = get_sse(Y, X, coeffs) - loss = loss_function(SSE, l, coeffs) - losses.append(loss) - - min_loss = min(losses) - lowest_lambda = loss(min_loss_position_in_list) - - print("Working!") - return(lowest_lamb) - -def main(): - """Performs OLS, prints output to table""" - print("Working!") -import pandas as pd -import numpy as np - -def get_betas(X, Y): - ''' - Get betas (according to OLS formula) +def sse(Y, X, betas): ''' - betas = (np.transpose(X) * X)^(-1) * (np.transpose(X) * Y) # transpose is a numpy function + Get sum of square errors - print("Working!") - return betas + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + betas : Vector of estimated coefficients -def get_residuals(betas, X, Y): - ''' - Get residuals (according to OLS formula) + Returns + ------- + sse : Sum of square errors ''' - y_hat = betas * X - residuals = Y - y_hat - - print("Working!") - return residuals - -def get_n(X, Y): - ''' - Get N, check independent vs dependent variables - ''' - n_X = length(X) - n_Y = length(Y) - - if n_X == n_Y: - n = n_X - else: - print("Error!") + + e = betas.dot(X.values.T)-Y + sse = np.sum(e**2) + return sse - print("Working!") - return n -def get_ses(residuals, X, Y): - ''' - Get SEs (according to OLS formula) - ''' - residuals2 = residuals^2 - XX = (np.transpose(X) * X)^(-1) # transpose is not a real function - N = get_n(X, Y) - ses = (residuals2 / (N-1)) * XX +def loss(Y, X, lamb, betas): + """ + Compute loss function + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix (no intercept column) + lamb : Lambda value to use for L2 + betas : Vector of estimated coefficients + + Returns + ------- + loss : Computed loss + """ + if np.sum(X[1]) == len(X[1]): + return print('ERROR: X should not have an intercept') + + loss = sse(Y, X, betas) + lamb * np.sum(np.abs(betas)) + return loss + + +def beta_lasso(X, Y, lamb): + """ + Compute lasso coeffs + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix (no intercept column) + lamb : Lambda value to use for L2 + + Returns + ------- + coefficients : Vector of Lasso coefficients + + Note + ---- + For simplicity we use matrix inverses, + which are not computationally efficient at O(p^3). + SVD would be a more efficient approach. + """ + Z = preprocessing.scale(X) + Y_c = Y - np.mean(Y) + + left = np.linalg.inv(Z.transpose().dot(Z).values() + lamb * np.identity(X.shape[1])) + right = Z.transpose().dot(Y_c) + coefficients = left.dot(right) + return(coefficients) - print("Working!") - return ses -def get_r2(Y, X, betas): - ''' - Get R^2 - ''' - y_hat = X * betas - y_bar = mean(y) +def lasso_fit(X, Y, lamb, n_iter = 100, progress_disable = False): + """ + Estimate Lasso standard errors through bootstrapping - SSR = sum((y_hat - y_bar)^2) - SST = sum((y - y_bar)^2) + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + lamb : Lambda value to use for L2 + n_ter : Integer of number of iterations for bootstrapping + progress_disable : Disable option for tqdm progress bar - r2 = SSR / SST + Returns + ------- + results : Results wrapper with lasso results + coefficients = Lasso coefficients from full sample + bootstrap_coeffs = Lasso coefficients from bootstrapping procedure + bootstrap_coeffs_var = Coefficient variance from bootstrapping + bootstrap_coeffs_SE = Coefficient standard errors from bootstrapping + bootstrap_coeffs_t = T-stats (from bootstrapping SE) + bootstrap_coeffs_p = P-values + """ + + nobs = X.shape[0] + K = int(X[1] - 1) - print("Working!") - return r2 + beta_hat_boots = np.zeros((n_iter, K)) -def get_sse(Y, X, betas): - ''' - Get sum of squared errors''' - y_hat = X * betas - sse = (Y - y_hat) ** 2 + for b_iter in tqdm(range(0, n_iter), disable=progress_disable): + b_index = np.random.choice(range(0, nobs), nobs, replace = True) + Y, X= Y.iloc[b_index], X.iloc[b_index] - print("Working!") - return sse + b_beta_hat = beta_lasso(Y, X, lamb) -def get_loss_function(SSE, lamb, betas): - ''' - Get loss function - ''' - betas_without_intercept = betas[1:length(betas)] - loss_function = SSE + lamb * sum(abs(betas_without_intercept)) + # Saving coefficient estimates + beta_hat_boots[b_iter] = b_beta_hat - print("Working!") - return loss_function + # Estimated coefficients from bootstrapping + beta_hat_boots = pd.DataFrame(beta_hat_boots) + beta_hat_boots.index.name = 'boot_iter' + beta_hat_boots.columns = X.columns.values.tolist() -def get_coefficients_given_lambda(lamb): - ''' - Get coefficients - ''' - return(coefficients) + # Bootstrapped variance of coefficient estimates + beta_hat_boot_var = beta_hat_boots.var(axis=0) -def pick_lowest_lamda(): - ''' - Pick lowest lambda - ''' - lambs = [0.001, 0.01, 0.1, 0.5, 1, 2, 10] - l_num = length(lam) - pred_num = X.shape[1] - losses = list(length(lamb)) + # Bootstrapped SE of coefficient estimates + beta_hat_boot_SE = np.sqrt(beta_hat_boot_var) - # prepare data for enumerate - coeff_a = np.zeros((l_num, pred_num)) + # Bootstrapped t-stats for null that coefficient = 0 + ## note that we use the coefficient estimates from the full sample + ## but the variance from the bootstrapping procedure + beta_hat_boot_t = beta_lasso(Y, X, lamb) / beta_hat_boot_SE - for ind, i in enumerate(lambs): - loss = loss_function(lamb) - list.append(loss) + # Bootstrapped p values from t test (two-sided) + beta_hat_boot_p = pd.Series(2 * (1- t.cdf(np.abs(beta_hat_boot_t), df = nobs - K)), beta_hat_boot_t.index) - min_loss = min(losses) - lowest_lamb = loss(min_loss_position_in_list) + return Results_wrap(model = print('Lasso, lambda =', lamb), + coefficients = beta_lasso(Y, X, lamb), + bootstrap_coeffs = beta_hat_boots, + bootstrap_coeffs_var = beta_hat_boot_var, + bootstrap_coeffs_SE = beta_hat_boot_SE, + bootstrap_coeffs_t = beta_hat_boot_t, + bootstrap_coeffs_p = beta_hat_boot_p) - print("Working!") - return(lowest_lamb) -def main(): - ''' - Performs LASSO, prints output to table - ''' - print("Working!") diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py deleted file mode 100644 index a1b7c47..0000000 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO_clean.py +++ /dev/null @@ -1,142 +0,0 @@ -import pandas as pd -import numpy as np -import OLS as ols -from sklearn import preprocessing -import tqdm - -def sse(Y, X, betas): - ''' - Get sum of square errors - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix - betas : Vector of estimated coefficients - - Returns - ------- - sse : Sum of square errors - ''' - - e = betas.dot(X.values.T)-Y - sse = np.sum(e**2) - return sse - - -def loss(Y, X, lamb, betas): - """ - Compute loss function - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix (no intercept column) - lamb : Lambda value to use for L2 - betas : Vector of estimated coefficients - - Returns - ------- - loss : Computed loss - """ - if np.sum(X[1]) == len(X[1]): - return print('ERROR: X should not have an intercept') - - loss = sse(Y, X, betas) + lamb * np.sum(np.abs(betas)) - return loss - - -def beta_lasso(X, Y, lamb): - """ - Compute lasso coeffs - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix (no intercept column) - lamb : Lambda value to use for L2 - - Returns - ------- - coefficients : Vector of Lasso coefficients - - Note - ---- - For simplicity we use matrix inverses, - which are not computationally efficient at O(p^3). - SVD would be a more efficient approach. - """ - Z = preprocessing.scale(X) - Y_c = Y - np.mean(Y) - - left = np.linalg.inv(Z.transpose().dot(Z).values() + lamb * np.identity(X.shape[1])) - right = Z.transpose().dot(Y_c) - coefficients = left.dot(right) - return(coefficients) - - -def lasso_fit(X, Y, lamb, n_iter = 100, progress_disable = False): - """ - Estimate Lasso standard errors through bootstrapping - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix - lamb : Lambda value to use for L2 - n_ter : Integer of number of iterations for bootstrapping - progress_disable : Disable option for tqdm progress bar - - Returns - ------- - results : Results wrapper with lasso results - coefficients = Lasso coefficients from full sample - bootstrap_coeffs = Lasso coefficients from bootstrapping procedure - bootstrap_coeffs_var = Coefficient variance from bootstrapping - bootstrap_coeffs_SE = Coefficient standard errors from bootstrapping - bootstrap_coeffs_t = T-stats (from bootstrapping SE) - bootstrap_coeffs_p = P-values - """ - - nobs = X.shape[0] - K = int(X[1] - 1) - - beta_hat_boots = np.zeros((n_iter, K)) - - for b_iter in tqdm(range(0, n_iter), disable=progress_disable): - b_index = np.random.choice(range(0, nobs), nobs, replace = True) - Y, X= Y.iloc[b_index], X.iloc[b_index] - - b_beta_hat = beta_lasso(Y, X, lamb) - - # Saving coefficient estimates - beta_hat_boots[b_iter] = b_beta_hat - - # Estimated coefficients from bootstrapping - beta_hat_boots = pd.DataFrame(beta_hat_boots) - beta_hat_boots.index.name = 'boot_iter' - beta_hat_boots.columns = X.columns.values.tolist() - - # Bootstrapped variance of coefficient estimates - beta_hat_boot_var = beta_hat_boots.var(axis=0) - - # Bootstrapped SE of coefficient estimates - beta_hat_boot_SE = np.sqrt(beta_hat_boot_var) - - # Bootstrapped t-stats for null that coefficient = 0 - ## note that we use the coefficient estimates from the full sample - ## but the variance from the bootstrapping procedure - beta_hat_boot_t = beta_lasso(Y, X, lamb) / beta_hat_boot_SE - - # Bootstrapped p values from t test (two-sided) - beta_hat_boot_p = pd.Series(2 * (1- t.cdf(np.abs(beta_hat_boot_t), df = nobs - K)), beta_hat_boot_t.index) - - return Results_wrap(model = print('Lasso, lambda =', lamb), - coefficients = beta_lasso(Y, X, lamb), - bootstrap_coeffs = beta_hat_boots, - bootstrap_coeffs_var = beta_hat_boot_var, - bootstrap_coeffs_SE = beta_hat_boot_SE, - bootstrap_coeffs_t = beta_hat_boot_t, - bootstrap_coeffs_p = beta_hat_boot_p) - - diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO_depreciated.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO_depreciated.py new file mode 100644 index 0000000..aa8d13e --- /dev/null +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO_depreciated.py @@ -0,0 +1,212 @@ +import pandas as pd +import numpy as np + +def get_betas(X, Y): + """Get betas (according to OLS formula)""" + betas = (np.transpose(X) * X)^(-1) * (np.transpose(X) * Y) # transpose is a numpy function + + print("Working!") + return betas + +def get_residuals(betas, X, Y): + """Get residuals (according to OLS formula)""" + y_hat = betas * X + residuals = Y - y_hat + + print("Working!") + return residuals + +def get_n(X, Y): + """Get N, check independent vs dependent variables""" + n_X = length(X) + n_Y = length(Y) + + if n_X == n_Y: + n = n_X + else: + print("Error!") + + print("Working!") + return n + +def get_ses(residuals, X, Y): + """Get SEs (according to OLS formula)""" + residuals2 = residuals^2 + XX = (np.transpose(X) * X)^(-1) # transpose is not a real function + N = get_n(X, Y) + ses = (residuals2 / (N-1)) * XX + + print("Working!") + return ses + +def get_r2(Y, X, betas): + """Get R^2""" + y_hat = X * betas + y_bar = mean(y) + + SSR = sum((y_hat - y_bar)^2) + SST = sum((y - y_bar)^2) + + r2 = SSR / SST + + print("Working!") + return r2 + +def get_sse(Y, X, betas): + """Get sum of squared errors""" + y_hat = X * beta + sse = (Y - y_hat) ** 2 + + print("Working!") + return sse + +def get_loss_function(SSE, lamb, betas): + """Get loss function""" + betas_no_intercept = betas[1:len(betas)] + loss_function = SSE + lamb * np.sum(np.abs(betas_no_intercept)) + + print("Working!") + return loss_function + +def get_coeffs_given_lambda(X, Y, lamb): + Z = # STANDARDIZED X + Y_c = # CENTERED Y + coefficients = np.linalg.inv(Z.transpose().dot(Z).values() + lamb * np.identity(X.shape[1])).dot(Z.transpose().dot(Y_c)) + return(coefficients) + +def pick_lowest_lambda(X, Y): + """Pick lowest lambda""" + lambs = range(0, 1, 100) + losses = list() + + for l in lambs: + coeffs = get_coeffs_given_lambda(X, Y, l) + SSE = get_sse(Y, X, coeffs) + loss = loss_function(SSE, l, coeffs) + losses.append(loss) + + min_loss = min(losses) + lowest_lambda = loss(min_loss_position_in_list) + + print("Working!") + return(lowest_lamb) + +def main(): + """Performs OLS, prints output to table""" + print("Working!") +import pandas as pd +import numpy as np + +def get_betas(X, Y): + ''' + Get betas (according to OLS formula) + ''' + betas = (np.transpose(X) * X)^(-1) * (np.transpose(X) * Y) # transpose is a numpy function + + print("Working!") + return betas + +def get_residuals(betas, X, Y): + ''' + Get residuals (according to OLS formula) + ''' + y_hat = betas * X + residuals = Y - y_hat + + print("Working!") + return residuals + +def get_n(X, Y): + ''' + Get N, check independent vs dependent variables + ''' + n_X = length(X) + n_Y = length(Y) + + if n_X == n_Y: + n = n_X + else: + print("Error!") + + print("Working!") + return n + +def get_ses(residuals, X, Y): + ''' + Get SEs (according to OLS formula) + ''' + residuals2 = residuals^2 + XX = (np.transpose(X) * X)^(-1) # transpose is not a real function + N = get_n(X, Y) + ses = (residuals2 / (N-1)) * XX + + print("Working!") + return ses + +def get_r2(Y, X, betas): + ''' + Get R^2 + ''' + y_hat = X * betas + y_bar = mean(y) + + SSR = sum((y_hat - y_bar)^2) + SST = sum((y - y_bar)^2) + + r2 = SSR / SST + + print("Working!") + return r2 + +def get_sse(Y, X, betas): + ''' + Get sum of squared errors''' + y_hat = X * betas + sse = (Y - y_hat) ** 2 + + print("Working!") + return sse + +def get_loss_function(SSE, lamb, betas): + ''' + Get loss function + ''' + betas_without_intercept = betas[1:length(betas)] + loss_function = SSE + lamb * sum(abs(betas_without_intercept)) + + print("Working!") + return loss_function + +def get_coefficients_given_lambda(lamb): + ''' + Get coefficients + ''' + return(coefficients) + +def pick_lowest_lamda(): + ''' + Pick lowest lambda + ''' + lambs = [0.001, 0.01, 0.1, 0.5, 1, 2, 10] + l_num = length(lam) + pred_num = X.shape[1] + losses = list(length(lamb)) + + # prepare data for enumerate + coeff_a = np.zeros((l_num, pred_num)) + + for ind, i in enumerate(lambs): + loss = loss_function(lamb) + list.append(loss) + + min_loss = min(losses) + lowest_lamb = loss(min_loss_position_in_list) + + print("Working!") + return(lowest_lamb) + +def main(): + ''' + Performs LASSO, prints output to table + ''' + print("Working!") From 916d3559f9da5e69b4c0e9e29f162eec89b6784d Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 10:43:35 -0500 Subject: [PATCH 49/75] Subfolder inits --- Projects/project_2_packages/paddleboat/LASSO/__init__.py | 0 Projects/project_2_packages/paddleboat/OLS/__init__.py | 0 Projects/project_2_packages/paddleboat/test/test_OLS.py | 2 ++ 3 files changed, 2 insertions(+) create mode 100644 Projects/project_2_packages/paddleboat/LASSO/__init__.py create mode 100644 Projects/project_2_packages/paddleboat/OLS/__init__.py diff --git a/Projects/project_2_packages/paddleboat/LASSO/__init__.py b/Projects/project_2_packages/paddleboat/LASSO/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Projects/project_2_packages/paddleboat/OLS/__init__.py b/Projects/project_2_packages/paddleboat/OLS/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/Projects/project_2_packages/paddleboat/test/test_OLS.py b/Projects/project_2_packages/paddleboat/test/test_OLS.py index 7147f97..ae064d5 100644 --- a/Projects/project_2_packages/paddleboat/test/test_OLS.py +++ b/Projects/project_2_packages/paddleboat/test/test_OLS.py @@ -26,6 +26,8 @@ def test_p(self): p_a = gamma_min + (1 - gamma_min)/(1 + gamma_1*np.exp(-gamma_2*a)) self.assertAlmostEqual(p_a, self.hpm(a, 0.0)) + + if __name__ == "__main__": unittest.main() From b6261a2c9a4f163f88ec3c7c1558afaab46d7ce8 Mon Sep 17 00:00:00 2001 From: Casey McQuillan Date: Sat, 7 Dec 2019 10:46:14 -0500 Subject: [PATCH 50/75] Adding path for OLS on test --- Projects/project_2_packages/paddleboat/test/test_OLS.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/test/test_OLS.py b/Projects/project_2_packages/paddleboat/test/test_OLS.py index 7147f97..1fa8d9d 100644 --- a/Projects/project_2_packages/paddleboat/test/test_OLS.py +++ b/Projects/project_2_packages/paddleboat/test/test_OLS.py @@ -1,6 +1,11 @@ +import sys +sys.path.insert(0,'../OLS') + import numpy as np import transcripty as tpy import unittest +from OLS import beta_ols + class TestHPM(unittest.TestCase): From a43afe4da768fe5823e141f5af3812e9b6ebdb4a Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 11:17:01 -0500 Subject: [PATCH 51/75] changed indentation of docstrings --- .../project_2_packages/paddleboat/OLS/OLS.py | 136 +++++++++--------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 466594e..241e306 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -6,18 +6,18 @@ # Function computing a least squares fit def beta_ols(Y, X): - ''' - Estimate OLS coefficients + ''' + Estimate OLS coefficients - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix - Returns - ------- - beta_hat : vector of coefficients - ''' + Returns + ------- + beta_hat : vector of coefficients + ''' if len(Y.shape) != 1: return print('ERROR: Y must be an Nx1 matrix') @@ -31,71 +31,71 @@ def beta_ols(Y, X): def resids(Y, X): - ''' - Estimate OLS residuals - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix - - Returns - ------- - e : vector of residuals - ''' + ''' + Estimate OLS residuals + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + + Returns + ------- + e : vector of residuals + ''' e = beta_ols(Y,X).dot(X.values.T)-Y return e def Sigma(Y,X): - ''' - Estimate OLS variance-covariance matrix - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix - - Returns - ------- - e : var-cov matrix as pandas df - ''' + ''' + Estimate OLS variance-covariance matrix + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + + Returns + ------- + e : var-cov matrix as pandas df + ''' e = resids(Y,X) std_hat = e.dot(e.T)/(X.shape[0]-X.shape[1]) Sigma = std_hat*np.linalg.inv(X.transpose().dot(X).values) return pd.DataFrame(Sigma) def variance_ols(Y,X): - ''' - Estimate OLS variance-covariance matrix - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix - - Returns - ------- - var : variance of coefficients - ''' + ''' + Estimate OLS variance-covariance matrix + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + + Returns + ------- + var : variance of coefficients + ''' diags = np.diagonal(Sigma(Y, X)) var = np.sqrt(diags) return var def r2_ols(Y, X): - ''' - Estimate R^2 for OLS + ''' + Estimate R^2 for OLS - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix - Returns - ------- - R2 : value of R^2 - ''' + Returns + ------- + R2 : value of R^2 + ''' y_hat = beta_ols(Y,X).dot(X.values.T) y_bar = np.mean(y) @@ -107,18 +107,18 @@ def r2_ols(Y, X): return r2 def least_sq(Y, X): - ''' - Output nicely OLS results - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix - - Returns - ------- - R2 : value of R^2 - ''' + ''' + Output nicely OLS results + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + + Returns + ------- + R2 : value of R^2 + ''' print('Coefficients = ', beta_ols(Y, X)) print('Coeff. SErrs = ', np.sqrt(np.diagonal(Sigma(Y,X)))) From 63691b3803f6bb5f525f1dee19bcff03b0084177 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 11:44:43 -0500 Subject: [PATCH 52/75] added synth data and tested coeff estimates from OLS --- .../paddleboat/test/test_OLS.py | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/Projects/project_2_packages/paddleboat/test/test_OLS.py b/Projects/project_2_packages/paddleboat/test/test_OLS.py index 8d465aa..4d252ec 100644 --- a/Projects/project_2_packages/paddleboat/test/test_OLS.py +++ b/Projects/project_2_packages/paddleboat/test/test_OLS.py @@ -5,6 +5,8 @@ import transcripty as tpy import unittest from OLS import beta_ols +import statsmodels.api as sm +import pandas as pd @@ -31,7 +33,34 @@ def test_p(self): p_a = gamma_min + (1 - gamma_min)/(1 + gamma_1*np.exp(-gamma_2*a)) self.assertAlmostEqual(p_a, self.hpm(a, 0.0)) - + def test_simple_b_ols(self): + np.random.seed(60683) # testing different seed + n = 50000 + ## DGP ## + means = [3, -1.5, 1.1, 2.3, -1, 3] + cov = [ + [1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1]] + + ## Data Generation ## + X1, X2, X3, X4, Z1, Z2 = np.random.multivariate_normal(means, cov, n).T + epsilon = np.random.normal(0, 1, n) + + # True model + Y = 1.5 + 2.5*X1 + 2*X2 + 3*X3 + 6*X4 + epsilon + + independent_vars = pd.DataFrame({'X1' : X1, 'X2' : X2, 'X3' : X3, 'X4' : X4}) + independent_vars = sm.add_constant(independent_vars) + dependent_vars = Y + + # running model + coeff_estimates = beta_ols(dependent_vars, independent_vars) + self.assertIsNone(np.testing.assert_almost_equal([1.5, 2.5, 2, 3, 6], coeff_estimates, decimal=1)) + if __name__ == "__main__": From eed1eac4b52b8a854cfe2a56d64ba5f8fde92268 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 11:49:59 -0500 Subject: [PATCH 53/75] Very minor changes --- Projects/project_2_packages/paddleboat/test/test_OLS.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/test/test_OLS.py b/Projects/project_2_packages/paddleboat/test/test_OLS.py index 4d252ec..f060539 100644 --- a/Projects/project_2_packages/paddleboat/test/test_OLS.py +++ b/Projects/project_2_packages/paddleboat/test/test_OLS.py @@ -2,7 +2,6 @@ sys.path.insert(0,'../OLS') import numpy as np -import transcripty as tpy import unittest from OLS import beta_ols import statsmodels.api as sm @@ -62,6 +61,5 @@ def test_simple_b_ols(self): self.assertIsNone(np.testing.assert_almost_equal([1.5, 2.5, 2, 3, 6], coeff_estimates, decimal=1)) - if __name__ == "__main__": unittest.main() From 7748a2feab6695ab7180923e7eccafce0282e246 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 14:11:33 -0500 Subject: [PATCH 54/75] fix some suff, function read incorrrectly, need to fix. unclear why beta_lasso reads into lasso_fit --- .../paddleboat/LASSO/LASSO.py | 183 +++++++++--------- 1 file changed, 91 insertions(+), 92 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index a1b7c47..83e5674 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -1,111 +1,72 @@ import pandas as pd import numpy as np -import OLS as ols from sklearn import preprocessing -import tqdm - -def sse(Y, X, betas): - ''' - Get sum of square errors - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix - betas : Vector of estimated coefficients - - Returns - ------- - sse : Sum of square errors - ''' - - e = betas.dot(X.values.T)-Y - sse = np.sum(e**2) - return sse - - -def loss(Y, X, lamb, betas): - """ - Compute loss function - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix (no intercept column) - lamb : Lambda value to use for L2 - betas : Vector of estimated coefficients - - Returns - ------- - loss : Computed loss - """ - if np.sum(X[1]) == len(X[1]): - return print('ERROR: X should not have an intercept') +from tqdm import tqdm +from scipy.stats import t + + +def beta_lasso(Y, X, lamb): + """ + Compute lasso coeffs + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix (with intercept column) + lamb : Lambda value to use for L2 + + Returns + ------- + coefficients : Vector of Lasso coefficients + + Note + ---- + For simplicity we use matrix inverses, + which are not computationally efficient at O(p^3). + SVD would be a more efficient approach. + """ - loss = sse(Y, X, betas) + lamb * np.sum(np.abs(betas)) - return loss - - -def beta_lasso(X, Y, lamb): - """ - Compute lasso coeffs - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix (no intercept column) - lamb : Lambda value to use for L2 - - Returns - ------- - coefficients : Vector of Lasso coefficients - - Note - ---- - For simplicity we use matrix inverses, - which are not computationally efficient at O(p^3). - SVD would be a more efficient approach. - """ - Z = preprocessing.scale(X) + Z = X.iloc[:, 1:] + Z = pd.DataFrame(preprocessing.scale(Z)) Y_c = Y - np.mean(Y) - left = np.linalg.inv(Z.transpose().dot(Z).values() + lamb * np.identity(X.shape[1])) + left = np.linalg.inv(Z.transpose().dot(Z) + lamb * np.identity(Z.shape[1])) right = Z.transpose().dot(Y_c) coefficients = left.dot(right) return(coefficients) -def lasso_fit(X, Y, lamb, n_iter = 100, progress_disable = False): - """ - Estimate Lasso standard errors through bootstrapping - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix - lamb : Lambda value to use for L2 - n_ter : Integer of number of iterations for bootstrapping - progress_disable : Disable option for tqdm progress bar - - Returns - ------- - results : Results wrapper with lasso results - coefficients = Lasso coefficients from full sample - bootstrap_coeffs = Lasso coefficients from bootstrapping procedure - bootstrap_coeffs_var = Coefficient variance from bootstrapping - bootstrap_coeffs_SE = Coefficient standard errors from bootstrapping - bootstrap_coeffs_t = T-stats (from bootstrapping SE) - bootstrap_coeffs_p = P-values - """ - - nobs = X.shape[0] - K = int(X[1] - 1) + +def lasso_fit(Y, X, lamb, n_iter=100, progress_disable = False): + """ + Estimate Lasso standard errors through bootstrapping + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + lamb : Lambda value to use for L2 + n_ter : Integer of number of iterations for bootstrapping + progress_disable : Disable option for tqdm progress bar + + Returns + ------- + results : Results wrapper with lasso results + coefficients = Lasso coefficients from full sample + bootstrap_coeffs = Lasso coefficients from bootstrapping procedure + bootstrap_coeffs_var = Coefficient variance from bootstrapping + bootstrap_coeffs_SE = Coefficient standard errors from bootstrapping + bootstrap_coeffs_t = T-stats (from bootstrapping SE) + bootstrap_coeffs_p = P-values + """ + nobs = np.shape(X)[0] + K = np.shape(X)[1] - 1 beta_hat_boots = np.zeros((n_iter, K)) for b_iter in tqdm(range(0, n_iter), disable=progress_disable): b_index = np.random.choice(range(0, nobs), nobs, replace = True) - Y, X= Y.iloc[b_index], X.iloc[b_index] + Y, X= pd.DataFrame(Y).iloc[b_index], pd.DataFrame(X).iloc[b_index] b_beta_hat = beta_lasso(Y, X, lamb) @@ -140,3 +101,41 @@ def lasso_fit(X, Y, lamb, n_iter = 100, progress_disable = False): bootstrap_coeffs_p = beta_hat_boot_p) +def sse(Y, X, betas): + ''' + Get sum of square errors + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + betas : Vector of estimated coefficients + + Returns + ------- + sse : Sum of square errors + ''' + + e = betas.dot(X.values.T)-Y + sse = np.sum(e**2) + return sse + + +def loss(Y, X, lamb, betas): + """ + Compute loss function + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix (with intercept column) + lamb : Lambda value to use for L2 + betas : Vector of estimated coefficients + + Returns + ------- + loss : Computed loss + """ + + loss = sse(Y, X, betas) + lamb * np.sum(np.abs(betas)) + return loss \ No newline at end of file From 75026deb6a9f4d4261238b2f4173c73c4696e5f4 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 14:26:53 -0500 Subject: [PATCH 55/75] super weird function behavior, need to figure out --- .../paddleboat/LASSO/LASSO.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 83e5674..4a26f2b 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -36,6 +36,25 @@ def beta_lasso(Y, X, lamb): return(coefficients) +def sse(Y, X, betas): + ''' + Get sum of square errors + + Parameters + ---------- + Y : Nx1 Matrix + X : Matrix + betas : Vector of estimated coefficients + + Returns + ------- + sse : Sum of square errors + ''' + + e = betas.dot(X.values.T)-Y + sse = np.sum(e**2) + return sse + def lasso_fit(Y, X, lamb, n_iter=100, progress_disable = False): """ @@ -101,25 +120,6 @@ def lasso_fit(Y, X, lamb, n_iter=100, progress_disable = False): bootstrap_coeffs_p = beta_hat_boot_p) -def sse(Y, X, betas): - ''' - Get sum of square errors - - Parameters - ---------- - Y : Nx1 Matrix - X : Matrix - betas : Vector of estimated coefficients - - Returns - ------- - sse : Sum of square errors - ''' - - e = betas.dot(X.values.T)-Y - sse = np.sum(e**2) - return sse - def loss(Y, X, lamb, betas): """ From 10b1c441413c0ffa0342f4b75aa561786b10099f Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 14:50:49 -0500 Subject: [PATCH 56/75] Add travis file --- .travis.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..4520081 --- /dev/null +++ b/.travis.yml @@ -0,0 +1 @@ +language: python From 329b9bcb9394316a4775ed134e1b1dbcc5674dcf Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 14:56:05 -0500 Subject: [PATCH 57/75] Full first draft travis --- .travis.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.travis.yml b/.travis.yml index 4520081..6a5c92c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1 +1,17 @@ language: python + +# ===== Linux ====== +dist: xenial +python: + - 3.7 +matrix: + include: + - name: "Python 3.7 on linux" + os: linux + language: python + before_install: + - pip install -U pip + - pip install -r requirements.txt + - pip install . + - cd Projects/project_2_packages/paddleboat + script: python -m unittest -v From e2acb3486246a04035ba72bbe05c5e0a132b9307 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 14:57:27 -0500 Subject: [PATCH 58/75] remove requitements.txt --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 6a5c92c..2e60b79 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,6 @@ matrix: language: python before_install: - pip install -U pip - - pip install -r requirements.txt - pip install . - cd Projects/project_2_packages/paddleboat script: python -m unittest -v From 2570e67a9394662691abf603ba5c5b2b803b217d Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 15:02:07 -0500 Subject: [PATCH 59/75] Remove all package install --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 2e60b79..55ae808 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,5 @@ matrix: language: python before_install: - pip install -U pip - - pip install . - cd Projects/project_2_packages/paddleboat script: python -m unittest -v From 6d431d0e3da457da0fd7e8d82ae5a9755f86cdec Mon Sep 17 00:00:00 2001 From: Casey McQuillan Date: Sat, 7 Dec 2019 15:17:42 -0500 Subject: [PATCH 60/75] Pushing current testing file @ 3:15pm --- .../project_2_packages/paddleboat/OLS/OLS.py | 4 +- .../paddleboat/test/test_OLS.py | 58 ++++++++++++------- 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 241e306..74a459e 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -49,7 +49,7 @@ def resids(Y, X): def Sigma(Y,X): ''' - Estimate OLS variance-covariance matrix + ≈ Parameters ---------- @@ -58,7 +58,7 @@ def Sigma(Y,X): Returns ------- - e : var-cov matrix as pandas df + Sigma : var-cov matrix as pandas df ''' e = resids(Y,X) std_hat = e.dot(e.T)/(X.shape[0]-X.shape[1]) diff --git a/Projects/project_2_packages/paddleboat/test/test_OLS.py b/Projects/project_2_packages/paddleboat/test/test_OLS.py index 4d252ec..a94639c 100644 --- a/Projects/project_2_packages/paddleboat/test/test_OLS.py +++ b/Projects/project_2_packages/paddleboat/test/test_OLS.py @@ -4,34 +4,13 @@ import numpy as np import transcripty as tpy import unittest -from OLS import beta_ols +from OLS import beta_ols, Sigma import statsmodels.api as sm import pandas as pd class TestHPM(unittest.TestCase): - def setUp(self): - self.hpm = tpy.HeterogeneousProbabilityModel( - 0.35, 0.6778, 1.0556, 0.0, 6, 12, 3, 125 - ) - - def test_gpa_irrelevance(self): - self.assertEqual(self.hpm(0.0, 0.0), self.hpm(0.0, 1.0)) - - def test_p_larger_gammamin(self): - """Tests p(a)""" - self.assertGreaterEqual(self.hpm.gamma_min, self.hpm(-100.0, 0.0)) - - def test_p_smaller_one(self): - self.assertLessEqual(1.0, self.hpm(100.0, 0.0)) - - def test_p(self): - gamma_min, gamma_1 = self.hpm.gamma_min, self.hpm.gamma_1 - gamma_2 = self.hpm.gamma_2 - a = 2.5 - p_a = gamma_min + (1 - gamma_min)/(1 + gamma_1*np.exp(-gamma_2*a)) - self.assertAlmostEqual(p_a, self.hpm(a, 0.0)) def test_simple_b_ols(self): np.random.seed(60683) # testing different seed @@ -62,6 +41,41 @@ def test_simple_b_ols(self): self.assertIsNone(np.testing.assert_almost_equal([1.5, 2.5, 2, 3, 6], coeff_estimates, decimal=1)) + def test_Sigma(self): + np.random.seed(1435) # testing different seed + n = 500000 + ## DGP ## + means = [3, -1.5, 1.1, 2.3, -1, 3] + cov = [ + [1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1]] + + ## Data Generation ## + X1, X2, X3, X4, Z1, Z2 = np.random.multivariate_normal(means, cov, n).T + epsilon = np.random.normal(0, 1, n) + + # True model + Y = 1.5 + 2.5*X1 + 2*X2 + 3*X3 + 6*X4 + epsilon + + independent_vars = pd.DataFrame({'X1' : X1, 'X2' : X2, 'X3' : X3, 'X4' : X4}) + independent_vars = sm.add_constant(independent_vars) + dependent_vars = Y + + + + # running model + covariance_matrix = Sigma(dependent_vars, independent_vars) + print(covariance_matrix) + test_kc = independent_vars.cov() + print(test_kc) + + #self.assertIsNone(np.testing.assert_almost_equal([1.5, 2.5, 2, 3, 6], coeff_estimates, decimal=1)) + + if __name__ == "__main__": unittest.main() From 2af7108c15ea76d3c9efd7ceb9ab76b6afc9e5c1 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 15:25:04 -0500 Subject: [PATCH 61/75] fixed lasso functions, added Results_wrapper class --- .../paddleboat/LASSO/LASSO.py | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py index 4a26f2b..8b0d051 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py +++ b/Projects/project_2_packages/paddleboat/LASSO/LASSO.py @@ -85,17 +85,16 @@ def lasso_fit(Y, X, lamb, n_iter=100, progress_disable = False): for b_iter in tqdm(range(0, n_iter), disable=progress_disable): b_index = np.random.choice(range(0, nobs), nobs, replace = True) - Y, X= pd.DataFrame(Y).iloc[b_index], pd.DataFrame(X).iloc[b_index] - - b_beta_hat = beta_lasso(Y, X, lamb) + _Y, _X = pd.DataFrame(Y).iloc[b_index], pd.DataFrame(X).iloc[b_index] + b_beta_hat = beta_lasso(np.array(_Y), _X, lamb) # Saving coefficient estimates - beta_hat_boots[b_iter] = b_beta_hat + beta_hat_boots[b_iter, :] = b_beta_hat.squeeze() # Estimated coefficients from bootstrapping beta_hat_boots = pd.DataFrame(beta_hat_boots) beta_hat_boots.index.name = 'boot_iter' - beta_hat_boots.columns = X.columns.values.tolist() + #beta_hat_boots.columns = X.columns.values.tolist() # Bootstrapped variance of coefficient estimates beta_hat_boot_var = beta_hat_boots.var(axis=0) @@ -121,6 +120,47 @@ def lasso_fit(Y, X, lamb, n_iter=100, progress_disable = False): +class Results_wrap(object): + ''' + Summarize the Regression Results (based of statsmodels) + Parameters + ----------- + yname : string, optional + Default is `y` + xname : list of strings, optional + Default is `var_##` for ## in p the number of regressors + title : string, optional + Title for the top table. If not None, then this replaces the + default title + alpha : float + significance level for the confidence intervals + Returns + ------- + smry : Summary instance + this holds the summary tables and text, which can be printed or + converted to various output formats. + ''' + def __init__(self, model, coefficients, + cov_type='nonrobust', bootstrap_coeffs=None, bootstrap_coeffs_var=None, bootstrap_coeffs_SE=None, bootstrap_coeffs_t=None, bootstrap_coeffs_p=None): + self.model = model + self.coefficients = coefficients + self.cov_type = cov_type + self.beta_hat_boots = bootstrap_coeffs + self.beta_hat_boots_var = bootstrap_coeffs_var + self.beta_hat_boots_SE = bootstrap_coeffs_SE + self.beta_hat_boots_t = bootstrap_coeffs_t + self.beta_hat_boots_p = bootstrap_coeffs_p + + def summary(self, title=None): + ''' + TO DO: PRINTABLE SUMMARY RESULTS LIKE STATSMODELS + ''' + # testing that summary is callable + if title is None: + title = self.model + ' ' + "Regression Results" + return print(title) + + def loss(Y, X, lamb, betas): """ Compute loss function From 37fe10d73b571207f859615d7f9cdc1b92834dc9 Mon Sep 17 00:00:00 2001 From: Nadav Tadelis Date: Sat, 7 Dec 2019 15:27:05 -0500 Subject: [PATCH 62/75] change to correct naming --- .../project_2_packages/paddleboat/LASSO/{LASSO.py => RIDGE.py} | 0 .../paddleboat/test/{test_LASSO.py => test_RIDGE.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename Projects/project_2_packages/paddleboat/LASSO/{LASSO.py => RIDGE.py} (100%) rename Projects/project_2_packages/paddleboat/test/{test_LASSO.py => test_RIDGE.py} (100%) diff --git a/Projects/project_2_packages/paddleboat/LASSO/LASSO.py b/Projects/project_2_packages/paddleboat/LASSO/RIDGE.py similarity index 100% rename from Projects/project_2_packages/paddleboat/LASSO/LASSO.py rename to Projects/project_2_packages/paddleboat/LASSO/RIDGE.py diff --git a/Projects/project_2_packages/paddleboat/test/test_LASSO.py b/Projects/project_2_packages/paddleboat/test/test_RIDGE.py similarity index 100% rename from Projects/project_2_packages/paddleboat/test/test_LASSO.py rename to Projects/project_2_packages/paddleboat/test/test_RIDGE.py From ff936166503f697d4886ca3eb51ee46cb8fee80c Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 15:29:23 -0500 Subject: [PATCH 63/75] Rename LASSO to RIDGE, include OLS tests --- .../paddleboat/test/test_LASSO.py | 31 -------- .../paddleboat/test/test_RIDGE.py | 79 +++++++++++++++++++ 2 files changed, 79 insertions(+), 31 deletions(-) delete mode 100644 Projects/project_2_packages/paddleboat/test/test_LASSO.py create mode 100644 Projects/project_2_packages/paddleboat/test/test_RIDGE.py diff --git a/Projects/project_2_packages/paddleboat/test/test_LASSO.py b/Projects/project_2_packages/paddleboat/test/test_LASSO.py deleted file mode 100644 index 7147f97..0000000 --- a/Projects/project_2_packages/paddleboat/test/test_LASSO.py +++ /dev/null @@ -1,31 +0,0 @@ -import numpy as np -import transcripty as tpy -import unittest - - -class TestHPM(unittest.TestCase): - def setUp(self): - self.hpm = tpy.HeterogeneousProbabilityModel( - 0.35, 0.6778, 1.0556, 0.0, 6, 12, 3, 125 - ) - - def test_gpa_irrelevance(self): - self.assertEqual(self.hpm(0.0, 0.0), self.hpm(0.0, 1.0)) - - def test_p_larger_gammamin(self): - """Tests p(a)""" - self.assertGreaterEqual(self.hpm.gamma_min, self.hpm(-100.0, 0.0)) - - def test_p_smaller_one(self): - self.assertLessEqual(1.0, self.hpm(100.0, 0.0)) - - def test_p(self): - gamma_min, gamma_1 = self.hpm.gamma_min, self.hpm.gamma_1 - gamma_2 = self.hpm.gamma_2 - a = 2.5 - p_a = gamma_min + (1 - gamma_min)/(1 + gamma_1*np.exp(-gamma_2*a)) - self.assertAlmostEqual(p_a, self.hpm(a, 0.0)) - - -if __name__ == "__main__": - unittest.main() diff --git a/Projects/project_2_packages/paddleboat/test/test_RIDGE.py b/Projects/project_2_packages/paddleboat/test/test_RIDGE.py new file mode 100644 index 0000000..83c8eaa --- /dev/null +++ b/Projects/project_2_packages/paddleboat/test/test_RIDGE.py @@ -0,0 +1,79 @@ +import sys +sys.path.insert(0,'../OLS') + +import numpy as np +import unittest +from OLS import beta_ols, Sigma +import statsmodels.api as sm +import pandas as pd + + + +class TestHPM(unittest.TestCase): + + def test_simple_b_ols(self): + np.random.seed(60683) # testing different seed + n = 50000 + ## DGP ## + means = [3, -1.5, 1.1, 2.3, -1, 3] + cov = [ + [1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1]] + + ## Data Generation ## + X1, X2, X3, X4, Z1, Z2 = np.random.multivariate_normal(means, cov, n).T + epsilon = np.random.normal(0, 1, n) + + # True model + Y = 1.5 + 2.5*X1 + 2*X2 + 3*X3 + 6*X4 + epsilon + + independent_vars = pd.DataFrame({'X1' : X1, 'X2' : X2, 'X3' : X3, 'X4' : X4}) + independent_vars = sm.add_constant(independent_vars) + dependent_vars = Y + + # running model + coeff_estimates = beta_ols(dependent_vars, independent_vars) + self.assertIsNone(np.testing.assert_almost_equal([1.5, 2.5, 2, 3, 6], coeff_estimates, decimal=1)) + + + def test_Sigma(self): + np.random.seed(1435) # testing different seed + n = 500000 + ## DGP ## + means = [3, -1.5, 1.1, 2.3, -1, 3] + cov = [ + [1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1]] + + ## Data Generation ## + X1, X2, X3, X4, Z1, Z2 = np.random.multivariate_normal(means, cov, n).T + epsilon = np.random.normal(0, 1, n) + + # True model + Y = 1.5 + 2.5*X1 + 2*X2 + 3*X3 + 6*X4 + epsilon + + independent_vars = pd.DataFrame({'X1' : X1, 'X2' : X2, 'X3' : X3, 'X4' : X4}) + independent_vars = sm.add_constant(independent_vars) + dependent_vars = Y + + + + # running model + covariance_matrix = Sigma(dependent_vars, independent_vars) + print(covariance_matrix) + test_kc = independent_vars.cov() + print(test_kc) + + #self.assertIsNone(np.testing.assert_almost_equal([1.5, 2.5, 2, 3, 6], coeff_estimates, decimal=1)) + + +if __name__ == "__main__": + unittest.main() From 011f5ff0a6d146c335e69d1a9c9fbab93370d038 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 15:31:28 -0500 Subject: [PATCH 64/75] Actually move OLS tests to RIDGE script --- .../paddleboat/test/test_RIDGE.py | 92 ++++++++++++++----- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/test/test_RIDGE.py b/Projects/project_2_packages/paddleboat/test/test_RIDGE.py index 7147f97..83c8eaa 100644 --- a/Projects/project_2_packages/paddleboat/test/test_RIDGE.py +++ b/Projects/project_2_packages/paddleboat/test/test_RIDGE.py @@ -1,30 +1,78 @@ +import sys +sys.path.insert(0,'../OLS') + import numpy as np -import transcripty as tpy import unittest +from OLS import beta_ols, Sigma +import statsmodels.api as sm +import pandas as pd + class TestHPM(unittest.TestCase): - def setUp(self): - self.hpm = tpy.HeterogeneousProbabilityModel( - 0.35, 0.6778, 1.0556, 0.0, 6, 12, 3, 125 - ) - - def test_gpa_irrelevance(self): - self.assertEqual(self.hpm(0.0, 0.0), self.hpm(0.0, 1.0)) - - def test_p_larger_gammamin(self): - """Tests p(a)""" - self.assertGreaterEqual(self.hpm.gamma_min, self.hpm(-100.0, 0.0)) - - def test_p_smaller_one(self): - self.assertLessEqual(1.0, self.hpm(100.0, 0.0)) - - def test_p(self): - gamma_min, gamma_1 = self.hpm.gamma_min, self.hpm.gamma_1 - gamma_2 = self.hpm.gamma_2 - a = 2.5 - p_a = gamma_min + (1 - gamma_min)/(1 + gamma_1*np.exp(-gamma_2*a)) - self.assertAlmostEqual(p_a, self.hpm(a, 0.0)) + + def test_simple_b_ols(self): + np.random.seed(60683) # testing different seed + n = 50000 + ## DGP ## + means = [3, -1.5, 1.1, 2.3, -1, 3] + cov = [ + [1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1]] + + ## Data Generation ## + X1, X2, X3, X4, Z1, Z2 = np.random.multivariate_normal(means, cov, n).T + epsilon = np.random.normal(0, 1, n) + + # True model + Y = 1.5 + 2.5*X1 + 2*X2 + 3*X3 + 6*X4 + epsilon + + independent_vars = pd.DataFrame({'X1' : X1, 'X2' : X2, 'X3' : X3, 'X4' : X4}) + independent_vars = sm.add_constant(independent_vars) + dependent_vars = Y + + # running model + coeff_estimates = beta_ols(dependent_vars, independent_vars) + self.assertIsNone(np.testing.assert_almost_equal([1.5, 2.5, 2, 3, 6], coeff_estimates, decimal=1)) + + + def test_Sigma(self): + np.random.seed(1435) # testing different seed + n = 500000 + ## DGP ## + means = [3, -1.5, 1.1, 2.3, -1, 3] + cov = [ + [1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1]] + + ## Data Generation ## + X1, X2, X3, X4, Z1, Z2 = np.random.multivariate_normal(means, cov, n).T + epsilon = np.random.normal(0, 1, n) + + # True model + Y = 1.5 + 2.5*X1 + 2*X2 + 3*X3 + 6*X4 + epsilon + + independent_vars = pd.DataFrame({'X1' : X1, 'X2' : X2, 'X3' : X3, 'X4' : X4}) + independent_vars = sm.add_constant(independent_vars) + dependent_vars = Y + + + + # running model + covariance_matrix = Sigma(dependent_vars, independent_vars) + print(covariance_matrix) + test_kc = independent_vars.cov() + print(test_kc) + + #self.assertIsNone(np.testing.assert_almost_equal([1.5, 2.5, 2, 3, 6], coeff_estimates, decimal=1)) if __name__ == "__main__": From 5c93e7283f594ec839840d8eb77c33a0943dd335 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sat, 7 Dec 2019 15:33:28 -0500 Subject: [PATCH 65/75] Rename lassos to ridges --- .../paddleboat/LASSO/RIDGE.py | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/LASSO/RIDGE.py b/Projects/project_2_packages/paddleboat/LASSO/RIDGE.py index 8b0d051..71a223e 100644 --- a/Projects/project_2_packages/paddleboat/LASSO/RIDGE.py +++ b/Projects/project_2_packages/paddleboat/LASSO/RIDGE.py @@ -5,9 +5,9 @@ from scipy.stats import t -def beta_lasso(Y, X, lamb): +def beta_ridge(Y, X, lamb): """ - Compute lasso coeffs + Compute ridge coeffs Parameters ---------- @@ -17,7 +17,7 @@ def beta_lasso(Y, X, lamb): Returns ------- - coefficients : Vector of Lasso coefficients + coefficients : Vector of ridge coefficients Note ---- @@ -25,11 +25,11 @@ def beta_lasso(Y, X, lamb): which are not computationally efficient at O(p^3). SVD would be a more efficient approach. """ - + Z = X.iloc[:, 1:] Z = pd.DataFrame(preprocessing.scale(Z)) Y_c = Y - np.mean(Y) - + left = np.linalg.inv(Z.transpose().dot(Z) + lamb * np.identity(Z.shape[1])) right = Z.transpose().dot(Y_c) coefficients = left.dot(right) @@ -50,15 +50,15 @@ def sse(Y, X, betas): ------- sse : Sum of square errors ''' - + e = betas.dot(X.values.T)-Y sse = np.sum(e**2) return sse -def lasso_fit(Y, X, lamb, n_iter=100, progress_disable = False): +def ridge_fit(Y, X, lamb, n_iter=100, progress_disable = False): """ - Estimate Lasso standard errors through bootstrapping + Estimate ridge standard errors through bootstrapping Parameters ---------- @@ -70,9 +70,9 @@ def lasso_fit(Y, X, lamb, n_iter=100, progress_disable = False): Returns ------- - results : Results wrapper with lasso results - coefficients = Lasso coefficients from full sample - bootstrap_coeffs = Lasso coefficients from bootstrapping procedure + results : Results wrapper with ridge results + coefficients = ridge coefficients from full sample + bootstrap_coeffs = ridge coefficients from bootstrapping procedure bootstrap_coeffs_var = Coefficient variance from bootstrapping bootstrap_coeffs_SE = Coefficient standard errors from bootstrapping bootstrap_coeffs_t = T-stats (from bootstrapping SE) @@ -87,7 +87,7 @@ def lasso_fit(Y, X, lamb, n_iter=100, progress_disable = False): b_index = np.random.choice(range(0, nobs), nobs, replace = True) _Y, _X = pd.DataFrame(Y).iloc[b_index], pd.DataFrame(X).iloc[b_index] - b_beta_hat = beta_lasso(np.array(_Y), _X, lamb) + b_beta_hat = beta_ridge(np.array(_Y), _X, lamb) # Saving coefficient estimates beta_hat_boots[b_iter, :] = b_beta_hat.squeeze() @@ -103,14 +103,14 @@ def lasso_fit(Y, X, lamb, n_iter=100, progress_disable = False): beta_hat_boot_SE = np.sqrt(beta_hat_boot_var) # Bootstrapped t-stats for null that coefficient = 0 - ## note that we use the coefficient estimates from the full sample + ## note that we use the coefficient estimates from the full sample ## but the variance from the bootstrapping procedure - beta_hat_boot_t = beta_lasso(Y, X, lamb) / beta_hat_boot_SE + beta_hat_boot_t = beta_ridge(Y, X, lamb) / beta_hat_boot_SE # Bootstrapped p values from t test (two-sided) beta_hat_boot_p = pd.Series(2 * (1- t.cdf(np.abs(beta_hat_boot_t), df = nobs - K)), beta_hat_boot_t.index) - return Results_wrap(model = print('Lasso, lambda =', lamb), + return Results_wrap(model = print('Lasso, lambda =', lamb), coefficients = beta_lasso(Y, X, lamb), bootstrap_coeffs = beta_hat_boots, bootstrap_coeffs_var = beta_hat_boot_var, @@ -140,7 +140,7 @@ class Results_wrap(object): this holds the summary tables and text, which can be printed or converted to various output formats. ''' - def __init__(self, model, coefficients, + def __init__(self, model, coefficients, cov_type='nonrobust', bootstrap_coeffs=None, bootstrap_coeffs_var=None, bootstrap_coeffs_SE=None, bootstrap_coeffs_t=None, bootstrap_coeffs_p=None): self.model = model self.coefficients = coefficients @@ -176,6 +176,6 @@ def loss(Y, X, lamb, betas): ------- loss : Computed loss """ - + loss = sse(Y, X, betas) + lamb * np.sum(np.abs(betas)) - return loss \ No newline at end of file + return loss From 05f9434bc621c4628404f6f1970f15579c479e78 Mon Sep 17 00:00:00 2001 From: Casey McQuillan Date: Sat, 7 Dec 2019 19:06:56 -0500 Subject: [PATCH 66/75] Corrected tests on OLS. R^2 and Coefficents work, but concerns about Coeffienct Covariance Matrix and Coefficient Standard Errors. --- .../project_2_packages/paddleboat/OLS/OLS.py | 12 ++- .../paddleboat/OLS/testing_OLS.py | 15 --- .../paddleboat/test/test_OLS.py | 95 ++++++++++++++++--- 3 files changed, 90 insertions(+), 32 deletions(-) delete mode 100644 Projects/project_2_packages/paddleboat/OLS/testing_OLS.py diff --git a/Projects/project_2_packages/paddleboat/OLS/OLS.py b/Projects/project_2_packages/paddleboat/OLS/OLS.py index 74a459e..55912d8 100644 --- a/Projects/project_2_packages/paddleboat/OLS/OLS.py +++ b/Projects/project_2_packages/paddleboat/OLS/OLS.py @@ -61,10 +61,11 @@ def Sigma(Y,X): Sigma : var-cov matrix as pandas df ''' e = resids(Y,X) - std_hat = e.dot(e.T)/(X.shape[0]-X.shape[1]) + std_hat = e.dot(e.T)/(X.shape[1]-1) Sigma = std_hat*np.linalg.inv(X.transpose().dot(X).values) return pd.DataFrame(Sigma) + def variance_ols(Y,X): ''' Estimate OLS variance-covariance matrix @@ -98,10 +99,10 @@ def r2_ols(Y, X): ''' y_hat = beta_ols(Y,X).dot(X.values.T) - y_bar = np.mean(y) + y_bar = np.mean(Y) SSR = np.sum((y_hat - y_bar)**2) - SST = np.sum((y - y_bar)**2) + SST = np.sum((Y - y_bar)**2) r2 = SSR / SST return r2 @@ -130,4 +131,7 @@ def least_sq(Y, X): print('R-Squared:', r2_ols(Y,X)) print('') print("Variance-Covariance Matrix:") - return Sigma(Y,X) + print(Sigma(Y,X)) + + + diff --git a/Projects/project_2_packages/paddleboat/OLS/testing_OLS.py b/Projects/project_2_packages/paddleboat/OLS/testing_OLS.py deleted file mode 100644 index 0940cd9..0000000 --- a/Projects/project_2_packages/paddleboat/OLS/testing_OLS.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Sat Dec 7 10:02:42 2019 - -@author: caseymcquillan -""" - -import numpy as np -import pandas as pd -import math -import scipy -from OLS import least_sq - - diff --git a/Projects/project_2_packages/paddleboat/test/test_OLS.py b/Projects/project_2_packages/paddleboat/test/test_OLS.py index a94639c..c5f88aa 100644 --- a/Projects/project_2_packages/paddleboat/test/test_OLS.py +++ b/Projects/project_2_packages/paddleboat/test/test_OLS.py @@ -4,7 +4,7 @@ import numpy as np import transcripty as tpy import unittest -from OLS import beta_ols, Sigma +from OLS import beta_ols, Sigma, r2_ols, least_sq import statsmodels.api as sm import pandas as pd @@ -12,6 +12,7 @@ class TestHPM(unittest.TestCase): + def test_simple_b_ols(self): np.random.seed(60683) # testing different seed n = 50000 @@ -34,16 +35,49 @@ def test_simple_b_ols(self): independent_vars = pd.DataFrame({'X1' : X1, 'X2' : X2, 'X3' : X3, 'X4' : X4}) independent_vars = sm.add_constant(independent_vars) - dependent_vars = Y + dependent_var = Y # running model - coeff_estimates = beta_ols(dependent_vars, independent_vars) + coeff_estimates = beta_ols(dependent_var, independent_vars) self.assertIsNone(np.testing.assert_almost_equal([1.5, 2.5, 2, 3, 6], coeff_estimates, decimal=1)) def test_Sigma(self): + np.random.seed(1784) # testing different seed + n = 50000 + ## DGP ## + means = [3, -1.5, 1.1, 2.3, -1, 3] + cov = [ + [1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1]] + + ## Data Generation ## + X1, X2, X3, X4, Z1, Z2 = np.random.multivariate_normal(means, cov, n).T + epsilon = np.random.normal(0, 1, n) + + # True model + Y = 1.5 + 2.5*X1 + 2*X2 + 3*X3 + 6*X4 + epsilon + + independent_vars = pd.DataFrame({'X1' : X1, 'X2' : X2, 'X3' : X3, 'X4' : X4}) + independent_vars = sm.add_constant(independent_vars) + dependent_var = Y + our_CovMatrix = Sigma(dependent_var, independent_vars) + + model = sm.OLS(dependent_var, independent_vars) + results = model.fit() + stats_CovMatrix = results.cov_params() + + print(our_CovMatrix) + print(stats_CovMatrix) + + + def test_CoefStdErrors(self): np.random.seed(1435) # testing different seed - n = 500000 + n = 50000 ## DGP ## means = [3, -1.5, 1.1, 2.3, -1, 3] cov = [ @@ -63,17 +97,52 @@ def test_Sigma(self): independent_vars = pd.DataFrame({'X1' : X1, 'X2' : X2, 'X3' : X3, 'X4' : X4}) independent_vars = sm.add_constant(independent_vars) - dependent_vars = Y - - + dependent_var = Y + our_CoefStdErrors = np.sqrt(np.diagonal(Sigma(dependent_var, independent_vars))) - # running model - covariance_matrix = Sigma(dependent_vars, independent_vars) - print(covariance_matrix) - test_kc = independent_vars.cov() - print(test_kc) + model = sm.OLS(dependent_var, independent_vars) + results = model.fit() + stats_CoefStdErrors = results.bse + + #self.assertIsNone(np.testing.assert_almost_equal(our_CoefStdErrors, stats_CoefStdErrors, decimal=1)) + + print(our_CoefStdErrors) + print(stats_CoefStdErrors) + + least_sq(dependent_var, independent_vars) + print(results.summary()) + + + def test_r2(self): + np.random.seed(17855) # testing different seed + n = 50000 + ## DGP ## + means = [3, -1.5, 1.1, 2.3, -1, 3] + cov = [ + [1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1]] + + ## Data Generation ## + X1, X2, X3, X4, Z1, Z2 = np.random.multivariate_normal(means, cov, n).T + epsilon = np.random.normal(0, 1, n) + + # True model + Y = 1.5 + 2.5*X1 + 2*X2 + 3*X3 + 6*X4 + epsilon + + independent_vars = pd.DataFrame({'X1' : X1, 'X2' : X2, 'X3' : X3, 'X4' : X4}) + independent_vars = sm.add_constant(independent_vars) + dependent_var = Y + our_r2 = r2_ols(dependent_var, independent_vars) + + model = sm.OLS(dependent_var, independent_vars) + results = model.fit() + stats_r2 = results.rsquared - #self.assertIsNone(np.testing.assert_almost_equal([1.5, 2.5, 2, 3, 6], coeff_estimates, decimal=1)) + self.assertIsNone(np.testing.assert_almost_equal(our_r2, stats_r2, decimal=5)) From e41095afcffbb0772bcac2870b2c08cc55d7d689 Mon Sep 17 00:00:00 2001 From: Casey McQuillan Date: Sun, 8 Dec 2019 08:51:36 -0500 Subject: [PATCH 67/75] Resolving merge conflict. --- Projects/project_2_packages/paddleboat/test/test_OLS.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Projects/project_2_packages/paddleboat/test/test_OLS.py b/Projects/project_2_packages/paddleboat/test/test_OLS.py index fd4e5d3..c5f88aa 100644 --- a/Projects/project_2_packages/paddleboat/test/test_OLS.py +++ b/Projects/project_2_packages/paddleboat/test/test_OLS.py @@ -1,4 +1,3 @@ -<<<<<<< HEAD import sys sys.path.insert(0,'../OLS') From 97be740894db5c12236182d5a54418df704a02ac Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sun, 8 Dec 2019 10:02:24 -0500 Subject: [PATCH 68/75] Add simulation folder/script --- .../paddleboat/simulation/.Rhistory | 0 .../paddleboat/simulation/morning.R | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100755 Projects/project_2_packages/paddleboat/simulation/.Rhistory create mode 100644 Projects/project_2_packages/paddleboat/simulation/morning.R diff --git a/Projects/project_2_packages/paddleboat/simulation/.Rhistory b/Projects/project_2_packages/paddleboat/simulation/.Rhistory new file mode 100755 index 0000000..e69de29 diff --git a/Projects/project_2_packages/paddleboat/simulation/morning.R b/Projects/project_2_packages/paddleboat/simulation/morning.R new file mode 100644 index 0000000..81c953a --- /dev/null +++ b/Projects/project_2_packages/paddleboat/simulation/morning.R @@ -0,0 +1,18 @@ +#----------------------------------------------------------------------------------# +# Performs simulations from Sunday morning +# Authors: Harriet, Casey, Nadav, Joel + +# Notes: +# +#----------------------------------------------------------------------------------# + + +######################################################## +######################## Set-up ######################## +######################################################## + +# load libraries +packages <- c("dplyr", "data.table", "ggplot2", "tidyr") +new.packages <- packages[!(packages %in% installed.packages()[, "Package"])] +if(length(new.packages)) install.packages(new.packages) +lapply(packages, library, character.only = TRUE) From bde2d1042539a7d6c67f72a0f1c8fa245ba5de15 Mon Sep 17 00:00:00 2001 From: Casey McQuillan Date: Sun, 8 Dec 2019 10:05:50 -0500 Subject: [PATCH 69/75] Commenting out print statements. --- Projects/project_2_packages/paddleboat/test/test_OLS.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/test/test_OLS.py b/Projects/project_2_packages/paddleboat/test/test_OLS.py index c5f88aa..848319b 100644 --- a/Projects/project_2_packages/paddleboat/test/test_OLS.py +++ b/Projects/project_2_packages/paddleboat/test/test_OLS.py @@ -106,11 +106,11 @@ def test_CoefStdErrors(self): #self.assertIsNone(np.testing.assert_almost_equal(our_CoefStdErrors, stats_CoefStdErrors, decimal=1)) - print(our_CoefStdErrors) - print(stats_CoefStdErrors) + #print(our_CoefStdErrors) + #print(stats_CoefStdErrors) - least_sq(dependent_var, independent_vars) - print(results.summary()) + #least_sq(dependent_var, independent_vars) + #print(results.summary()) def test_r2(self): From f7fa641c94c8096c6ca219d592ab2b2bc421b52d Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sun, 8 Dec 2019 10:25:58 -0500 Subject: [PATCH 70/75] Pseuocode Jim's algo --- .../paddleboat/simulation/morning.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 Projects/project_2_packages/paddleboat/simulation/morning.py diff --git a/Projects/project_2_packages/paddleboat/simulation/morning.py b/Projects/project_2_packages/paddleboat/simulation/morning.py new file mode 100644 index 0000000..5510ca3 --- /dev/null +++ b/Projects/project_2_packages/paddleboat/simulation/morning.py @@ -0,0 +1,17 @@ +import sys +sys.path.insert(0,'../OLS') + +import numpy as np +import unittest +from OLS import beta_ols, Sigma +import statsmodels.api as sm +import pandas as pd + +def simulation(X, Y): + beta_hat = norm(0,1) + sigma_hat = exp(norm(0,1)) + y_hat = norm(X * beta_hat, sigma_hat) + + fit = OLS.beta_ols(Y, X) + beta_model = OLS.betas(Y,X) + covariance_model = OLS.covariance(Y,X) From 87ac060272af45d0f4f38c93faae773c2b583ef6 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sun, 8 Dec 2019 10:33:13 -0500 Subject: [PATCH 71/75] Many simulations func --- .../paddleboat/simulation/morning.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/simulation/morning.py b/Projects/project_2_packages/paddleboat/simulation/morning.py index 03e4f33..a4cbf53 100644 --- a/Projects/project_2_packages/paddleboat/simulation/morning.py +++ b/Projects/project_2_packages/paddleboat/simulation/morning.py @@ -7,6 +7,8 @@ import statsmodels.api as sm import pandas as pd +X = random_matrix + def simulation(X, Y): beta_hat = norm(0,1) sigma_hat = exp(norm(0,1)) @@ -17,3 +19,11 @@ def simulation(X, Y): covariance_model = OLS.sigma(Y,X) a = MVN(beta_model, covariance_model) + + return a + +def many_simulations(X, Y, sims=100): + u = vector(length = sims) + for s in sims: + simulation(X, Y) + u[s] += a From 8669829a601772df447d7d15e11c12844ed148ca Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sun, 8 Dec 2019 10:34:03 -0500 Subject: [PATCH 72/75] Give funcs docstrings --- .../project_2_packages/paddleboat/simulation/morning.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/simulation/morning.py b/Projects/project_2_packages/paddleboat/simulation/morning.py index a4cbf53..85175f2 100644 --- a/Projects/project_2_packages/paddleboat/simulation/morning.py +++ b/Projects/project_2_packages/paddleboat/simulation/morning.py @@ -10,6 +10,9 @@ X = random_matrix def simulation(X, Y): + ''' + Draws single simulations + ''' beta_hat = norm(0,1) sigma_hat = exp(norm(0,1)) y_hat = norm(X * beta_hat, sigma_hat) @@ -23,6 +26,9 @@ def simulation(X, Y): return a def many_simulations(X, Y, sims=100): + ''' + Draws many simulations + ''' u = vector(length = sims) for s in sims: simulation(X, Y) From 2fd33aae47e23505128b74cb8f601cdf3e1dce67 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sun, 8 Dec 2019 11:31:04 -0500 Subject: [PATCH 73/75] All early functions --- .../paddleboat/simulation/morning.R | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/Projects/project_2_packages/paddleboat/simulation/morning.R b/Projects/project_2_packages/paddleboat/simulation/morning.R index 81c953a..ec582ad 100644 --- a/Projects/project_2_packages/paddleboat/simulation/morning.R +++ b/Projects/project_2_packages/paddleboat/simulation/morning.R @@ -16,3 +16,72 @@ packages <- c("dplyr", "data.table", "ggplot2", "tidyr") new.packages <- packages[!(packages %in% installed.packages()[, "Package"])] if(length(new.packages)) install.packages(new.packages) lapply(packages, library, character.only = TRUE) + + +######################################################## +####################### Functions ###################### +######################################################## + +alpha <- 3 +delta <- -2 +gamma <- 1 +psi <- 4 + +n <- 1000 + +price <- uniform(0, 1) + + +supply.shocks <- function(epsilon, n=n){ + eta <- rnorm(0, 1) + z <- epsilon + eta + + return(z) +} + + +demand.shocks <- function(epsilon, n=n){ + eta <- rnorm(0, 1) + z <- epsilon + eta + + return(z) +} + + +supply.function <- function(gamma, psi, price){ + epsilon <- rnorm(0, 1) + q <- gamma + psi * price + supply.shocks(epsilon) + + return(q) +} + + +demand.function <- function(alpha, gamma, price, n){ + epsilon <- rnorm(0, 1) + q <- alpha + gamma * price + demand.shocks(epsilon) + + return(q) +} + + +find.market.price <- function(price){ + q_d <- demand.function(alpha, delta, price, n) + q_s <- supply.function(gamma, psi, price, n) + + while (sum(abs(q_d - q_s)) > 0.05) { + p <- p + 0.1 * (q_d - q_s) + } + + return(p) +} + + +markets <- 50 +prices <- punif(0, 1, 50) +market_matrix <- data.frame() + +for (market in markets) { + find.market.price(price = prices[market]) +} + + From d33c6f9b94b76d08fc2876feade54afd9a618447 Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sun, 8 Dec 2019 11:33:47 -0500 Subject: [PATCH 74/75] Minor changes --- Projects/project_2_packages/paddleboat/simulation/morning.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Projects/project_2_packages/paddleboat/simulation/morning.R b/Projects/project_2_packages/paddleboat/simulation/morning.R index ec582ad..faf09cc 100644 --- a/Projects/project_2_packages/paddleboat/simulation/morning.R +++ b/Projects/project_2_packages/paddleboat/simulation/morning.R @@ -78,7 +78,11 @@ find.market.price <- function(price){ markets <- 50 prices <- punif(0, 1, 50) -market_matrix <- data.frame() +market_matrix <- data.frame( + quantity = vector(markets), + prices = prices, + demand_shocks = demands.shocks(n = markets) +) for (market in markets) { find.market.price(price = prices[market]) From 752750570c812f57f2c93145d55e5ecbd2bbee4c Mon Sep 17 00:00:00 2001 From: joel-becker Date: Sun, 8 Dec 2019 11:58:35 -0500 Subject: [PATCH 75/75] Replace w Jim code --- .../paddleboat/simulation/morning.R | 81 ++++++------------- 1 file changed, 23 insertions(+), 58 deletions(-) diff --git a/Projects/project_2_packages/paddleboat/simulation/morning.R b/Projects/project_2_packages/paddleboat/simulation/morning.R index faf09cc..8d06e2e 100644 --- a/Projects/project_2_packages/paddleboat/simulation/morning.R +++ b/Projects/project_2_packages/paddleboat/simulation/morning.R @@ -22,70 +22,35 @@ lapply(packages, library, character.only = TRUE) ####################### Functions ###################### ######################################################## -alpha <- 3 -delta <- -2 -gamma <- 1 -psi <- 4 +Markets <- 50 -n <- 1000 - -price <- uniform(0, 1) - - -supply.shocks <- function(epsilon, n=n){ - eta <- rnorm(0, 1) - z <- epsilon + eta - - return(z) -} - - -demand.shocks <- function(epsilon, n=n){ - eta <- rnorm(0, 1) - z <- epsilon + eta - - return(z) -} - - -supply.function <- function(gamma, psi, price){ - epsilon <- rnorm(0, 1) - q <- gamma + psi * price + supply.shocks(epsilon) - - return(q) -} - - -demand.function <- function(alpha, gamma, price, n){ - epsilon <- rnorm(0, 1) - q <- alpha + gamma * price + demand.shocks(epsilon) - - return(q) +demand <- function(alpha, delta, price, demand_shock) { + if(delta > 0) { + stop("Demand curves slope downward, sorry AOC") + } + alpha + delta * price + demand_shock } - -find.market.price <- function(price){ - q_d <- demand.function(alpha, delta, price, n) - q_s <- supply.function(gamma, psi, price, n) - - while (sum(abs(q_d - q_s)) > 0.05) { - p <- p + 0.1 * (q_d - q_s) +supply <- function(gamma, psi, price, supply_shock) { + if(psi < 0) { + stop("Supply curves slope downward, sorry el presidente") } - - return(p) + gamma + psi * price + supply_shock } - -markets <- 50 -prices <- punif(0, 1, 50) -market_matrix <- data.frame( - quantity = vector(markets), - prices = prices, - demand_shocks = demands.shocks(n = markets) -) - -for (market in markets) { - find.market.price(price = prices[market]) +alpha <- rnorm(1, 15, 2) +delta <- -exp(rnorm(1, 0, 1)) +gamma <- rnorm(1, -2, 1) +psi <- exp(rnorm(1, 0, 1)) +xi_d <- rnorm(Markets) +xi_s <- rnorm(Markets) +supply_instrument <- xi_s + rnorm(Markets) +prices <- rep(0, Markets) + +while(sum(abs(demand(alpha, delta, prices, xi_d) - + supply(gamma, psi, prices, xi_s))) > 0.01) { + prices <- prices + 0.1*(demand(alpha, delta, prices, xi_d) - + supply(gamma, psi, prices, xi_s)) }