From 3426ae555be69a411f1a4c46074a66392493a278 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Thu, 23 May 2024 23:23:06 +0900 Subject: [PATCH 01/14] feat: Getting started with improving portfolio model class --- finance/ext/models.py | 150 +++++++++++++++++++++++++++++++++++ finance/ext/warehouse.py | 95 ---------------------- tests/ext/test_models.py | 7 ++ tests/samples/portfolio1.yml | 24 ++++++ 4 files changed, 181 insertions(+), 95 deletions(-) create mode 100644 finance/ext/models.py create mode 100644 tests/ext/test_models.py create mode 100644 tests/samples/portfolio1.yml diff --git a/finance/ext/models.py b/finance/ext/models.py new file mode 100644 index 0000000..c69fd0f --- /dev/null +++ b/finance/ext/models.py @@ -0,0 +1,150 @@ +from datetime import datetime + +import yaml + +from finance.utils import date_to_datetime + +from typing import List + + +class Portfolio: + class Transaction: + def __init__(self, date: datetime, ticker: str, quantity: float): + self.date = date + self.ticker = ticker + self.quantity = quantity + + # NOTE: I'd like to mark the return type as List[Transaction], but it + # complains that Transaction is not defined... + @classmethod + def load_transactions(cls, yaml_data: List[dict]): + return [cls(d["date"], d["ticker"], d["quantity"]) for d in yaml_data] + + # TODO: Get rid of dependencies on DataFrame + def __init__( + self, + inventory: dict, + current_prices: dict, + target_weights: dict, + transactions: List[Transaction], + ): + self.inventory = {} # ticker: quantity + self.current_prices = current_prices # ticker: price + self.target_weights = self.normalize_weights(target_weights) # ticker: weight + self.transactions = transactions + + @property + def asset_values(self): + return {t: self.current_prices[t] * q for t, q in self.inventory.items()} + + @property + def net_asset_value(self): + return sum(self.asset_values.values()) + + @property + def current_weights(self): + """Calculate the weights of the current holdings based on the current + price.""" + nav = self.net_asset_value + return {t: v / nav for t, v in self.asset_values.items()} + + def evaluate_inventory(self, evaluated_at=datetime.utcnow()) -> dict: + self.inventory = {} + for record in self.transactions: + if date_to_datetime(record.date) <= evaluated_at: + self.apply_transaction(record) + return self.inventory + + def apply_transaction(self, record: Transaction): + """Reflects the given transaction record to the inventory.""" + self.inventory.setdefault(record.ticker, 0) + self.inventory[record.ticker] += record.quantity + + @classmethod + def load_from_file(cls, path: str, current_prices: dict): + """Loads `inventory` and `target_weights` from a YAML file. + + :param current_prices: This must be injected from outside the class. + """ + with open(path) as fin: + content = yaml.safe_load(fin) + portfolio = content["portfolio"] + transactions = cls.Transaction.load_transactions(content["transactions"]) + return Portfolio( + {}, + current_prices, + portfolio["target_weights"], + transactions, + ) + + def normalize_weights(self, weights: dict): + net_weight = sum(weights.values()) + return {t: v / net_weight for t, v in weights.items()} + + def calc_diff(self): + """Calculate the difference between the target weights and the current + ones.""" + cw = self.current_weights + tw = self.target_weights + all_keys = set(list(cw.keys()) + list(tw.keys())) + + def diff(t, cw, tw): + cw.setdefault(t, 0) + tw.setdefault(t, 0) + return cw[t] - tw[t] + + return {t: diff(t, cw, tw) for t in all_keys} + + # TODO: Incorporate tax and fees + def make_rebalancing_plan(self): + """ + Negative diff means we're short of that asset, so we need to buy more; + whereas positive diff means we need to sell some. + Positive values in rebalance plans means the quantity of the asset to + be purchased. + """ + nav = self.net_asset_value + diff = self.calc_diff() + + def plan(t, diff): + return round((nav * -diff[t]) / self.current_prices[t]) + + return {t: plan(t, diff) for t in diff if t != "_USD"} + + # TODO: Tax on dividends? + # TODO: Transaction fees? + def apply_plan( + self, plan: dict, start_dt: datetime, end_dt: datetime, dividend_records: dict + ): + def apply(t, q): + self.inventory.setdefault(t, 0) + while self.inventory["_USD"] - self.current_prices[t] * q < 0: + if q > 0: + q -= 1 + else: + q += 1 + self.inventory["_USD"] -= self.current_prices[t] * q + if self.inventory["_USD"] < 0: + raise ValueError(f"USD balance cannot be negative: {t}, {q}") + return self.inventory[t] + q + + # 'close' is actually 'adj close', which already includes + # dividends/stock split/capital gains + # self.inventory["_USD"] += self.calc_dividends_sum(start_dt, end_dt, dividend_records) * 0.85 + self.inventory = {t: apply(t, q) for t, q in plan.items()} | { + "_USD": self.inventory["_USD"] + } + return self.inventory + + def calc_dividends_sum( + self, start_dt: datetime, end_dt: datetime, dividend_records: dict + ) -> float: + div_sum = 0.0 + for t, q in self.inventory.items(): + if t in dividend_records: + for div_dt, div_amount in dividend_records[t]: + if start_dt <= div_dt < end_dt: + if q < 0: + raise ValueError(f"Quantity cannot be negative: {t}, {q}") + div_sum += div_amount * q + return div_sum diff --git a/finance/ext/warehouse.py b/finance/ext/warehouse.py index 60828a1..5544d4c 100644 --- a/finance/ext/warehouse.py +++ b/finance/ext/warehouse.py @@ -269,98 +269,3 @@ def map_sector_indices( sector_values = (tickers[i]["sector"][0] for i in combination_indices) return [sector_index_map[s] for s in sector_values] # return [sectors.index(s) for s in sector_values] - - -class Portfolio: - # TODO: Get rid of dependencies on DataFrame - def __init__( - self, - inventory: dict, - current_prices: dict, - target_weights: dict, - ): - self.inventory = inventory # ticker: quantity - self.current_prices = current_prices # ticker: price - self.target_weights = self.normalize_weights(target_weights) # ticker: weight - - @property - def asset_values(self): - return {t: self.current_prices[t] * q for t, q in self.inventory.items()} - - @property - def net_asset_value(self): - return sum(self.asset_values.values()) - - @property - def current_weights(self): - """Calculate the weights of the current holdings based on the current price.""" - nav = self.net_asset_value - return {t: v / nav for t, v in self.asset_values.items()} - - def normalize_weights(self, weights: dict): - net_weight = sum(weights.values()) - return {t: v / net_weight for t, v in weights.items()} - - def calc_diff(self): - """Calculate the difference between the target weights and the current ones.""" - cw = self.current_weights - tw = self.target_weights - all_keys = set(list(cw.keys()) + list(tw.keys())) - - def diff(t, cw, tw): - cw.setdefault(t, 0) - tw.setdefault(t, 0) - return cw[t] - tw[t] - - return {t: diff(t, cw, tw) for t in all_keys} - - # TODO: Incorporate tax and fees - def make_rebalancing_plan(self): - """ - Negative diff means we're short of that asset, so we need to buy more; whereas positive diff means we need to sell some. - Positive values in rebalance plans means the quantity of the asset to be purchased. - """ - nav = self.net_asset_value - diff = self.calc_diff() - - def plan(t, diff): - return round((nav * -diff[t]) / self.current_prices[t]) - - return {t: plan(t, diff) for t in diff if t != "_USD"} - - # TODO: Tax on dividends? - # TODO: Transaction fees? - def apply_plan( - self, plan: dict, start_dt: datetime, end_dt: datetime, dividend_records: dict - ): - def apply(t, q): - self.inventory.setdefault(t, 0) - while self.inventory["_USD"] - self.current_prices[t] * q < 0: - if q > 0: - q -= 1 - else: - q += 1 - self.inventory["_USD"] -= self.current_prices[t] * q - if self.inventory["_USD"] < 0: - raise ValueError(f"USD balance cannot be negative: {t}, {q}") - return self.inventory[t] + q - - # 'close' is actually 'adj close', which already includes dividends/stock split/capital gains - # self.inventory["_USD"] += self.calc_dividends_sum(start_dt, end_dt, dividend_records) * 0.85 - self.inventory = {t: apply(t, q) for t, q in plan.items()} | { - "_USD": self.inventory["_USD"] - } - return self.inventory - - def calc_dividends_sum( - self, start_dt: datetime, end_dt: datetime, dividend_records: dict - ) -> float: - div_sum = 0.0 - for t, q in self.inventory.items(): - if t in dividend_records: - for div_dt, div_amount in dividend_records[t]: - if start_dt <= div_dt < end_dt: - if q < 0: - raise ValueError(f"Quantity cannot be negative: {t}, {q}") - div_sum += div_amount * q - return div_sum diff --git a/tests/ext/test_models.py b/tests/ext/test_models.py new file mode 100644 index 0000000..209abba --- /dev/null +++ b/tests/ext/test_models.py @@ -0,0 +1,7 @@ +from finance.ext.models import Portfolio + + +def test_portfolio(): + pf = Portfolio.load_from_file("tests/samples/portfolio1.yml", {}) + pf.evaluate_inventory() + assert pf.inventory == {"QQQ": 6, "SCHD": 70, "SCHH": 10, "TLT": 15} diff --git a/tests/samples/portfolio1.yml b/tests/samples/portfolio1.yml new file mode 100644 index 0000000..61f97d3 --- /dev/null +++ b/tests/samples/portfolio1.yml @@ -0,0 +1,24 @@ +portfolio: + inventory: + QQQ: 6 + SCHD: 70 + SCHH: 10 + TLT: 15 + target_weights: + QQQ: 2 + SCHD: 6 + SCHH: 1 + TLT: 1 +transactions: + - date: 2020-01-03 + ticker: QQQ + quantity: 6 + - date: 2020-01-03 + ticker: SCHD + quantity: 70 + - date: 2020-01-03 + ticker: SCHH + quantity: 10 + - date: 2020-01-03 + ticker: TLT + quantity: 15 From 1560b0d58fa6255ce9ddaec18eec8e9c2c06fe2a Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Thu, 23 May 2024 23:30:47 +0900 Subject: [PATCH 02/14] refactor: Remove unused code --- finance/ext/models.py | 4 +--- finance/utils.py | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/finance/ext/models.py b/finance/ext/models.py index c69fd0f..f4bf5c1 100644 --- a/finance/ext/models.py +++ b/finance/ext/models.py @@ -23,12 +23,11 @@ def load_transactions(cls, yaml_data: List[dict]): # TODO: Get rid of dependencies on DataFrame def __init__( self, - inventory: dict, current_prices: dict, target_weights: dict, transactions: List[Transaction], ): - self.inventory = {} # ticker: quantity + self.inventory: dict[str, float] = {} # ticker: quantity self.current_prices = current_prices # ticker: price self.target_weights = self.normalize_weights(target_weights) # ticker: weight self.transactions = transactions @@ -71,7 +70,6 @@ def load_from_file(cls, path: str, current_prices: dict): portfolio = content["portfolio"] transactions = cls.Transaction.load_transactions(content["transactions"]) return Portfolio( - {}, current_prices, portfolio["target_weights"], transactions, diff --git a/finance/utils.py b/finance/utils.py index 5fc78db..8cc6bff 100644 --- a/finance/utils.py +++ b/finance/utils.py @@ -4,7 +4,6 @@ from math import nan as math_nan import os -import boto3 from logbook import Logger # NOTE: finance.models should not be imported here in order to avoid circular From 7482ba65200d1fa1fad6b7f9d2809adf66169de8 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Thu, 23 May 2024 23:50:40 +0900 Subject: [PATCH 03/14] fix: Broken test cases --- tests/ext/test_ext_models.py | 119 +++++++++++++++++++++++++++++++++++ tests/ext/test_models.py | 7 --- tests/ext/test_warehouse.py | 111 +------------------------------- 3 files changed, 120 insertions(+), 117 deletions(-) create mode 100644 tests/ext/test_ext_models.py delete mode 100644 tests/ext/test_models.py diff --git a/tests/ext/test_ext_models.py b/tests/ext/test_ext_models.py new file mode 100644 index 0000000..f6bbb1e --- /dev/null +++ b/tests/ext/test_ext_models.py @@ -0,0 +1,119 @@ +import pytest + +from finance.ext.models import Portfolio + + +def assert_equals_dict_of_float(d1: dict, d2: dict): + assert d1.keys() == d2.keys() + for k in d1.keys(): + assert abs(d1[k] - d2[k]) < 1e-6 + + +def test_portfolio(): + pf = Portfolio.load_from_file("tests/samples/portfolio1.yml", {}) + pf.evaluate_inventory() + assert pf.inventory == {"QQQ": 6, "SCHD": 70, "SCHH": 10, "TLT": 15} + + +# TODO: Diversify test scenarios +inventory = { + "SPY": 50, + "TLT": 45, + "ARKW": 500, + "REMX": 100, + "_USD": 10000, +} +current_prices = { + "SPY": 436.04, + "TLT": 86.2, + "ARKW": 52.64, + "REMX": 63.25, + "GDX": 28.95, + "_USD": 1, +} +target = { + "SPY": 4, + "TLT": 3, + "GDX": 2, + "ARKW": 1, + "_USD": 1, +} +p1 = Portfolio(current_prices, target, []) +p1.inventory = inventory + + +def test_portfolio_asset_values(): + assert_equals_dict_of_float( + p1.asset_values, + { + "SPY": 21802.0, + "TLT": 3879.0, + "ARKW": 26320.0, + "REMX": 6325.0, + "_USD": 10000, + }, + ) + + +def test_portfolio_net_asset_value(): + assert p1.net_asset_value == 68326 + + +def test_portfolio_current_weights(): + assert_equals_dict_of_float( + p1.current_weights, + { + "ARKW": 0.3852120715393847, + "REMX": 0.09257091004888329, + "SPY": 0.3190879021163247, + "TLT": 0.056771946257647164, + "_USD": 0.14635717003776014, + }, + ) + + +def test_portfolio_calc_diff(): + assert_equals_dict_of_float( + p1.calc_diff(), + { + "SPY": -0.04454846152003894, + "ARKW": 0.2943029806302938, + "TLT": -0.21595532646962554, + "GDX": -0.18181818181818182, + "REMX": 0.09257091004888329, + "_USD": 0.055448079128669225, + }, + ) + + +@pytest.mark.skip +def test_portfolio_make_rebalancing_plan(): + assert_equals_dict_of_float( + p1.make_rebalancing_plan(), + { + "SPY": 6, + "ARKW": -382, + "TLT": 171, + "GDX": 429, + "REMX": -100, + }, + ) + + +@pytest.mark.skip +def test_portfolio_apply_plan(): + plan = p1.make_rebalancing_plan() + p1.apply_plan(plan, parse_dt("2023-01-03"), parse_dt("2023-01-05"), {}) + assert_equals_dict_of_float( + p1.inventory, + { + "SPY": 56, + "TLT": 216, + "ARKW": 118, + "REMX": 0, + "GDX": 429, + "_USD": 6657.49, + }, + ) + expected_usd = (target["_USD"] / sum(target.values())) * p1.net_asset_value + assert 0.9 < p1.inventory["_USD"] / expected_usd < 1.1 diff --git a/tests/ext/test_models.py b/tests/ext/test_models.py deleted file mode 100644 index 209abba..0000000 --- a/tests/ext/test_models.py +++ /dev/null @@ -1,7 +0,0 @@ -from finance.ext.models import Portfolio - - -def test_portfolio(): - pf = Portfolio.load_from_file("tests/samples/portfolio1.yml", {}) - pf.evaluate_inventory() - assert pf.inventory == {"QQQ": 6, "SCHD": 70, "SCHH": 10, "TLT": 15} diff --git a/tests/ext/test_warehouse.py b/tests/ext/test_warehouse.py index 17c6f31..0bd6828 100644 --- a/tests/ext/test_warehouse.py +++ b/tests/ext/test_warehouse.py @@ -3,13 +3,7 @@ import pytest -from finance.ext.warehouse import Portfolio, make_combination_indices - - -def assert_equals_dict_of_float(d1: dict, d2: dict): - assert d1.keys() == d2.keys() - for k in d1.keys(): - assert abs(d1[k] - d2[k]) < 1e-6 +from finance.ext.warehouse import make_combination_indices @pytest.mark.parametrize( @@ -36,106 +30,3 @@ def parse_dt(str_dt, tz=pytz.timezone("America/New_York")): return datetime.strptime(str_dt + " 00:00:00", "%Y-%m-%d %H:%M:%S").replace( tzinfo=tz ) - - -# TODO: Diversify test scenarios -inventory = { - "SPY": 50, - "TLT": 45, - "ARKW": 500, - "REMX": 100, - "_USD": 10000, -} -current_prices = { - "SPY": 436.04, - "TLT": 86.2, - "ARKW": 52.64, - "REMX": 63.25, - "GDX": 28.95, - "_USD": 1, -} -target = { - "SPY": 4, - "TLT": 3, - "GDX": 2, - "ARKW": 1, - "_USD": 1, -} -p1 = Portfolio(inventory, current_prices, target) - - -def test_portfolio_asset_values(): - assert_equals_dict_of_float( - p1.asset_values, - { - "SPY": 21802.0, - "TLT": 3879.0, - "ARKW": 26320.0, - "REMX": 6325.0, - "_USD": 10000, - }, - ) - - -def test_portfolio_net_asset_value(): - assert p1.net_asset_value == 68326 - - -def test_portfolio_current_weights(): - assert_equals_dict_of_float( - p1.current_weights, - { - "ARKW": 0.3852120715393847, - "REMX": 0.09257091004888329, - "SPY": 0.3190879021163247, - "TLT": 0.056771946257647164, - "_USD": 0.14635717003776014, - }, - ) - - -def test_portfolio_calc_diff(): - assert_equals_dict_of_float( - p1.calc_diff(), - { - "SPY": -0.04454846152003894, - "ARKW": 0.2943029806302938, - "TLT": -0.21595532646962554, - "GDX": -0.18181818181818182, - "REMX": 0.09257091004888329, - "_USD": 0.055448079128669225, - }, - ) - - -@pytest.mark.skip -def test_portfolio_make_rebalancing_plan(): - assert_equals_dict_of_float( - p1.make_rebalancing_plan(), - { - "SPY": 6, - "ARKW": -382, - "TLT": 171, - "GDX": 429, - "REMX": -100, - }, - ) - - -@pytest.mark.skip -def test_portfolio_apply_plan(): - plan = p1.make_rebalancing_plan() - p1.apply_plan(plan, parse_dt("2023-01-03"), parse_dt("2023-01-05"), {}) - assert_equals_dict_of_float( - p1.inventory, - { - "SPY": 56, - "TLT": 216, - "ARKW": 118, - "REMX": 0, - "GDX": 429, - "_USD": 6657.49, - }, - ) - expected_usd = (target["_USD"] / sum(target.values())) * p1.net_asset_value - assert 0.9 < p1.inventory["_USD"] / expected_usd < 1.1 From 162783a03764f1d03d94dc2bfa631468942af4b7 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Thu, 23 May 2024 23:54:23 +0900 Subject: [PATCH 04/14] build: Install PyYAML package --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index fe0cb71..c51bc89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ yfinance>=0.2,<0.3 rich>=13.6,<14.0 polars>=0.19.0 lxml>=4.6.0 +PyYAML>=6.0,<6.1 From 27122d6f213e65018abc2e6028da0b7e056bdfed Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Sat, 1 Jun 2024 19:53:32 +0900 Subject: [PATCH 05/14] feat: Evaluate daily NAVs --- finance/ext/models.py | 61 +++++++++++++++++++++++++++++++++++++++---- finance/utils.py | 8 ++++++ 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/finance/ext/models.py b/finance/ext/models.py index f4bf5c1..7365dcc 100644 --- a/finance/ext/models.py +++ b/finance/ext/models.py @@ -1,8 +1,10 @@ -from datetime import datetime +from datetime import datetime, timedelta +import pytz +import pandas as pd import yaml -from finance.utils import date_to_datetime +from finance.utils import date_to_datetime, make_dates from typing import List @@ -18,7 +20,14 @@ def __init__(self, date: datetime, ticker: str, quantity: float): # complains that Transaction is not defined... @classmethod def load_transactions(cls, yaml_data: List[dict]): - return [cls(d["date"], d["ticker"], d["quantity"]) for d in yaml_data] + return [ + cls( + date_to_datetime(d["date"]).replace(tzinfo=pytz.utc), + d["ticker"], + d["quantity"], + ) + for d in yaml_data + ] # TODO: Get rid of dependencies on DataFrame def __init__( @@ -47,10 +56,10 @@ def current_weights(self): nav = self.net_asset_value return {t: v / nav for t, v in self.asset_values.items()} - def evaluate_inventory(self, evaluated_at=datetime.utcnow()) -> dict: + def eval_inventory(self, evaluated_at=datetime.utcnow()) -> dict: self.inventory = {} for record in self.transactions: - if date_to_datetime(record.date) <= evaluated_at: + if record.date <= evaluated_at: self.apply_transaction(record) return self.inventory @@ -59,6 +68,48 @@ def apply_transaction(self, record: Transaction): self.inventory.setdefault(record.ticker, 0) self.inventory[record.ticker] += record.quantity + def eval_daily_inventories(self, from_date: datetime, to_date: datetime): + """ + :param from_date: A timezone aware datetime markig the lower bound (inclusive) + :param to_date: A timezone aware datetime marking the upper bound (exclusive) + """ + for date in make_dates(from_date, to_date): + yield self.eval_inventory(date) + date += timedelta(days=1) + + def eval_daily_nav( + self, from_date: datetime, to_date: datetime, historical: pd.DataFrame + ): + """ + :param from_date: A timezone aware datetime markig the lower bound (inclusive) + :param to_date: A timezone aware datetime marking the upper bound (exclusive) + """ + historical = historical[ + (historical.date >= from_date) & (historical.date < to_date) + ] + dates: pd.Series = historical.groupby("date").head(1).date + + partial = {} + for ( + t, + q, + ) in self.inventory.items(): + partial[t] = ( + historical[historical.symbol == t][["date", "close"]] + .set_index("date") + .rename(columns={"close": t}) + ) + + # TODO: Consider cases where quantites and inventories change as time passes + import pdb + + pdb.set_trace() + + pass + + def eval_nav(self, date: datetime, historical: pd.DataFrame): + return 0 + @classmethod def load_from_file(cls, path: str, current_prices: dict): """Loads `inventory` and `target_weights` from a YAML file. diff --git a/finance/utils.py b/finance/utils.py index 8cc6bff..9aec161 100644 --- a/finance/utils.py +++ b/finance/utils.py @@ -92,6 +92,14 @@ def load_stock_codes(fin): yield code, name +def make_dates(from_date: datetime, to_date: datetime): + """Returns a generator of consecutive dates""" + cursor = from_date + while cursor < to_date: + yield cursor + cursor += timedelta(days=1) + + def make_request_import_stock_values_message(code, start_time, end_time): # type: (str, datetime, datetime) -> dict return { From bb6081bfe51f9980b2dea7e792d60d56562f9f53 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Sat, 1 Jun 2024 20:09:03 +0900 Subject: [PATCH 06/14] test: Fix broken test cases --- finance/ext/models.py | 11 ++--------- tests/ext/test_ext_models.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/finance/ext/models.py b/finance/ext/models.py index 7365dcc..67d55ef 100644 --- a/finance/ext/models.py +++ b/finance/ext/models.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, UTC import pytz import pandas as pd @@ -56,7 +56,7 @@ def current_weights(self): nav = self.net_asset_value return {t: v / nav for t, v in self.asset_values.items()} - def eval_inventory(self, evaluated_at=datetime.utcnow()) -> dict: + def eval_inventory(self, evaluated_at=datetime.now(UTC)) -> dict: self.inventory = {} for record in self.transactions: if record.date <= evaluated_at: @@ -100,13 +100,6 @@ def eval_daily_nav( .rename(columns={"close": t}) ) - # TODO: Consider cases where quantites and inventories change as time passes - import pdb - - pdb.set_trace() - - pass - def eval_nav(self, date: datetime, historical: pd.DataFrame): return 0 diff --git a/tests/ext/test_ext_models.py b/tests/ext/test_ext_models.py index f6bbb1e..368eea7 100644 --- a/tests/ext/test_ext_models.py +++ b/tests/ext/test_ext_models.py @@ -1,3 +1,4 @@ +import pytz import pytest from finance.ext.models import Portfolio @@ -11,9 +12,17 @@ def assert_equals_dict_of_float(d1: dict, d2: dict): def test_portfolio(): pf = Portfolio.load_from_file("tests/samples/portfolio1.yml", {}) - pf.evaluate_inventory() + pf.eval_inventory() assert pf.inventory == {"QQQ": 6, "SCHD": 70, "SCHH": 10, "TLT": 15} + import pandas as pd + from finance.utils import parse_date + + historical = pd.read_parquet("notebooks/historical/US.parquet") + from_date = parse_date("2023-10-01").replace(tzinfo=pytz.utc) + to_date = parse_date("2023-10-30").replace(tzinfo=pytz.utc) + pf.eval_daily_nav(from_date, to_date, historical) + # TODO: Diversify test scenarios inventory = { From 7dea6584665c37ae21f67199c3d1ee4f9c5b4fdd Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Sat, 1 Jun 2024 20:09:34 +0900 Subject: [PATCH 07/14] test: Revise sample data for test --- tests/samples/portfolio1.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/samples/portfolio1.yml b/tests/samples/portfolio1.yml index 61f97d3..6749cc7 100644 --- a/tests/samples/portfolio1.yml +++ b/tests/samples/portfolio1.yml @@ -21,4 +21,7 @@ transactions: quantity: 10 - date: 2020-01-03 ticker: TLT - quantity: 15 + quantity: 16 + - date: 2023-10-15 + ticker: TLT + quantity: -1 From d7a77264639bea44fc726dc0181d1f8f4b698441 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Tue, 4 Jun 2024 23:17:33 +0900 Subject: [PATCH 08/14] refactor: Use timezone.utc instead of datetime.UTC --- finance/ext/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/finance/ext/models.py b/finance/ext/models.py index 67d55ef..0fb0ba6 100644 --- a/finance/ext/models.py +++ b/finance/ext/models.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta, UTC +from datetime import datetime, timedelta, timezone import pytz import pandas as pd @@ -56,7 +56,7 @@ def current_weights(self): nav = self.net_asset_value return {t: v / nav for t, v in self.asset_values.items()} - def eval_inventory(self, evaluated_at=datetime.now(UTC)) -> dict: + def eval_inventory(self, evaluated_at=datetime.now(timezone.utc)) -> dict: self.inventory = {} for record in self.transactions: if record.date <= evaluated_at: From 42d21fbce18d84c4a3b011d8b881fc33fcba3f13 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Tue, 4 Jun 2024 23:31:54 +0900 Subject: [PATCH 09/14] test: Skip if data file does not exist --- tests/ext/test_ext_models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ext/test_ext_models.py b/tests/ext/test_ext_models.py index 368eea7..11b38d6 100644 --- a/tests/ext/test_ext_models.py +++ b/tests/ext/test_ext_models.py @@ -1,3 +1,4 @@ +import os import pytz import pytest @@ -10,6 +11,10 @@ def assert_equals_dict_of_float(d1: dict, d2: dict): assert abs(d1[k] - d2[k]) < 1e-6 +@pytest.mark.skipif( + not os.path.exists("notebooks/historical/US.parquet"), + reason="Data file does not exist", +) def test_portfolio(): pf = Portfolio.load_from_file("tests/samples/portfolio1.yml", {}) pf.eval_inventory() From 9d507fac378f47a436f53316bb3745f4cd78c037 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Wed, 5 Jun 2024 00:51:43 +0900 Subject: [PATCH 10/14] feat: Evaluate daily NAVs --- finance/ext/models.py | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/finance/ext/models.py b/finance/ext/models.py index 0fb0ba6..8f8aca4 100644 --- a/finance/ext/models.py +++ b/finance/ext/models.py @@ -74,32 +74,50 @@ def eval_daily_inventories(self, from_date: datetime, to_date: datetime): :param to_date: A timezone aware datetime marking the upper bound (exclusive) """ for date in make_dates(from_date, to_date): - yield self.eval_inventory(date) + yield date, self.eval_inventory(date) date += timedelta(days=1) def eval_daily_nav( self, from_date: datetime, to_date: datetime, historical: pd.DataFrame ): - """ + """Evaluate daily NAVs to make a DataFrame that looks like the following: + + ticker1 | quantity | ticker2 | quantity | ... + date1 | price1 | quantity1 | price2 | quantity2 | ... + date2 | price1 | quantity1 | price2 | quantity2 | ... + :param from_date: A timezone aware datetime markig the lower bound (inclusive) :param to_date: A timezone aware datetime marking the upper bound (exclusive) """ historical = historical[ (historical.date >= from_date) & (historical.date < to_date) ] - dates: pd.Series = historical.groupby("date").head(1).date - - partial = {} - for ( - t, - q, - ) in self.inventory.items(): - partial[t] = ( - historical[historical.symbol == t][["date", "close"]] + # dates: pd.Series = historical.groupby("date").head(1).date + daily_inventories = { + date.strftime("%Y%m%d"): inventory + for date, inventory in pf.eval_daily_inventories(from_date, to_date) + } + + all_tickers = set() + for inventory in daily_inventories.values(): + all_tickers.update(inventory.keys()) + + daily_prices = {} + for ticker in all_tickers: + daily_prices[ticker] = historical[historical.symbol == ticker][ + ["date", "close"] + ] + daily_prices[ticker][f"{ticker}_quantity"] = daily_prices[ticker].apply( + lambda x: daily_inventories[x.date.strftime("%Y%m%d")][ticker], axis=1 + ) + daily_prices[ticker] = ( + daily_prices[ticker] .set_index("date") - .rename(columns={"close": t}) + .rename(columns={"close": f"{ticker}_close"}) ) + return daily_prices + def eval_nav(self, date: datetime, historical: pd.DataFrame): return 0 From 929a7f445f434aa0c4190ff632f5b5b37eb06471 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Tue, 30 Sep 2025 22:26:12 +0900 Subject: [PATCH 11/14] feat: Filter out invalid tickers --- finance/__main__.py | 2 +- finance/ext/warehouse.py | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/finance/__main__.py b/finance/__main__.py index 53c6e9e..c0247f5 100644 --- a/finance/__main__.py +++ b/finance/__main__.py @@ -46,7 +46,7 @@ def refresh_tickers( from finance.ext.warehouse import refresh_tickers_and_historical_data tickers = pd.read_parquet(tickers_source) - tickers = tickers[tickers.status != "delisted"] + tickers = tickers[(tickers.status != "delisted") & (tickers.status != "invalid")] if strategy == "all": symbols_ = tickers["symbol"].to_list() elif strategy == "oldest": diff --git a/finance/ext/warehouse.py b/finance/ext/warehouse.py index 5544d4c..d987171 100644 --- a/finance/ext/warehouse.py +++ b/finance/ext/warehouse.py @@ -22,6 +22,10 @@ log = Logger(__file__) +class InvalidDataException(Exception): + pass + + def concat_dataframes( df1: pd.DataFrame, df2: pd.DataFrame, @@ -72,9 +76,14 @@ def preprocess_profile(profile: dict, symbol: str, region: str, updated_at: date profile["long_business_summary"] = profile.pop("longBusinessSummary") if "close" not in profile: - profile["close"] = profile[ - "previousClose" - ] # Not sure if these two are the same + if "previousClose" in profile: + profile["close"] = profile[ + "previousClose" + ] # Not sure if these two are the same + else: + raise InvalidDataException( + f"{symbol} is missing 'close' and 'previousClose'. Could be delisted." + ) if profile["quote_type"] == "ETF": profile["market_cap"] = profile.pop("totalAssets") else: @@ -173,6 +182,12 @@ def refresh_tickers_and_historical_data( # TODO: Define enum instead of using string literals tickers.loc[row_indexer, "status"] = "delisted" tickers.to_parquet(tickers_target_path) + except InvalidDataException as e: + log.warn(f"{e}") + row_indexer = tickers.symbol == symbol + tickers.loc[row_indexer, "updated_at"] = datetime.utcnow() + tickers.loc[row_indexer, "status"] = "invalid" + tickers.to_parquet(tickers_target_path) except Exception as e: log.warn(f"{symbol}: {e}") with open(skip_marker_path, "w") as fout: From 338c00926e9dd53955b4e734b15d99919daf9e43 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Thu, 6 Nov 2025 14:48:16 +0900 Subject: [PATCH 12/14] build: Resolve version conflict --- finance/ext/models.py | 2 +- requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/finance/ext/models.py b/finance/ext/models.py index 8f8aca4..d1a0de1 100644 --- a/finance/ext/models.py +++ b/finance/ext/models.py @@ -95,7 +95,7 @@ def eval_daily_nav( # dates: pd.Series = historical.groupby("date").head(1).date daily_inventories = { date.strftime("%Y%m%d"): inventory - for date, inventory in pf.eval_daily_inventories(from_date, to_date) + for date, inventory in self.eval_daily_inventories(from_date, to_date) } all_tickers = set() diff --git a/requirements.txt b/requirements.txt index c51bc89..89aaa21 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ rich>=13.6,<14.0 polars>=0.19.0 lxml>=4.6.0 PyYAML>=6.0,<6.1 +urllib3>=1.25.4,<1.27 From a21e3c4f49976044bb64c6590337247dc9b4faf5 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Sat, 8 Nov 2025 00:03:48 +0900 Subject: [PATCH 13/14] feat: Subcommands --- README.rst | 69 ++++++++++++- finance/__main__.py | 176 ++++++++++++++++++++++++-------- finance/ext/profile/__init__.py | 1 + requirements.txt | 1 + 4 files changed, 203 insertions(+), 44 deletions(-) diff --git a/README.rst b/README.rst index 460dca6..5a6e3e8 100644 --- a/README.rst +++ b/README.rst @@ -87,7 +87,74 @@ Daily Net Asset Values Usage ----- -(TODO: 사용법 계속 채워넣기) +Refresh Tickers and Historical Data +************************************ + +The ``refresh-tickers`` command downloads daily price data for tickers and updates +both the ticker metadata and historical price data. It has four subcommands for +different use cases: + +**Refresh Specific Symbols** (``static``) + +Download data for specific ticker symbols: + +.. code:: + + finance refresh-tickers static SPY IVI QQQ --output US.parquet + +Options: + +- ``--output, -o``: Output parquet file for historical data (required) +- ``--tickers``: Input/output tickers parquet file (default: ``tickers.parquet``) +- ``--staging-dir``: Staging directory for intermediate files (default: ``.``) +- ``--region, -r``: Region code, e.g., US, KR (default: ``US``) + +**Refresh Oldest Tickers** (``oldest``) + +Refresh the N most stale tickers based on the ``updated_at`` timestamp: + +.. code:: + + finance refresh-tickers oldest --count 30 --output US.parquet + +Options: + +- ``--count, -n``: Number of oldest tickers to refresh (default: 25) +- ``--output, -o``: Output parquet file for historical data (required) +- ``--tickers``: Input/output tickers parquet file (default: ``tickers.parquet``) +- ``--staging-dir``: Staging directory for intermediate files (default: ``.``) +- ``--region, -r``: Region code (default: ``US``) + +**Refresh Random Sample** (``random``) + +Refresh a random sample of tickers: + +.. code:: + + finance refresh-tickers random --count 50 --output US.parquet + +Options: + +- ``--count, -n``: Number of random tickers to sample (default: 25) +- ``--output, -o``: Output parquet file for historical data (required) +- ``--tickers``: Input/output tickers parquet file (default: ``tickers.parquet``) +- ``--staging-dir``: Staging directory for intermediate files (default: ``.``) +- ``--region, -r``: Region code (default: ``US``) + +**Refresh All Tickers** (``all``) + +Refresh all tickers in the database (use with caution): + +.. code:: + + finance refresh-tickers all --output US.parquet + +Options: + +- ``--output, -o``: Output parquet file for historical data (required) +- ``--tickers``: Input/output tickers parquet file (default: ``tickers.parquet``) +- ``--staging-dir``: Staging directory for intermediate files (default: ``.``) +- ``--region, -r``: Region code (default: ``US``) Search For Listings On Naver Finance ************************************ diff --git a/finance/__main__.py b/finance/__main__.py index c0247f5..0334fc2 100644 --- a/finance/__main__.py +++ b/finance/__main__.py @@ -1,4 +1,5 @@ """CLI commands for finance data processing (no database dependency).""" + import os from typing import List @@ -15,61 +16,150 @@ def cli(): pass -@cli.command() -@click.argument("tickers_source") -@click.argument("historical_source") -@click.argument("tickers_target") -@click.argument("historical_target") -@click.option("-r", "--region", default="US", help="Region") -@click.option( - "-s", "--strategy", default="oldest", help="all | oldest | random | static" -) -@click.option("-k", "--sample-count", default=25) -@click.option("--symbols", type=str) -def refresh_tickers( - tickers_source: str, - historical_source: str, - tickers_target: str, - historical_target: str, - region, - strategy: str, - sample_count: int, - symbols: str, -): - """Refreshes tickers and historical data. +@cli.group() +def refresh_tickers(): + """Refreshes tickers and historical data.""" + pass - :param source: Source file name - :param symbols: Comma separated strings (without spaces in between) - """ - import random + +def _refresh_tickers_common( + region: str, + tickers_input: str, + historical_output: str, + staging_dir: str, + symbols_list: List[str], +): + """Common logic for all refresh-tickers subcommands.""" import pandas as pd from finance.ext.warehouse import refresh_tickers_and_historical_data - tickers = pd.read_parquet(tickers_source) + tickers = pd.read_parquet(tickers_input) tickers = tickers[(tickers.status != "delisted") & (tickers.status != "invalid")] - if strategy == "all": - symbols_ = tickers["symbol"].to_list() - elif strategy == "oldest": - symbols_ = tickers.sort_values("updated_at")["symbol"].to_list() - symbols_ = symbols_[:sample_count] - elif strategy == "random": - symbols_ = tickers["symbol"].to_list() - symbols_ = random.sample(symbols_, sample_count) - elif strategy == "static": - symbols_ = symbols.split(",") - else: - raise NotImplementedError(f"Strategy: {strategy}") + + # Use tickers_input as output if not specified otherwise + tickers_output = tickers_input refresh_tickers_and_historical_data( region, tickers, - historical_source, - tickers_target, - historical_target, - symbols_, + staging_dir, + tickers_output, + historical_output, + symbols_list, ) +@refresh_tickers.command() +@click.argument("symbols", nargs=-1, required=True) +@click.option( + "--output", "-o", required=True, help="Output parquet file for historical data" +) +@click.option( + "--tickers", default="tickers.parquet", help="Input/output tickers parquet file" +) +@click.option( + "--staging-dir", default=".", help="Staging directory for intermediate files" +) +@click.option("--region", "-r", default="US", help="Region code (e.g., US, KR)") +def static(symbols: tuple, output: str, tickers: str, staging_dir: str, region: str): + """Refresh specific ticker symbols. + + Example: + finance refresh-tickers static SPY IVI QQQ --output US.parquet + """ + symbols_list = list(symbols) + _refresh_tickers_common(region, tickers, output, staging_dir, symbols_list) + + +@refresh_tickers.command() +@click.option("--count", "-n", default=25, help="Number of random tickers to sample") +@click.option( + "--output", "-o", required=True, help="Output parquet file for historical data" +) +@click.option( + "--tickers", default="tickers.parquet", help="Input/output tickers parquet file" +) +@click.option( + "--staging-dir", default=".", help="Staging directory for intermediate files" +) +@click.option("--region", "-r", default="US", help="Region code (e.g., US, KR)") +def random(count: int, output: str, tickers: str, staging_dir: str, region: str): + """Refresh a random sample of tickers. + + Example: + finance refresh-tickers random --count 50 --output US.parquet + """ + import random as random_module + import pandas as pd + + tickers_df = pd.read_parquet(tickers) + tickers_df = tickers_df[ + (tickers_df.status != "delisted") & (tickers_df.status != "invalid") + ] + symbols_list = tickers_df["symbol"].to_list() + symbols_list = random_module.sample(symbols_list, count) + + _refresh_tickers_common(region, tickers, output, staging_dir, symbols_list) + + +@refresh_tickers.command() +@click.option("--count", "-n", default=25, help="Number of oldest tickers to refresh") +@click.option( + "--output", "-o", required=True, help="Output parquet file for historical data" +) +@click.option( + "--tickers", default="tickers.parquet", help="Input/output tickers parquet file" +) +@click.option( + "--staging-dir", default=".", help="Staging directory for intermediate files" +) +@click.option("--region", "-r", default="US", help="Region code (e.g., US, KR)") +def oldest(count: int, output: str, tickers: str, staging_dir: str, region: str): + """Refresh the oldest (most stale) tickers based on updated_at timestamp. + + Example: + finance refresh-tickers oldest --count 30 --output US.parquet + """ + import pandas as pd + + tickers_df = pd.read_parquet(tickers) + tickers_df = tickers_df[ + (tickers_df.status != "delisted") & (tickers_df.status != "invalid") + ] + symbols_list = tickers_df.sort_values("updated_at")["symbol"].to_list() + symbols_list = symbols_list[:count] + + _refresh_tickers_common(region, tickers, output, staging_dir, symbols_list) + + +@refresh_tickers.command() +@click.option( + "--output", "-o", required=True, help="Output parquet file for historical data" +) +@click.option( + "--tickers", default="tickers.parquet", help="Input/output tickers parquet file" +) +@click.option( + "--staging-dir", default=".", help="Staging directory for intermediate files" +) +@click.option("--region", "-r", default="US", help="Region code (e.g., US, KR)") +def all(output: str, tickers: str, staging_dir: str, region: str): + """Refresh all tickers in the database. + + Example: + finance refresh-tickers all --output US.parquet + """ + import pandas as pd + + tickers_df = pd.read_parquet(tickers) + tickers_df = tickers_df[ + (tickers_df.status != "delisted") & (tickers_df.status != "invalid") + ] + symbols_list = tickers_df["symbol"].to_list() + + _refresh_tickers_common(region, tickers, output, staging_dir, symbols_list) + + @cli.command() @click.argument("tickers_source") @click.argument("historical_source") diff --git a/finance/ext/profile/__init__.py b/finance/ext/profile/__init__.py index a062d89..7386a4c 100644 --- a/finance/ext/profile/__init__.py +++ b/finance/ext/profile/__init__.py @@ -1,6 +1,7 @@ """ Extracts company profiles """ + from finance.ext.profile.naver_finance import fetch_naver_profile diff --git a/requirements.txt b/requirements.txt index 89aaa21..4d28816 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ boto3>=1.5.26 numpy>=1.21.0 pandas>=2.0.0 pandas-datareader>=0.10.0 +pyarrow>=10.0 types-click>=7.1,<8.0 yfinance>=0.2,<0.3 rich>=13.6,<14.0 From a212b84aedc0049d7464245c3ca3ce28ae981c01 Mon Sep 17 00:00:00 2001 From: Sumin Byeon Date: Sat, 8 Nov 2025 00:04:11 +0900 Subject: [PATCH 14/14] refactor: Deal with empty rows --- finance/ext/models.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/finance/ext/models.py b/finance/ext/models.py index d1a0de1..a33fca0 100644 --- a/finance/ext/models.py +++ b/finance/ext/models.py @@ -104,16 +104,18 @@ def eval_daily_nav( daily_prices = {} for ticker in all_tickers: - daily_prices[ticker] = historical[historical.symbol == ticker][ - ["date", "close"] - ] - daily_prices[ticker][f"{ticker}_quantity"] = daily_prices[ticker].apply( - lambda x: daily_inventories[x.date.strftime("%Y%m%d")][ticker], axis=1 + df = historical[historical.symbol == ticker][["date", "close"]].copy() + + # Skip tickers with no historical data in the date range + if df.empty: + continue + + df[f"{ticker}_quantity"] = df.apply( + lambda x: daily_inventories[x.date.strftime("%Y%m%d")].get(ticker, 0), + axis=1, ) - daily_prices[ticker] = ( - daily_prices[ticker] - .set_index("date") - .rename(columns={"close": f"{ticker}_close"}) + daily_prices[ticker] = df.set_index("date").rename( + columns={"close": f"{ticker}_close"} ) return daily_prices