From f73c6061b6bd1c56deb2c0967ea725476e8c8b9b Mon Sep 17 00:00:00 2001
From: Manan <manan.singh@digantara.co.in>
Date: Mon, 6 Jan 2025 15:57:04 +0530
Subject: [PATCH 1/6] feat: refresh F10.7 and ap data handling with partial
 data support and automatic updates

---
 pymsis/utils.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 73 insertions(+), 3 deletions(-)

diff --git a/pymsis/utils.py b/pymsis/utils.py
index 725c07c..70345d3 100644
--- a/pymsis/utils.py
+++ b/pymsis/utils.py
@@ -1,9 +1,12 @@
 """Utilities for obtaining input datasets."""
 
+import os
+import csv
 import urllib.request
 import warnings
 from io import BytesIO
 from pathlib import Path
+from datetime import datetime, timedelta
 
 import numpy as np
 import numpy.typing as npt
@@ -14,6 +17,9 @@
 _DATA_FNAME: str = "SW-All.csv"
 _F107_AP_URL: str = f"https://celestrak.org/SpaceData/{_DATA_FNAME}"
 _F107_AP_PATH: Path = Path(pymsis.__file__).parent / _DATA_FNAME
+_PARTIAL_DATA_FNAME: str = "SW-Last5Years.csv"
+_PARTIAL_F107_AP_URL: str = f"https://celestrak.org/SpaceData/{_PARTIAL_DATA_FNAME}"
+_PARTIAL_F107_AP_PATH: Path = Path(pymsis.__file__).parent / _PARTIAL_DATA_FNAME
 _DATA: dict[str, npt.NDArray] | None = None
 
 
@@ -47,9 +53,54 @@ def download_f107_ap() -> None:
     with _F107_AP_PATH.open("wb") as f:
         f.write(req.read())
 
+def _refresh_f107_ap_data(last_obs_date: np.datetime64):
+    """
+    Refresh exising SW_All file after last_obs_date
+
+    Parameters
+    ----------
+    last_obs_date : datetime of last observed parameter
+    """
+    warnings.warn(f"Refreshing data using partial ap and F10.7 data from {_PARTIAL_F107_AP_URL}")
+    req = urllib.request.urlopen(_PARTIAL_F107_AP_URL)
+    with _PARTIAL_F107_AP_PATH.open("wb") as f:
+        f.write(req.read())
+
+    # Store all observed dates in existing_data
+    with open(_F107_AP_PATH, "r") as f:
+        reader = csv.DictReader(f)
+        existing_data = [row for row in reader if np.datetime64(row['DATE']) <= last_obs_date]
+
+    # Store data new dates in updated_data
+    updated_data = []
+    if os.path.exists(_PARTIAL_F107_AP_PATH):
+        with open(_PARTIAL_F107_AP_PATH, "r") as f:
+            reader = csv.DictReader(f)
+            updated_data = [row for row in reader]
+
+    # Merge existing_data and new_data into all_data
+    all_data = {row['DATE']: row for row in existing_data}
+    all_data.update({row['DATE']: row for row in updated_data})
 
-def _load_f107_ap_data() -> dict[str, npt.NDArray]:
-    """Load data from disk, if it isn't present go out and download it first."""
+    # Sort all_data by date
+    sorted_data = sorted(all_data.values(), key=lambda x: np.datetime64(x['DATE']))
+    with open(_F107_AP_PATH, "w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=sorted_data[0].keys())
+        writer.writeheader()
+        writer.writerows(sorted_data)
+
+    os.remove(_PARTIAL_F107_AP_PATH)
+
+def _load_f107_ap_data(end_date: np.datetime64) -> dict[str, npt.NDArray]:
+    """
+    Load data from disk:
+        - If it isn't present go out and download it first
+        - If present but not up to date, go out and refresh it
+
+    Parameters
+    ----------
+    end_date : datetime of last epoch
+    """
     if not _F107_AP_PATH.exists():
         download_f107_ap()
 
@@ -92,6 +143,7 @@ def _load_f107_ap_data() -> dict[str, npt.NDArray]:
     # so we can't just go back in line lengths)
     with _F107_AP_PATH.open() as fin:
         with BytesIO() as fout:
+            last_obs_date: np.datetime64|None = None
             for line in fin:
                 if "PRM" in line:
                     # We don't want the monthly predicted values
@@ -99,12 +151,30 @@ def _load_f107_ap_data() -> dict[str, npt.NDArray]:
                 if ",,,,,,,," in line:
                     # We don't want lines with missing values
                     continue
+                if ",OBS," in line:
+                    # Capture last observed date
+                    last_obs_date = np.datetime64(line.split(",")[0])
                 fout.write(line.encode("utf-8"))
             fout.seek(0)
             arr = np.loadtxt(
                 fout, delimiter=",", dtype=dtype, usecols=usecols, skiprows=1
             )  # type: ignore
 
+    # Check if the file needs to be refreshed after parsing
+    if last_obs_date is not None:
+        file_mod_time = datetime.fromtimestamp(os.path.getmtime(_F107_AP_PATH))
+        if (
+            last_obs_date < end_date
+            and datetime.now() - file_mod_time >= timedelta(hours=3)
+        ):
+            # Refresh file if:
+            # - requested date is beyond the end of current file
+            # - file hasn't been refresh in the last 3 hours
+            _refresh_f107_ap_data(last_obs_date)
+
+            # Re-parse the file after refresh
+            return _load_f107_ap_data(end_date)
+
     # transform each day's 8 3-hourly ap values into a single column
     ap = np.empty(len(arr) * 8, dtype=float)
     daily_ap = arr["Ap"].astype(float)
@@ -207,7 +277,7 @@ def get_f107_ap(dates: npt.ArrayLike) -> tuple[npt.NDArray, npt.NDArray, npt.NDA
             |     prior to current time
     """
     dates = np.asarray(dates, dtype=np.datetime64)
-    data = _DATA or _load_f107_ap_data()
+    data = _DATA or _load_f107_ap_data(dates[-1])
 
     data_start = data["dates"][0]
     data_end = data["dates"][-1]

From 812270eec03f29bd97930bd72f1f471cdff80511 Mon Sep 17 00:00:00 2001
From: Manan <manan.singh@digantara.co.in>
Date: Mon, 6 Jan 2025 16:11:33 +0530
Subject: [PATCH 2/6] lint: fix linting issues

---
 pymsis/__init__.py |  2 +-
 pymsis/utils.py    | 49 ++++++++++++++++++++++++----------------------
 2 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/pymsis/__init__.py b/pymsis/__init__.py
index 05d45fa..1b92db7 100644
--- a/pymsis/__init__.py
+++ b/pymsis/__init__.py
@@ -7,4 +7,4 @@
 
 __version__ = importlib.metadata.version("pymsis")
 
-__all__ = ["__version__", "Variable", "calculate"]
+__all__ = ["Variable", "__version__", "calculate"]
diff --git a/pymsis/utils.py b/pymsis/utils.py
index 70345d3..0fe1e9e 100644
--- a/pymsis/utils.py
+++ b/pymsis/utils.py
@@ -1,12 +1,12 @@
 """Utilities for obtaining input datasets."""
 
-import os
 import csv
+import os
 import urllib.request
 import warnings
+from datetime import datetime, timedelta
 from io import BytesIO
 from pathlib import Path
-from datetime import datetime, timedelta
 
 import numpy as np
 import numpy.typing as npt
@@ -53,37 +53,42 @@ def download_f107_ap() -> None:
     with _F107_AP_PATH.open("wb") as f:
         f.write(req.read())
 
-def _refresh_f107_ap_data(last_obs_date: np.datetime64):
+
+def _refresh_f107_ap_data(last_obs_date: np.datetime64) -> None:
     """
-    Refresh exising SW_All file after last_obs_date
+    Refresh exising SW_All file after last_obs_date.
 
     Parameters
     ----------
     last_obs_date : datetime of last observed parameter
     """
-    warnings.warn(f"Refreshing data using partial ap and F10.7 data from {_PARTIAL_F107_AP_URL}")
+    warnings.warn(
+        f"Refreshing data using partial ap and F10.7 data from {_PARTIAL_F107_AP_URL}"
+    )
     req = urllib.request.urlopen(_PARTIAL_F107_AP_URL)
     with _PARTIAL_F107_AP_PATH.open("wb") as f:
         f.write(req.read())
 
     # Store all observed dates in existing_data
-    with open(_F107_AP_PATH, "r") as f:
+    with open(_F107_AP_PATH) as f:
         reader = csv.DictReader(f)
-        existing_data = [row for row in reader if np.datetime64(row['DATE']) <= last_obs_date]
+        existing_data = [
+            row for row in reader if np.datetime64(row["DATE"]) <= last_obs_date
+        ]
 
     # Store data new dates in updated_data
     updated_data = []
     if os.path.exists(_PARTIAL_F107_AP_PATH):
-        with open(_PARTIAL_F107_AP_PATH, "r") as f:
+        with open(_PARTIAL_F107_AP_PATH) as f:
             reader = csv.DictReader(f)
             updated_data = [row for row in reader]
 
     # Merge existing_data and new_data into all_data
-    all_data = {row['DATE']: row for row in existing_data}
-    all_data.update({row['DATE']: row for row in updated_data})
+    all_data = {row["DATE"]: row for row in existing_data}
+    all_data.update({row["DATE"]: row for row in updated_data})
 
     # Sort all_data by date
-    sorted_data = sorted(all_data.values(), key=lambda x: np.datetime64(x['DATE']))
+    sorted_data = sorted(all_data.values(), key=lambda x: np.datetime64(x["DATE"]))
     with open(_F107_AP_PATH, "w", newline="") as f:
         writer = csv.DictWriter(f, fieldnames=sorted_data[0].keys())
         writer.writeheader()
@@ -91,11 +96,13 @@ def _refresh_f107_ap_data(last_obs_date: np.datetime64):
 
     os.remove(_PARTIAL_F107_AP_PATH)
 
+
 def _load_f107_ap_data(end_date: np.datetime64) -> dict[str, npt.NDArray]:
     """
-    Load data from disk:
-        - If it isn't present go out and download it first
-        - If present but not up to date, go out and refresh it
+    Load data from disk.
+
+        - If it isn't present go out and download it first.
+        - If present but not up to date, go out and refresh it.
 
     Parameters
     ----------
@@ -143,13 +150,10 @@ def _load_f107_ap_data(end_date: np.datetime64) -> dict[str, npt.NDArray]:
     # so we can't just go back in line lengths)
     with _F107_AP_PATH.open() as fin:
         with BytesIO() as fout:
-            last_obs_date: np.datetime64|None = None
+            last_obs_date: np.datetime64 | None = None
             for line in fin:
-                if "PRM" in line:
-                    # We don't want the monthly predicted values
-                    continue
-                if ",,,,,,,," in line:
-                    # We don't want lines with missing values
+                if "PRM" in line or ",,,,,,,," in line:
+                    # We don't want the monthly predicted values or missing values
                     continue
                 if ",OBS," in line:
                     # Capture last observed date
@@ -163,9 +167,8 @@ def _load_f107_ap_data(end_date: np.datetime64) -> dict[str, npt.NDArray]:
     # Check if the file needs to be refreshed after parsing
     if last_obs_date is not None:
         file_mod_time = datetime.fromtimestamp(os.path.getmtime(_F107_AP_PATH))
-        if (
-            last_obs_date < end_date
-            and datetime.now() - file_mod_time >= timedelta(hours=3)
+        if last_obs_date < end_date and datetime.now() - file_mod_time >= timedelta(
+            hours=3
         ):
             # Refresh file if:
             # - requested date is beyond the end of current file

From 84eecb6b4f5ffca643d45f57c27e92120463160c Mon Sep 17 00:00:00 2001
From: Manan <manan.singh@digantara.co.in>
Date: Thu, 23 Jan 2025 14:59:11 +0530
Subject: [PATCH 3/6] feat: remove partial data functionality and get last date
 from warnings

---
 pymsis/utils.py          | 90 +++++-----------------------------------
 tools/download_source.py |  6 +--
 2 files changed, 14 insertions(+), 82 deletions(-)

diff --git a/pymsis/utils.py b/pymsis/utils.py
index 0fe1e9e..173e2a3 100644
--- a/pymsis/utils.py
+++ b/pymsis/utils.py
@@ -1,10 +1,9 @@
 """Utilities for obtaining input datasets."""
 
-import csv
 import os
 import urllib.request
 import warnings
-from datetime import datetime, timedelta
+from datetime import datetime
 from io import BytesIO
 from pathlib import Path
 
@@ -17,9 +16,6 @@
 _DATA_FNAME: str = "SW-All.csv"
 _F107_AP_URL: str = f"https://celestrak.org/SpaceData/{_DATA_FNAME}"
 _F107_AP_PATH: Path = Path(pymsis.__file__).parent / _DATA_FNAME
-_PARTIAL_DATA_FNAME: str = "SW-Last5Years.csv"
-_PARTIAL_F107_AP_URL: str = f"https://celestrak.org/SpaceData/{_PARTIAL_DATA_FNAME}"
-_PARTIAL_F107_AP_PATH: Path = Path(pymsis.__file__).parent / _PARTIAL_DATA_FNAME
 _DATA: dict[str, npt.NDArray] | None = None
 
 
@@ -54,60 +50,8 @@ def download_f107_ap() -> None:
         f.write(req.read())
 
 
-def _refresh_f107_ap_data(last_obs_date: np.datetime64) -> None:
-    """
-    Refresh exising SW_All file after last_obs_date.
-
-    Parameters
-    ----------
-    last_obs_date : datetime of last observed parameter
-    """
-    warnings.warn(
-        f"Refreshing data using partial ap and F10.7 data from {_PARTIAL_F107_AP_URL}"
-    )
-    req = urllib.request.urlopen(_PARTIAL_F107_AP_URL)
-    with _PARTIAL_F107_AP_PATH.open("wb") as f:
-        f.write(req.read())
-
-    # Store all observed dates in existing_data
-    with open(_F107_AP_PATH) as f:
-        reader = csv.DictReader(f)
-        existing_data = [
-            row for row in reader if np.datetime64(row["DATE"]) <= last_obs_date
-        ]
-
-    # Store data new dates in updated_data
-    updated_data = []
-    if os.path.exists(_PARTIAL_F107_AP_PATH):
-        with open(_PARTIAL_F107_AP_PATH) as f:
-            reader = csv.DictReader(f)
-            updated_data = [row for row in reader]
-
-    # Merge existing_data and new_data into all_data
-    all_data = {row["DATE"]: row for row in existing_data}
-    all_data.update({row["DATE"]: row for row in updated_data})
-
-    # Sort all_data by date
-    sorted_data = sorted(all_data.values(), key=lambda x: np.datetime64(x["DATE"]))
-    with open(_F107_AP_PATH, "w", newline="") as f:
-        writer = csv.DictWriter(f, fieldnames=sorted_data[0].keys())
-        writer.writeheader()
-        writer.writerows(sorted_data)
-
-    os.remove(_PARTIAL_F107_AP_PATH)
-
-
-def _load_f107_ap_data(end_date: np.datetime64) -> dict[str, npt.NDArray]:
-    """
-    Load data from disk.
-
-        - If it isn't present go out and download it first.
-        - If present but not up to date, go out and refresh it.
-
-    Parameters
-    ----------
-    end_date : datetime of last epoch
-    """
+def _load_f107_ap_data() -> dict[str, npt.NDArray]:
+    """Load data from disk. If it isn't present go out and download it first."""
     if not _F107_AP_PATH.exists():
         download_f107_ap()
 
@@ -150,40 +94,22 @@ def _load_f107_ap_data(end_date: np.datetime64) -> dict[str, npt.NDArray]:
     # so we can't just go back in line lengths)
     with _F107_AP_PATH.open() as fin:
         with BytesIO() as fout:
-            last_obs_date: np.datetime64 | None = None
             for line in fin:
                 if "PRM" in line or ",,,,,,,," in line:
                     # We don't want the monthly predicted values or missing values
                     continue
-                if ",OBS," in line:
-                    # Capture last observed date
-                    last_obs_date = np.datetime64(line.split(",")[0])
                 fout.write(line.encode("utf-8"))
             fout.seek(0)
             arr = np.loadtxt(
                 fout, delimiter=",", dtype=dtype, usecols=usecols, skiprows=1
             )  # type: ignore
 
-    # Check if the file needs to be refreshed after parsing
-    if last_obs_date is not None:
-        file_mod_time = datetime.fromtimestamp(os.path.getmtime(_F107_AP_PATH))
-        if last_obs_date < end_date and datetime.now() - file_mod_time >= timedelta(
-            hours=3
-        ):
-            # Refresh file if:
-            # - requested date is beyond the end of current file
-            # - file hasn't been refresh in the last 3 hours
-            _refresh_f107_ap_data(last_obs_date)
-
-            # Re-parse the file after refresh
-            return _load_f107_ap_data(end_date)
-
     # transform each day's 8 3-hourly ap values into a single column
     ap = np.empty(len(arr) * 8, dtype=float)
     daily_ap = arr["Ap"].astype(float)
     dates = np.repeat(arr["date"], 8).astype("datetime64[m]")
     for i in range(8):
-        ap[i::8] = arr[f"ap{i+1}"]
+        ap[i::8] = arr[f"ap{i + 1}"]
         dates[i::8] += i * np.timedelta64(3, "h")
 
     # data file has missing values as negatives
@@ -280,7 +206,13 @@ def get_f107_ap(dates: npt.ArrayLike) -> tuple[npt.NDArray, npt.NDArray, npt.NDA
             |     prior to current time
     """
     dates = np.asarray(dates, dtype=np.datetime64)
-    data = _DATA or _load_f107_ap_data(dates[-1])
+    data = _DATA or _load_f107_ap_data()
+    if dates[-1] > data["dates"][~np.repeat(data["warn_data"], 8)][-1]:
+        file_mod_time = datetime.fromtimestamp(os.path.getmtime(_F107_AP_PATH))
+        # Don't refresh if file was updated in the last 1 hour
+        if (datetime.now() - file_mod_time).seconds > 60 * 60:
+            download_f107_ap()
+            data = _load_f107_ap_data()
 
     data_start = data["dates"][0]
     data_end = data["dates"][-1]
diff --git a/tools/download_source.py b/tools/download_source.py
index 5304d51..7faeadf 100644
--- a/tools/download_source.py
+++ b/tools/download_source.py
@@ -24,7 +24,7 @@ def get_source():
     if not Path("src/msis2.0/msis_init.F90").exists():
         # No source code yet, so go download and extract it
         try:
-            warnings.warn("Downloading the MSIS2.0 source code from " f"{MSIS20_FILE}")
+            warnings.warn(f"Downloading the MSIS2.0 source code from {MSIS20_FILE}")
             with urllib.request.urlopen(MSIS20_FILE) as stream:
                 tf = tarfile.open(fileobj=stream, mode="r|gz")
                 tf.extractall(path=Path("src/msis2.0"))
@@ -49,7 +49,7 @@ def get_source():
     if not Path("src/msis2.1/msis_init.F90").exists():
         # No source code yet, so go download and extract it
         try:
-            warnings.warn("Downloading the MSIS2.1 source code from " f"{MSIS21_FILE}")
+            warnings.warn(f"Downloading the MSIS2.1 source code from {MSIS21_FILE}")
             with urllib.request.urlopen(MSIS21_FILE) as stream:
                 tf = tarfile.open(fileobj=stream, mode="r|gz")
                 tf.extractall(path=Path("src/msis2.1"))
@@ -76,7 +76,7 @@ def get_source():
         local_msis00_path.parent.mkdir(parents=True, exist_ok=True)
         # No source code yet, so go download and extract it
         try:
-            warnings.warn("Downloading the MSIS-00 source code from " f"{MSIS00_FILE}")
+            warnings.warn(f"Downloading the MSIS-00 source code from {MSIS00_FILE}")
 
             with urllib.request.urlopen(MSIS00_FILE) as response:
                 with open(local_msis00_path, "wb") as f:

From b79daa59770facdc8c910813bb3b07ddd0e8015b Mon Sep 17 00:00:00 2001
From: Manan <manan.singh@digantara.co.in>
Date: Thu, 23 Jan 2025 15:00:07 +0530
Subject: [PATCH 4/6] fix(utils): improve docstring for _load_f107_ap_data
 function

---
 pymsis/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymsis/utils.py b/pymsis/utils.py
index 173e2a3..2519771 100644
--- a/pymsis/utils.py
+++ b/pymsis/utils.py
@@ -51,7 +51,7 @@ def download_f107_ap() -> None:
 
 
 def _load_f107_ap_data() -> dict[str, npt.NDArray]:
-    """Load data from disk. If it isn't present go out and download it first."""
+    """Load data from disk, if it isn't present go out and download it first."""
     if not _F107_AP_PATH.exists():
         download_f107_ap()
 

From c7ba8c38f0da238d87db5ef6000998674f8b4be4 Mon Sep 17 00:00:00 2001
From: Manan <manan.singh@digantara.co.in>
Date: Fri, 24 Jan 2025 18:16:52 +0530
Subject: [PATCH 5/6] fix(utils): update date comparison to use max date for
 data refresh logic

---
 pymsis/utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pymsis/utils.py b/pymsis/utils.py
index 2519771..7f1a37d 100644
--- a/pymsis/utils.py
+++ b/pymsis/utils.py
@@ -207,7 +207,9 @@ def get_f107_ap(dates: npt.ArrayLike) -> tuple[npt.NDArray, npt.NDArray, npt.NDA
     """
     dates = np.asarray(dates, dtype=np.datetime64)
     data = _DATA or _load_f107_ap_data()
-    if dates[-1] > data["dates"][~np.repeat(data["warn_data"], 8)][-1]:
+    # If our requested data time is after the cached values we have,
+    # go and download a new file to refresh the local file cache
+    if dates.max() > data["dates"][~np.repeat(data["warn_data"], 8)][-1]:
         file_mod_time = datetime.fromtimestamp(os.path.getmtime(_F107_AP_PATH))
         # Don't refresh if file was updated in the last 1 hour
         if (datetime.now() - file_mod_time).seconds > 60 * 60:

From f12ec35d6caab27aad119608d0205ac11814e7aa Mon Sep 17 00:00:00 2001
From: Greg Lucas <greg.m.lucas@gmail.com>
Date: Mon, 27 Jan 2025 21:57:19 -0700
Subject: [PATCH 6/6] TST/MNT: Add tests for refresh data download util

Add tests to make sure the downloads are triggered appropriately.
This required changing the download location to a separate temporary
directory for each test. Previously, the download overwrote the actual
test data file, which caused issues in subsequent tests that loaded it.

Change the condition to only refresh if the data being requested is
before the current time AND after the last date in the file.
---
 pymsis/utils.py     | 34 +++++++++++++---------------------
 pyproject.toml      |  4 ++++
 tests/conftest.py   | 14 ++++----------
 tests/test_utils.py | 41 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 31 deletions(-)

diff --git a/pymsis/utils.py b/pymsis/utils.py
index 7f1a37d..c2e78f6 100644
--- a/pymsis/utils.py
+++ b/pymsis/utils.py
@@ -1,9 +1,7 @@
 """Utilities for obtaining input datasets."""
 
-import os
 import urllib.request
 import warnings
-from datetime import datetime
 from io import BytesIO
 from pathlib import Path
 
@@ -45,8 +43,7 @@ def download_f107_ap() -> None:
        Space Weather, https://doi.org/10.1029/2020SW002641
     """
     warnings.warn(f"Downloading ap and F10.7 data from {_F107_AP_URL}")
-    req = urllib.request.urlopen(_F107_AP_URL)
-    with _F107_AP_PATH.open("wb") as f:
+    with _F107_AP_PATH.open("wb") as f, urllib.request.urlopen(_F107_AP_URL) as req:
         f.write(req.read())
 
 
@@ -92,17 +89,14 @@ def _load_f107_ap_data() -> dict[str, npt.NDArray]:
     # Use a buffer to read in and load so we can quickly get rid of
     # the extra "PRD" lines at the end of the file (unknown length
     # so we can't just go back in line lengths)
-    with _F107_AP_PATH.open() as fin:
-        with BytesIO() as fout:
-            for line in fin:
-                if "PRM" in line or ",,,,,,,," in line:
-                    # We don't want the monthly predicted values or missing values
-                    continue
-                fout.write(line.encode("utf-8"))
-            fout.seek(0)
-            arr = np.loadtxt(
-                fout, delimiter=",", dtype=dtype, usecols=usecols, skiprows=1
-            )  # type: ignore
+    with _F107_AP_PATH.open() as fin, BytesIO() as fout:
+        for line in fin:
+            if "PRM" in line or ",,,,,,,," in line:
+                # We don't want the monthly predicted values or missing values
+                continue
+            fout.write(line.encode("utf-8"))
+        fout.seek(0)
+        arr = np.loadtxt(fout, delimiter=",", dtype=dtype, usecols=usecols, skiprows=1)  # type: ignore
 
     # transform each day's 8 3-hourly ap values into a single column
     ap = np.empty(len(arr) * 8, dtype=float)
@@ -209,12 +203,10 @@ def get_f107_ap(dates: npt.ArrayLike) -> tuple[npt.NDArray, npt.NDArray, npt.NDA
     data = _DATA or _load_f107_ap_data()
     # If our requested data time is after the cached values we have,
     # go and download a new file to refresh the local file cache
-    if dates.max() > data["dates"][~np.repeat(data["warn_data"], 8)][-1]:
-        file_mod_time = datetime.fromtimestamp(os.path.getmtime(_F107_AP_PATH))
-        # Don't refresh if file was updated in the last 1 hour
-        if (datetime.now() - file_mod_time).seconds > 60 * 60:
-            download_f107_ap()
-            data = _load_f107_ap_data()
+    last_time_in_file = data["dates"][7::8][~data["warn_data"]].max()
+    if np.any((dates > last_time_in_file) & (dates < np.datetime64("now"))):
+        download_f107_ap()
+        data = _load_f107_ap_data()
 
     data_start = data["dates"][0]
     data_end = data["dates"][-1]
diff --git a/pyproject.toml b/pyproject.toml
index 213f762..81464fd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,6 +71,10 @@ testpaths = [
 addopts = [
     "--import-mode=importlib",
 ]
+filterwarnings = [
+# Ignore warnings loading from file specifically
+    'ignore:Downloading ap and F10.7 data from file:UserWarning',
+]
 
 [tool.cibuildwheel]
 # skip Python <3.10
diff --git a/tests/conftest.py b/tests/conftest.py
index bbe64d9..9048616 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -6,21 +6,15 @@
 
 
 @pytest.fixture(autouse=True)
-def local_path(monkeypatch):
-    # Update the data location to our test data
-    test_file = Path(__file__).parent / "f107_ap_test_data.txt"
+def _path_setup(monkeypatch, tmp_path):
     # Monkeypatch the url and expected download location, so we aren't
     # dependent on an internet connection.
-    monkeypatch.setattr(utils, "_F107_AP_PATH", test_file)
-    return test_file
-
+    monkeypatch.setattr(utils, "_F107_AP_PATH", tmp_path / "f107_ap_test_data.txt")
 
-@pytest.fixture(autouse=True)
-def remote_path(monkeypatch, local_path):
     # Update the remote URL to point to a local file system test path
     # by prepending file:// so that it can be opened by urlopen()
-    test_url = local_path.absolute().as_uri()
+    test_file = Path(__file__).parent / "f107_ap_test_data.txt"
+    test_url = test_file.absolute().as_uri()
     # Monkeypatch the url and expected download location, so we aren't
     # dependent on an internet connection.
     monkeypatch.setattr(utils, "_F107_AP_URL", test_url)
-    return test_url
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 7c13cae..76cf2fc 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,3 +1,6 @@
+from pathlib import Path
+from unittest.mock import patch
+
 import numpy as np
 import pytest
 from numpy.testing import assert_allclose, assert_array_equal
@@ -130,3 +133,41 @@ def test_get_f107_ap_interpolated_warns(dates):
         UserWarning, match="There is data that was either interpolated or"
     ):
         utils.get_f107_ap(dates)
+
+
+@patch("pymsis.utils.download_f107_ap")
+def test_auto_refresh(download_data_mock, monkeypatch):
+    test_file = Path(__file__).parent / "f107_ap_test_data.txt"
+    # Monkeypatch the url and expected download location, so we aren't
+    # dependent on an internet connection.
+    monkeypatch.setattr(utils, "_F107_AP_PATH", test_file)
+
+    def call_with_time(time):
+        try:
+            utils.get_f107_ap(time)
+        except ValueError:
+            # There is no data in our test file for this, so we will error later
+            # But this is enough to trigger an attempt at a refresh
+            pass
+
+    # Should not trigger a refresh, data before the time in the file
+    call_with_time(np.datetime64("1990-12-31T23:00"))
+    assert download_data_mock.call_count == 0
+
+    # Final observed time in the file
+    call_with_time(np.datetime64("2000-12-29T21:00"))
+    assert download_data_mock.call_count == 0
+
+    # One hour beyond our current time shouldn't trigger a refresh
+    # there would be no data to get for that time period
+    call_with_time(np.datetime64("now") + np.timedelta64(1, "h"))
+    assert download_data_mock.call_count == 0
+
+    # Within the predicted data in the file should try to get a refresh
+    with pytest.warns(UserWarning, match="There is data that was either"):
+        call_with_time(np.datetime64("2000-12-30T00:00"))
+    assert download_data_mock.call_count == 1
+
+    # Should trigger a refresh, after the data in the file but before current time
+    call_with_time(np.datetime64("2005-01-01T00:00"))
+    assert download_data_mock.call_count == 2  # noqa: PLR2004