diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4043a28 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +observations +results +scratch.ipynb +paleo_data_cache +.DS_Store +pangeo-cmip6.csv +__pycache__ \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 8c09af4..e3ea21b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -90,10 +90,30 @@ chmod +x run_benchmark.sh **Paleoclimate data download:** ```bash -cd paleo_scrips/paleo_data_cache -python paleo_data_cache.py --paleo-period lgm --data-cache-dir path/to/paleo_scrips/paleo_data_cache +cd paleo_scripts +python download_paleo.py --source observations +python download_paleo.py --source cmip6 --model AWI-ESM-1-1-LR --period lgm +python download_paleo.py --source cmip6 --model all --period all ``` +**Paleoclimate data processing:** +```bash +cd paleo_scripts +python process_paleo.py --source observations +python process_paleo.py --source cmip6 --period lgm +``` + +**Paleoclimate benchmark (spatial RMSE/MAE/CRPS):** +```bash +cd paleo_scripts +python paleo_benchmark.py --model AWI-ESM-1-1-LR --period lgm +python paleo_benchmark.py --model all --period all +python paleo_benchmark.py --model MIROC-ES2L --period lgm --use-picontrol +``` + +PI reference for anomaly computation: lgmDA Holocene (default) or model piControl (`--use-picontrol`). +Precipitation benchmarks (Bartlein MAP, Scussolini LIG) require `--use-picontrol` and processed `pr` data. 
+ ## Architecture ### Data Flow diff --git a/paleo_scrips/paleo_benchmarks.ipynb b/paleo_scrips/paleo_benchmarks.ipynb deleted file mode 100644 index bb161ae..0000000 --- a/paleo_scrips/paleo_benchmarks.ipynb +++ /dev/null @@ -1,438 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "34c6c309", - "metadata": {}, - "outputs": [], - "source": [ - "import xarray as xr\n", - "import pandas as pd\n", - "import numpy as np\n", - "import glob\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import glob\n", - "import os\n", - "\n", - "import cartopy.crs as ccrs\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "markdown", - "id": "db423239", - "metadata": {}, - "source": [ - "# make annual maps for each period" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ab7d5232", - "metadata": {}, - "outputs": [], - "source": [ - "root_path = 'paleo_data_cache'\n", - "# eocene\n", - "eocene_models = glob.glob(f'{root_path}/deepmip/dap.ceda.ac.uk/badc/cmip6/data/CMIP6Plus/DeepMIP/deepmip-eocene-p1/*/*/*-x*/*/climatology/*')\n", - "\n", - "# pliocene\n", - "pliocene_models = glob.glob(f'{root_path}/*/midPliocene-eoi400_tas_annual.nc')\n", - "# lig127k\n", - "lig127k_models = glob.glob(f'{root_path}/*/lig127k_tas_annual.nc')\n", - "# LGM\n", - "lgm_models = glob.glob(f'{root_path}/*/lgm_tas_annual.nc')\n", - "# midH\n", - "midHolocene_models = glob.glob(f'{root_path}/*/midHolocene_tas_annual.nc')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "29fd269f", - "metadata": {}, - "outputs": [], - "source": [ - "# period = 'eocene'\n", - "# for file in eocene_models:\n", - "# model = file.split('/')[17]\n", - "# co2 = file.split('/')[18].split('-')[-1]\n", - "\n", - "# ds = xr.open_dataset(file)\n", - " \n", - "# if \"t\" in ds.dims:\n", - "# ds = ds.rename({'t':'time','latitude':'lat','longitude':'lon'})\n", - "# if \"time_counter\" in ds.dims:\n", - "# ds = 
ds.rename({'time_counter':'time'})\n", - " \n", - "# ds = ds.mean(dim='time')\n", - "\n", - "# fig, axis = plt.subplots(1, 1, figsize=(9,5),subplot_kw=dict(projection=ccrs.Robinson(central_longitude=180)))\n", - "\n", - "# cax = (ds['tas'] - 273.15).plot(\n", - "# ax=axis,transform=ccrs.PlateCarree(), \n", - "# vmin = -60,\n", - "# vmax = 60,\n", - "# cmap = 'RdBu_r',\n", - "# add_colorbar=False\n", - "# )\n", - "# cbar = fig.colorbar(cax, ticks=[-60,-30, 0, 30,60])\n", - "# axis.coastlines() # cartopy function\n", - "\n", - "# axis.set_title(f\"{model} CO2{co2}\",fontsize=20)\n", - "\n", - "# os.makedirs(f'results/paleo/{period}_maps',exist_ok=True)\n", - "# fig.savefig(f'results/paleo/{period}_maps/{model}_{float(co2[1:])}.png', bbox_inches='tight', pad_inches=0.1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c8478718", - "metadata": {}, - "outputs": [], - "source": [ - "# period = 'pliocene' # run this for each non eocene period\n", - "# for file in pliocene_models:\n", - "# model = file.split('/')[8]\n", - "\n", - "# ds = xr.open_dataset(file)\n", - "\n", - "# fig, axis = plt.subplots(1, 1, figsize=(9,5),subplot_kw=dict(projection=ccrs.Robinson(central_longitude=180)))\n", - "\n", - "# cax = (ds['tas'] - 273.15).plot(\n", - "# ax=axis,transform=ccrs.PlateCarree(), \n", - "# vmin = -60,\n", - "# vmax = 60,\n", - "# cmap = 'RdBu_r',\n", - "# add_colorbar=False\n", - "# )\n", - "# cbar = fig.colorbar(cax, ticks=[-60,-30, 0, 30,60])\n", - "# axis.coastlines() # cartopy function\n", - "\n", - "# axis.set_title(model,fontsize=20)\n", - "\n", - "# os.makedirs(f'results/paleo/{period}_maps',exist_ok=True)\n", - "# fig.savefig(f'results/paleo/{period}_maps/{model}.png', bbox_inches='tight', pad_inches=0.1)" - ] - }, - { - "cell_type": "markdown", - "id": "1cd2797a", - "metadata": {}, - "source": [ - "for seasonal plots, do zonal means and make csv." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8626505d", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_csv(\"https://cmip6.storage.googleapis.com/pangeo-cmip6.csv\")\n", - "piC_tas_df = df[(df['activity_id'] == 'CMIP') & (df['experiment_id'] == 'piControl')& (df['table_id'] == 'Amon')& (df['variable_id'] == 'tas')]#['table_id'].unique()\n", - "\n", - "piC_eocene = glob.glob(f'{root_path}/deepmip/dap.ceda.ac.uk/badc/cmip6/data/CMIP6Plus/DeepMIP/deepmip-eocene-p1/*/*/*-PI/*/climatology/*')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dc4ef903", - "metadata": {}, - "outputs": [], - "source": [ - "### MONTHLY AVERAGES FOR EVERY MODEL AND TIME PERIOD ####\n", - "regions = {\n", - " 'global':[-90,90],\n", - " 'northern_hemisphere':[0,90],\n", - " 'tropics':[-30,30],\n", - " 'southern_hemisphere':[-90,0],\n", - "}\n", - "\n", - "monthly_files = glob.glob(f'{root_path}/*/*_tas_monthly.nc')\n", - "_results = []\n", - "\n", - "for region, bnds in regions.items():\n", - " for file in monthly_files:\n", - " model = file.split('/')[8]\n", - " period = file.split('/')[9].split('_')[0]\n", - "\n", - " ds = xr.open_dataset(file)\n", - " \n", - " ds_slice = ds.sel(lat = slice(bnds[0],bnds[1]))\n", - " ds_zmean = ds_slice['tas'].weighted(ds_slice['weight'].fillna(0)).mean(dim=['lat','lon'])\n", - "\n", - " # get pi control to calculate anomaly\n", - " piC_ds = xr.open_zarr(piC_tas_df[piC_tas_df['source_id'] == model].iloc[0]['zstore'],chunks={})\n", - " piC_zmean = piC_ds.sel(lat = slice(bnds[0],bnds[1]))['tas'].weighted(ds_slice['weight'].fillna(0)).mean(dim=['lat','lon'])\n", - " piC_zmean = piC_zmean.groupby('time.month').mean()\n", - " \n", - " df_zmean = ds_zmean.to_dataframe().reset_index()\n", - " df_zmean = df_zmean.merge(piC_zmean.to_dataframe().reset_index()[['month','tas']].rename(columns={'tas':'tas_pi'}),on='month')\n", - "\n", - " df_zmean['model'] = model\n", - " df_zmean['period'] = period\n", - 
" df_zmean['region'] = region\n", - " df_zmean['co2_exp'] = np.nan\n", - " _results.append(df_zmean)\n", - "\n", - " for file in eocene_models:\n", - " model = file.split('/')[17]\n", - " co2 = file.split('/')[18].split('-')[-1]\n", - " pi_file = [x for x in piC_eocene if model in x]\n", - "\n", - " ds = xr.open_dataset(file)\n", - " ds_pi = xr.open_dataset(pi_file[0])\n", - " \n", - " if \"t\" in ds.dims:\n", - " ds = ds.rename({'t':'time','latitude':'lat','longitude':'lon'})\n", - " ds_pi = ds_pi.rename({'t':'time','latitude':'lat','longitude':'lon'})\n", - " if \"time_counter\" in ds.dims:\n", - " ds = ds.rename({'time_counter':'time'})\n", - " ds_pi = ds_pi.rename({'time_counter':'time'})\n", - "\n", - " ds = ds.sortby('lat')\n", - " ds_pi = ds_pi.sortby('lat')\n", - " \n", - " ds = ds.groupby('time.month').mean()\n", - " ds_pi = ds_pi.groupby('time.month').mean()\n", - "\n", - " ds_slice = ds.sel(lat = slice(bnds[0],bnds[1]))\n", - " ds_pi_slice = ds_pi.sel(lat = slice(bnds[0],bnds[1]))\n", - "\n", - " weights = np.cos(np.deg2rad(ds_slice.lat))\n", - " weights = weights.expand_dims({\"lon\": ds_slice.lon})\n", - "\n", - " ds_zmean = ds_slice['tas'].weighted(weights.fillna(0)).mean(dim=['lat','lon'])\n", - " ds_pi_zmean = ds_pi_slice['tas'].weighted(weights.fillna(0)).mean(dim=['lat','lon'])\n", - "\n", - " df_zmean = ds_zmean.to_dataframe().reset_index()\n", - " df_zmean = df_zmean.merge(ds_pi_zmean.to_dataframe().reset_index()[['month','tas']].rename(columns={'tas':'tas_pi'}),on='month')\n", - "\n", - " df_zmean['model'] = model\n", - " df_zmean['period'] = 'eocene'\n", - " df_zmean['region'] = region\n", - " df_zmean['co2_exp'] = co2\n", - " _results.append(df_zmean)\n", - "\n", - "seasonal_paleo_results = pd.concat(_results)\n", - "# seasonal_paleo_results[(seasonal_paleo_results['period'] == 'lig127k') & (seasonal_paleo_results['region'] == 'global')].pivot_table(index='month',columns='model',values = 'tas').plot()" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "b39a0f97", - "metadata": {}, - "outputs": [], - "source": [ - "seasonal_paleo_results['co2_exp'] = seasonal_paleo_results['co2_exp'].str[1:].astype(float)\n", - "\n", - "seasonal_paleo_results['tas_anom'] = seasonal_paleo_results['tas'] - seasonal_paleo_results['tas_pi']\n", - "seasonal_paleo_results['tas'] = seasonal_paleo_results['tas'] - 273.15\n", - "seasonal_paleo_results['tas_pi'] = seasonal_paleo_results['tas_pi'] - 273.15\n", - "\n", - "seasonal_obs = pd.read_csv('paleo_observations/processed/monthly_mean_zonal_obs.csv').drop(columns=['Unnamed: 0'])\n", - "seasonal_obs = seasonal_obs[['month','period','region','tas_anom','error']].rename(columns={'tas_anom':'tas_obs'})\n", - "monthly_zonal_means = seasonal_paleo_results.merge(seasonal_obs,on=['month','period','region'],how='outer')\n", - "monthly_zonal_means['mae'] = (monthly_zonal_means['tas_anom'] - monthly_zonal_means['tas_obs']).abs()\n", - "monthly_zonal_means.drop(columns=['height']).to_csv('../results/paleo/monthly_zonal_means.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "44c681aa", - "metadata": {}, - "outputs": [], - "source": [ - "### ANNUAL AVERAGES FOR EVERY MODEL AND TIME PERIOD ####\n", - "\n", - "df = pd.read_csv(\"https://cmip6.storage.googleapis.com/pangeo-cmip6.csv\")\n", - "piC_tas_df = df[(df['activity_id'] == 'CMIP') & (df['experiment_id'] == 'piControl')& (df['table_id'] == 'Amon')& (df['variable_id'] == 'tas')]#['table_id'].unique()\n", - "\n", - "annual_files = glob.glob(f'{root_path}/*/*_tas_annual.nc')\n", - "_results = []\n", - "\n", - "for file in annual_files:\n", - " model = file.split('/')[8]\n", - " period = file.split('/')[9].split('_')[0]\n", - "\n", - " ds = xr.open_dataset(file)\n", - "\n", - " # get pi control to calculate anomaly\n", - " piC_ds = xr.open_zarr(piC_tas_df[piC_tas_df['source_id'] == model].iloc[0]['zstore'],chunks={})\n", - " ds_anom = ds - 
piC_ds.mean(dim='time').drop_vars(['height','lat_bnds','lon_bnds','time_bnds'],errors='ignore')\n", - "\n", - " ds_zmean = ds_anom['tas'].weighted(ds['weight'].fillna(0)).mean(dim=['lat','lon'])\n", - "\n", - " df_zmean = pd.DataFrame({\n", - " 'tas':[ds_zmean.values.tolist()],\n", - " 'model':[model],\n", - " 'period':[period],\n", - " 'region':['global'],\n", - " })\n", - " _results.append(df_zmean)\n", - "\n", - "annual_paleo_results = pd.concat(_results)\n", - "\n", - "annual_paleo_results['period_idx'] = 1\n", - "annual_paleo_results.loc[annual_paleo_results['period'] == 'lig127k','period_idx'] = 2\n", - "annual_paleo_results.loc[annual_paleo_results['period'] == 'lgm','period_idx'] = 3\n", - "annual_paleo_results.loc[annual_paleo_results['period'] == 'midHolocene','period_idx'] = 4\n", - "\n", - "\n", - "# get eocene annual results\n", - "eeco_files = glob.glob('paleo_data_cache/deepmip/dap.ceda.ac.uk/badc/cmip6/data/CMIP6Plus/DeepMIP/deepmip-eocene-p1/*/*/*/*/*/*')\n", - "\n", - "eeco_models_df = pd.concat([\n", - " pd.DataFrame(eeco_files).rename(columns={0:'local_path'}),\n", - " pd.DataFrame(eeco_files)[0].str.split('/',expand=True)[[17,18]].rename(columns={17:'model',18:'experiment'})\n", - "],axis=1)\n", - "\n", - "eeco_models_df['experiment_id'] = eeco_models_df['experiment'].str[-1]\n", - "eeco_models_df.loc[eeco_models_df['experiment_id'] == 'I','experiment_id'] = 0\n", - "eeco_models_df['experiment_id'] = eeco_models_df['experiment_id'].astype(int)\n", - "\n", - "eeco_exp_models = eeco_models_df[~(eeco_models_df['experiment_id'] == 0)] \n", - "\n", - "_results = []\n", - "for row in eeco_exp_models.itertuples(index=False):\n", - " model = row.model\n", - " eeco_path = row.local_path\n", - " pi_path = eeco_models_df[(eeco_models_df['model'] == model) & (eeco_models_df['experiment_id'] == 0)].iloc[0]['local_path']\n", - "\n", - " ds_piC = xr.open_dataset(pi_path,chunks={},decode_times=False)\n", - " ds_eeco = 
xr.open_dataset(eeco_path,chunks={},decode_times=False)\n", - "\n", - "\n", - " if \"t\" in ds_piC.dims:\n", - " ds_piC = ds_piC.rename({'t':'time','latitude':'lat','longitude':'lon'})\n", - " ds_eeco = ds_eeco.rename({'t':'time','latitude':'lat','longitude':'lon'})\n", - " if \"time_counter\" in ds_piC.dims:\n", - " ds_piC = ds_piC.rename({'time_counter':'time'})\n", - " ds_eeco = ds_eeco.rename({'time_counter':'time'})\n", - "\n", - " ds_piC = ds_piC.mean(dim='time').drop_vars(['height','lat_bnds','lon_bnds'],errors='ignore')\n", - " ds_eeco = ds_eeco.mean(dim='time').drop_vars(['height','lat_bnds','lon_bnds'],errors='ignore')\n", - "\n", - " ds_anom = ds_eeco - ds_piC\n", - "\n", - " weights = np.cos(np.deg2rad(ds_anom.lat))\n", - " weights = weights.expand_dims({\"lon\": ds_anom.lon})\n", - " weights.name = 'areacella'\n", - "\n", - " zmean = ds_anom.weighted(weights.fillna(0)).mean().compute()\n", - "\n", - " df_zmean = pd.DataFrame({\n", - " 'tas':[zmean['tas'].values.tolist()],\n", - " 'model':[model],\n", - " 'period':['eocene'],\n", - " 'region':['global'],\n", - " 'co2_exp':[row.experiment_id],\n", - " })\n", - " _results.append(df_zmean)\n", - "annual_eocene_results = pd.concat(_results)\n", - "annual_eocene_results['period_idx'] = 0\n", - "\n", - "annual_paleo_results = pd.concat([\n", - " annual_eocene_results,\n", - " annual_paleo_results\n", - "])\n", - "annual_paleo_results = annual_paleo_results.rename(columns={'tas':'tas_anom'})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a636d523", - "metadata": {}, - "outputs": [], - "source": [ - "# replicate IPCC fig 7.19\n", - "paleo_sat_avgs = pd.read_csv('paleo_observations/processed/annual_mean_global_obs.csv')\n", - "\n", - "fig, ax = plt.subplots()\n", - "\n", - "ax.errorbar(np.arange(1,6), paleo_sat_avgs['tas_anom'], yerr=paleo_sat_avgs['error'], fmt='o',label='Observations')\n", - "plt.scatter(x=annual_paleo_results['period_idx'] + 
1.2,y=annual_paleo_results['tas_anom'],label='Models',color='orange')\n", - "\n", - "ax.set_xticks(np.arange(1,6))\n", - "ax.set_xticklabels(paleo_sat_avgs['period'],rotation=45);\n", - "ax.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "86b0ec03", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "mae_results = annual_paleo_results.merge(paleo_sat_avgs[['period','tas_anom']].rename(columns={'tas_anom':'tas_obs'}))\n", - "mae_results['mae'] = (mae_results['tas_anom'] - mae_results['tas_obs']).abs()\n", - "# add normalized MAE\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f4ce976e", - "metadata": {}, - "outputs": [], - "source": [ - "eocene_median_mae = mae_results[['period','co2_exp','mae']].groupby(['period','co2_exp']).median().reset_index().rename(columns={'mae':'median_mae'})\n", - "\n", - "non_eocene_median_mae = mae_results[['period','mae']].groupby('period').median().reset_index().rename(columns={'mae':'median_mae'})\n", - "non_eocene_median_mae = non_eocene_median_mae[non_eocene_median_mae['period'] != 'eocene']\n", - "non_eocene_median_mae['co2_exp'] = np.nan\n", - "\n", - "mae_results = mae_results.merge(\n", - " pd.concat([eocene_median_mae,non_eocene_median_mae]),\n", - " on=['period','co2_exp'],\n", - " how='outer'\n", - ")\n", - "\n", - "mae_results['nmae'] = mae_results['mae'] / mae_results['median_mae']\n", - "\n", - "mae_results.to_csv('../results/paleo/annual_zonal_means.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2dbfe586", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "backend_env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.5" 
- } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/paleo_scrips/paleo_data_cache/ACCESS-ESM1-5/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/ACCESS-ESM1-5/lig127k_wget_script.sh deleted file mode 100755 index ca34002..0000000 --- a/paleo_scrips/paleo_data_cache/ACCESS-ESM1-5/lig127k_wget_script.sh +++ /dev/null @@ -1,375 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 15:54:20 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.CSIRO.ACCESS-ESM1-5.lig127k.r1i1p1f1.Amon.tas.gn.v20191206|esgf-data04.diasjp.net' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 1 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/ACCESS-ESM1-5/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/ACCESS-ESM1-5/midHolocene_wget_script.sh deleted file mode 100755 index a8bc1fb..0000000 --- a/paleo_scrips/paleo_data_cache/ACCESS-ESM1-5/midHolocene_wget_script.sh +++ /dev/null @@ -1,1041 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:37:46 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.CSIRO.ACCESS-ESM1-5.midHolocene.r1i1p1f1.Amon.tas.gn.v20210422|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.CSIRO.ACCESS-ESM1-5.midHolocene.r1i1p1f1.Amon.tas.gn.v20210422|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. 
- p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." >&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." 
>&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. 
Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? 
" pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. - ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. 
Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. 
- if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." 
\ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! 
-f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" 
- echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] 
*" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/AWI-ESM-1-1-LR/lgm_wget_script.sh b/paleo_scrips/paleo_data_cache/AWI-ESM-1-1-LR/lgm_wget_script.sh deleted file mode 100644 index d54102c..0000000 --- a/paleo_scrips/paleo_data_cache/AWI-ESM-1-1-LR/lgm_wget_script.sh +++ /dev/null @@ -1,394 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/08 13:37:04 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.AWI.AWI-ESM-1-1-LR.lgm.r1i1p1f1.Amon.tas.gn.v20200212|esgf-data04.diasjp.net' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 20 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/AWI-ESM-1-1-LR/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/AWI-ESM-1-1-LR/lig127k_wget_script.sh deleted file mode 100755 index 0d76f4b..0000000 --- a/paleo_scrips/paleo_data_cache/AWI-ESM-1-1-LR/lig127k_wget_script.sh +++ /dev/null @@ -1,394 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 15:50:08 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.AWI.AWI-ESM-1-1-LR.lig127k.r1i1p1f1.Amon.tas.gn.v20200212|esgf-node.ornl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 20 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/AWI-ESM-1-1-LR/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/AWI-ESM-1-1-LR/midHolocene_wget_script.sh deleted file mode 100755 index b0c5f1f..0000000 --- a/paleo_scrips/paleo_data_cache/AWI-ESM-1-1-LR/midHolocene_wget_script.sh +++ /dev/null @@ -1,1060 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:36:57 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.AWI.AWI-ESM-1-1-LR.midHolocene.r1i1p1f1.Amon.tas.gn.v20200212|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.AWI.AWI-ESM-1-1-LR.midHolocene.r1i1p1f1.Amon.tas.gn.v20200212|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. 
- p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." >&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." 
>&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. 
Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? 
" pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. - ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. 
Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. 
- if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." 
\ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! 
-f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" 
- echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] 
*" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/CESM2-FV2/lgm_wget_script.sh b/paleo_scrips/paleo_data_cache/CESM2-FV2/lgm_wget_script.sh deleted file mode 100755 index c9ba600..0000000 --- a/paleo_scrips/paleo_data_cache/CESM2-FV2/lgm_wget_script.sh +++ /dev/null @@ -1,384 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/08 13:37:19 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... 
anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NCAR.CESM2-FV2.lgm.r1i2p2f1.Amon.tas.gn.v20220915|esgf-node.ornl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. 
- S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. 
mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). 
Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." 
- local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 10 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/CESM2-WACCM-FV2/lgm_wget_script.sh b/paleo_scrips/paleo_data_cache/CESM2-WACCM-FV2/lgm_wget_script.sh deleted file mode 100755 index ba6ac4d..0000000 --- a/paleo_scrips/paleo_data_cache/CESM2-WACCM-FV2/lgm_wget_script.sh +++ /dev/null @@ -1,376 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/08 13:40:33 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NCAR.CESM2-WACCM-FV2.lgm.r1i2p2f1.Amon.tas.gn.v20220915|esgf-data.ucar.edu' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 2 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/CESM2/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/CESM2/lig127k_wget_script.sh deleted file mode 100755 index 39cae4f..0000000 --- a/paleo_scrips/paleo_data_cache/CESM2/lig127k_wget_script.sh +++ /dev/null @@ -1,388 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 15:56:09 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NCAR.CESM2.lig127k.r1i1p1f1.Amon.tas.gn.v20190923|esgf-data04.diasjp.net' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 14 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/CESM2/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/CESM2/midHolocene_wget_script.sh deleted file mode 100755 index 4df648e..0000000 --- a/paleo_scrips/paleo_data_cache/CESM2/midHolocene_wget_script.sh +++ /dev/null @@ -1,1054 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:39:22 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NCAR.CESM2.midHolocene.r1i1p1f1.Amon.tas.gn.v20190923|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NCAR.CESM2.midHolocene.r1i1p1f1.Amon.tas.gn.v20190923|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. 
- w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." 
>&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! 
wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? 
" - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? " pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. 
- ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. - if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." 
- local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] *" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/CESM2/midPliocene-eoi400_wget_script.sh b/paleo_scrips/paleo_data_cache/CESM2/midPliocene-eoi400_wget_script.sh deleted file mode 100755 index 2df4f04..0000000 --- a/paleo_scrips/paleo_data_cache/CESM2/midPliocene-eoi400_wget_script.sh +++ /dev/null @@ -1,398 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/07 19:21:42 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NCAR.CESM2.midPliocene-eoi400.r1i1p1f1.Amon.tas.gn.v20200110|esgf-node.ornl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. 
Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 24 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/EC-Earth3-LR/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/EC-Earth3-LR/midHolocene_wget_script.sh deleted file mode 100755 index 98eb639..0000000 --- a/paleo_scrips/paleo_data_cache/EC-Earth3-LR/midHolocene_wget_script.sh +++ /dev/null @@ -1,1243 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:36:25 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.EC-Earth-Consortium.EC-Earth3-LR.midHolocene.r1i1p1f1.Amon.tas.gr.v20200409|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.EC-Earth-Consortium.EC-Earth3-LR.midHolocene.r1i1p1f1.Amon.tas.gr.v20200409|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. 
- p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." >&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." 
>&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. 
Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? 
" pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. - ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. 
Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. 
- if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." 
\ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! 
-f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" 
- echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] 
*" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/EC-Earth3-LR/midPliocene-eoi400_wget_script.sh b/paleo_scrips/paleo_data_cache/EC-Earth3-LR/midPliocene-eoi400_wget_script.sh deleted file mode 100755 index b4cf704..0000000 --- a/paleo_scrips/paleo_data_cache/EC-Earth3-LR/midPliocene-eoi400_wget_script.sh +++ /dev/null @@ -1,574 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/07 19:19:36 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.EC-Earth-Consortium.EC-Earth3-LR.midPliocene-eoi400.r1i1p1f1.Amon.tas.gr.v20200322|esgf-node.ornl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? 
in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." 
- post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 200 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/FGOALS-f3-L/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/FGOALS-f3-L/lig127k_wget_script.sh deleted file mode 100755 index 15b76bb..0000000 --- a/paleo_scrips/paleo_data_cache/FGOALS-f3-L/lig127k_wget_script.sh +++ /dev/null @@ -1,375 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 17:03:11 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.CAS.FGOALS-f3-L.lig127k.r1i1p1f1.Amon.tas.gr.v20191025|eagle.alcf.anl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 1 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/FGOALS-f3-L/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/FGOALS-f3-L/midHolocene_wget_script.sh deleted file mode 100755 index c2730ed..0000000 --- a/paleo_scrips/paleo_data_cache/FGOALS-f3-L/midHolocene_wget_script.sh +++ /dev/null @@ -1,1041 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:37:23 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.CAS.FGOALS-f3-L.midHolocene.r1i1p1f1.Amon.tas.gr.v20191025|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.CAS.FGOALS-f3-L.midHolocene.r1i1p1f1.Amon.tas.gr.v20191025|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. 
- w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." 
>&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! 
wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? 
" - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? " pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. 
- ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. - if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." 
- local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] *" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/FGOALS-g3/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/FGOALS-g3/lig127k_wget_script.sh deleted file mode 100755 index 09ae103..0000000 --- a/paleo_scrips/paleo_data_cache/FGOALS-g3/lig127k_wget_script.sh +++ /dev/null @@ -1,424 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 15:51:12 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.CAS.FGOALS-g3.lig127k.r1i1p1f1.Amon.tas.gn.v20191030|esgf-data04.diasjp.net' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 50 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/FGOALS-g3/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/FGOALS-g3/midHolocene_wget_script.sh deleted file mode 100755 index a51e377..0000000 --- a/paleo_scrips/paleo_data_cache/FGOALS-g3/midHolocene_wget_script.sh +++ /dev/null @@ -1,1090 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:37:33 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.CAS.FGOALS-g3.midHolocene.r1i1p1f1.Amon.tas.gn.v20191024|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.CAS.FGOALS-g3.midHolocene.r1i1p1f1.Amon.tas.gn.v20191024|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. 
- p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." >&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." 
>&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. 
Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? 
" pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. - ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. 
Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. 
- if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." 
\ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! 
-f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" 
- echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] 
*" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/GISS-E2-1-G/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/GISS-E2-1-G/lig127k_wget_script.sh deleted file mode 100755 index 6bbbe00..0000000 --- a/paleo_scrips/paleo_data_cache/GISS-E2-1-G/lig127k_wget_script.sh +++ /dev/null @@ -1,378 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 15:55:17 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NASA-GISS.GISS-E2-1-G.lig127k.r1i1p1f1.Amon.tas.gn.v20190916|dpesgf03.nccs.nasa.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. 
Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 4 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/GISS-E2-1-G/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/GISS-E2-1-G/midHolocene_wget_script.sh deleted file mode 100755 index b97067d..0000000 --- a/paleo_scrips/paleo_data_cache/GISS-E2-1-G/midHolocene_wget_script.sh +++ /dev/null @@ -1,1042 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:38:58 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NASA-GISS.GISS-E2-1-G.midHolocene.r1i1p1f1.Amon.tas.gn.v20190916|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NASA-GISS.GISS-E2-1-G.midHolocene.r1i1p1f1.Amon.tas.gn.v20190916|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. 
- p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." >&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." 
>&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. 
Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? 
" pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. - ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. 
Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. 
- if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." 
\ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! 
-f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" 
- echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] 
*" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/GISS-E2-1-G/midPliocene-eoi400_wget_script.sh b/paleo_scrips/paleo_data_cache/GISS-E2-1-G/midPliocene-eoi400_wget_script.sh deleted file mode 100755 index c2eb056..0000000 --- a/paleo_scrips/paleo_data_cache/GISS-E2-1-G/midPliocene-eoi400_wget_script.sh +++ /dev/null @@ -1,376 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/07 19:21:17 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NASA-GISS.GISS-E2-1-G.midPliocene-eoi400.r1i1p1f1.Amon.tas.gn.v20190626|eagle.alcf.anl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? 
in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." 
- post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 2 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/HadGEM3-GC31-LL/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/HadGEM3-GC31-LL/lig127k_wget_script.sh deleted file mode 100755 index ff1c9ba..0000000 --- a/paleo_scrips/paleo_data_cache/HadGEM3-GC31-LL/lig127k_wget_script.sh +++ /dev/null @@ -1,375 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 16:33:46 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NERC.HadGEM3-GC31-LL.lig127k.r1i1p1f1.Amon.tas.gn.v20210114|esgf-data04.diasjp.net' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 1 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/HadGEM3-GC31-LL/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/HadGEM3-GC31-LL/midHolocene_wget_script.sh deleted file mode 100755 index 09258a5..0000000 --- a/paleo_scrips/paleo_data_cache/HadGEM3-GC31-LL/midHolocene_wget_script.sh +++ /dev/null @@ -1,1041 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:39:43 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NERC.HadGEM3-GC31-LL.midHolocene.r1i1p1f1.Amon.tas.gn.v20210111|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NERC.HadGEM3-GC31-LL.midHolocene.r1i1p1f1.Amon.tas.gn.v20210111|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. 
- w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." 
>&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! 
wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? 
" - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? " pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. 
- ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. - if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." 
- local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] *" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/HadGEM3-GC31-LL/midPliocene-eoi400_wget_script.sh b/paleo_scrips/paleo_data_cache/HadGEM3-GC31-LL/midPliocene-eoi400_wget_script.sh deleted file mode 100755 index 01560df..0000000 --- a/paleo_scrips/paleo_data_cache/HadGEM3-GC31-LL/midPliocene-eoi400_wget_script.sh +++ /dev/null @@ -1,376 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/07 19:22:16 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NERC.HadGEM3-GC31-LL.midPliocene-eoi400.r1i1p1f1.Amon.tas.gn.v20201222|eagle.alcf.anl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? 
in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." 
- post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 2 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/INM-CM4-8/lgm_wget_script.sh b/paleo_scrips/paleo_data_cache/INM-CM4-8/lgm_wget_script.sh deleted file mode 100755 index 39ae6cf..0000000 --- a/paleo_scrips/paleo_data_cache/INM-CM4-8/lgm_wget_script.sh +++ /dev/null @@ -1,376 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/08 13:37:08 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.INM.INM-CM4-8.lgm.r1i1p1f1.Amon.tas.gr1.v20190802|eagle.alcf.anl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 2 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/INM-CM4-8/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/INM-CM4-8/lig127k_wget_script.sh deleted file mode 100755 index 9faf3aa..0000000 --- a/paleo_scrips/paleo_data_cache/INM-CM4-8/lig127k_wget_script.sh +++ /dev/null @@ -1,375 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 17:04:35 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.INM.INM-CM4-8.lig127k.r1i1p1f1.Amon.tas.gr1.v20190802|esgf-data04.diasjp.net' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 1 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/INM-CM4-8/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/INM-CM4-8/midHolocene_wget_script.sh deleted file mode 100755 index 4180a91..0000000 --- a/paleo_scrips/paleo_data_cache/INM-CM4-8/midHolocene_wget_script.sh +++ /dev/null @@ -1,1042 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:38:02 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.INM.INM-CM4-8.midHolocene.r1i1p1f1.Amon.tas.gr1.v20190802|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.INM.INM-CM4-8.midHolocene.r1i1p1f1.Amon.tas.gr1.v20190802|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. 
- w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." 
>&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! 
wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? 
" - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? " pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. 
- ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. - if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." 
- local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] *" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/IPSL-CM6A-LR/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/IPSL-CM6A-LR/lig127k_wget_script.sh deleted file mode 100755 index 7d05461..0000000 --- a/paleo_scrips/paleo_data_cache/IPSL-CM6A-LR/lig127k_wget_script.sh +++ /dev/null @@ -1,378 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 16:36:28 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.lig127k.r1i1p1f1.Amon.tas.gr.v20180926|eagle.alcf.anl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 4 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/IPSL-CM6A-LR/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/IPSL-CM6A-LR/midHolocene_wget_script.sh deleted file mode 100755 index 87373d6..0000000 --- a/paleo_scrips/paleo_data_cache/IPSL-CM6A-LR/midHolocene_wget_script.sh +++ /dev/null @@ -1,1044 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:38:14 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midHolocene.r1i1p1f1.Amon.tas.gr.v20180926|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midHolocene.r1i1p1f1.Amon.tas.gr.v20180926|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. 
- p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." >&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." 
>&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. 
Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? 
" pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. - ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. 
Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. 
- if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." 
\ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! 
-f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" 
- echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] 
*" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/IPSL-CM6A-LR/midPliocene-eoi400_wget_script.sh b/paleo_scrips/paleo_data_cache/IPSL-CM6A-LR/midPliocene-eoi400_wget_script.sh deleted file mode 100755 index db05ccd..0000000 --- a/paleo_scrips/paleo_data_cache/IPSL-CM6A-LR/midPliocene-eoi400_wget_script.sh +++ /dev/null @@ -1,375 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/07 19:21:07 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midPliocene-eoi400.r1i1p1f1.Amon.tas.gr.v20190118|eagle.alcf.anl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? 
in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." 
- post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 1 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/MIROC-ES2L/lgm_wget_script.sh b/paleo_scrips/paleo_data_cache/MIROC-ES2L/lgm_wget_script.sh deleted file mode 100644 index 74d4a4d..0000000 --- a/paleo_scrips/paleo_data_cache/MIROC-ES2L/lgm_wget_script.sh +++ /dev/null @@ -1,375 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/08 13:37:10 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.MIROC.MIROC-ES2L.lgm.r1i1p1f2.Amon.tas.gn.v20191002|esgf-node.ornl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 1 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/MPI-ESM1-2-LR/lgm_wget_script.sh b/paleo_scrips/paleo_data_cache/MPI-ESM1-2-LR/lgm_wget_script.sh deleted file mode 100755 index c2327b4..0000000 --- a/paleo_scrips/paleo_data_cache/MPI-ESM1-2-LR/lgm_wget_script.sh +++ /dev/null @@ -1,399 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/08 13:37:15 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.MPI-M.MPI-ESM1-2-LR.lgm.r1i1p1f1.Amon.tas.gn.v20190710|esgf-node.ornl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 25 file(s) -(The count won't match if you manually edit this file!) - -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/MPI-ESM1-2-LR/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/MPI-ESM1-2-LR/midHolocene_wget_script.sh deleted file mode 100755 index 220c062..0000000 --- a/paleo_scrips/paleo_data_cache/MPI-ESM1-2-LR/midHolocene_wget_script.sh +++ /dev/null @@ -1,1065 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:38:34 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.MPI-M.MPI-ESM1-2-LR.midHolocene.r1i1p1f1.Amon.tas.gn.v20190710|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? 
-fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.MPI-M.MPI-ESM1-2-LR.midHolocene.r1i1p1f1.Amon.tas.gn.v20190710|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. 
- w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." 
>&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! 
wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? 
" - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? " pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. 
- ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. - if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." 
- local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] *" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/MRI-ESM2-0/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/MRI-ESM2-0/midHolocene_wget_script.sh deleted file mode 100755 index df08f5b..0000000 --- a/paleo_scrips/paleo_data_cache/MRI-ESM2-0/midHolocene_wget_script.sh +++ /dev/null @@ -1,1041 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:38:45 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.MRI.MRI-ESM2-0.midHolocene.r1i1p1f1.Amon.tas.gn.v20190919|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.MRI.MRI-ESM2-0.midHolocene.r1i1p1f1.Amon.tas.gn.v20190919|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? 
in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. 
- U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." >&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." 
>&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. 
(if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? 
" - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? " pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. 
- ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. - if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! 
-d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." 
- local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] *" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/NESM3/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/NESM3/lig127k_wget_script.sh deleted file mode 100755 index 883e943..0000000 --- a/paleo_scrips/paleo_data_cache/NESM3/lig127k_wget_script.sh +++ /dev/null @@ -1,375 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 16:34:44 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NUIST.NESM3.lig127k.r1i1p1f1.Amon.tas.gn.v20190909|esg.lasg.ac.cn' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 1 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/NESM3/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/NESM3/midHolocene_wget_script.sh deleted file mode 100755 index da91d6f..0000000 --- a/paleo_scrips/paleo_data_cache/NESM3/midHolocene_wget_script.sh +++ /dev/null @@ -1,1041 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:36:44 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NUIST.NESM3.midHolocene.r1i1p1f1.Amon.tas.gn.v20190813|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NUIST.NESM3.midHolocene.r1i1p1f1.Amon.tas.gn.v20190813|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. 
- p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." >&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." 
>&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. 
Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? 
" pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. - ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. 
Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. 
- if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." 
\ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! 
-f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" 
- echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] 
*" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/NorESM1-F/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/NorESM1-F/lig127k_wget_script.sh deleted file mode 100755 index ae3ad57..0000000 --- a/paleo_scrips/paleo_data_cache/NorESM1-F/lig127k_wget_script.sh +++ /dev/null @@ -1,394 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 15:57:19 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NCC.NorESM1-F.lig127k.r1i1p1f1.Amon.tas.gn.v20190920|esgf-data04.diasjp.net' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 20 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/NorESM1-F/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/NorESM1-F/midHolocene_wget_script.sh deleted file mode 100755 index f4b17e5..0000000 --- a/paleo_scrips/paleo_data_cache/NorESM1-F/midHolocene_wget_script.sh +++ /dev/null @@ -1,1060 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:22:42 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NCC.NorESM1-F.midHolocene.r1i1p1f1.Amon.tas.gn.v20190920|esgf3.dkrz.de -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NCC.NorESM1-F.midHolocene.r1i1p1f1.Amon.tas.gn.v20190920|esgf3.dkrz.de' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. 
- p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." >&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." 
>&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. 
Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? 
" pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. - ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. 
Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. 
- if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." 
\ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! 
-f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" 
- echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] 
*" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/NorESM1-F/midPliocene-eoi400_wget_script.sh b/paleo_scrips/paleo_data_cache/NorESM1-F/midPliocene-eoi400_wget_script.sh deleted file mode 100755 index 3c8e728..0000000 --- a/paleo_scrips/paleo_data_cache/NorESM1-F/midPliocene-eoi400_wget_script.sh +++ /dev/null @@ -1,394 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/07 19:21:52 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NCC.NorESM1-F.midPliocene-eoi400.r1i1p1f1.Amon.tas.gn.v20190920|eagle.alcf.anl.gov' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. 
Use version $MIN_WGET_VERSION or greater. **" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 20 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/NorESM2-LM/lig127k_wget_script.sh b/paleo_scrips/paleo_data_cache/NorESM2-LM/lig127k_wget_script.sh deleted file mode 100755 index a569920..0000000 --- a/paleo_scrips/paleo_data_cache/NorESM2-LM/lig127k_wget_script.sh +++ /dev/null @@ -1,384 +0,0 @@ -#!/bin/bash -############################################################################## -# ESGF wget download script -# -# Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 16:32:38 -# Search URL: http://nimbus.llnl.gov/wget -# Request method: POST -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=0.4 -CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' -request_method='POST' -url_params=( - 'CMIP6.PMIP.NCC.NorESM2-LM.lig127k.r1i1p1f1.Amon.tas.gn.v20191108|esgf-data04.diasjp.net' -) - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts 'F:w:iuUnSpdvqh' OPT; do - case $OPT in - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. - S) skip_checksum=1;; # : Skip file checksum - p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - post_data=$(IFS="&" ; echo "${url_params[*]}") - new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. 
- cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! -f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - $wget -O "$file" $url || { failed=1; break; } - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if ((skip_checksum)); then - echo "Skipping check of file checksum" - break - fi - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" 
- if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" - echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - unset failed - fi - - done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -#do we have old results? Create the file if not -[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -# -# MAIN -# - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -cat <<'EOF-MESSAGE' -Script created for 10 file(s) -(The count won't match if you manually edit this file!) 
- -EOF-MESSAGE -sleep 1 - -check_os - -download - -dedup_cache_ - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/NorESM2-LM/midHolocene_wget_script.sh b/paleo_scrips/paleo_data_cache/NorESM2-LM/midHolocene_wget_script.sh deleted file mode 100755 index 9c2dbd5..0000000 --- a/paleo_scrips/paleo_data_cache/NorESM2-LM/midHolocene_wget_script.sh +++ /dev/null @@ -1,1050 +0,0 @@ -#!/bin/bash -############################################################################## -# ESG Federation download script -# -# Template version: 1.2 -# Generated by esgf-data.dkrz.de - 2025/08/01 14:40:54 -# Search URL: https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NCC.NorESM2-LM.midHolocene.r1i1p1f1.Amon.tas.gn.v20191108|esgf.ceda.ac.uk -# -############################################################################### -# first be sure it's bash... anything out of bash or sh will break -# and the test will assure we are not using sh instead of bash -if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" - echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." - echo "Trying to recover automatically..." - sleep 1 - /bin/bash $0 $@ - exit $? -fi - -version=1.3.2 -CACHE_FILE=.$(basename $0).status -openId= -search_url='https://esgf-data.dkrz.de/esg-search/wget?dataset_id=CMIP6.PMIP.NCC.NorESM2-LM.midHolocene.r1i1p1f1.Amon.tas.gn.v20191108|esgf.ceda.ac.uk' - -#These are the embedded files to be downloaded -download_files="$(cat < 10#${ver2[i]})) - then - return 1 - fi - if ((10#${ver1[i]} < 10#${ver2[i]})) - then - return 2 - fi - done - return 0 -} - -check_commands() { - #check wget - local MIN_WGET_VERSION=1.10 - vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION - case $? in - 2) #lower - wget -V - echo - echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 - exit 1 - esac -} - -usage() { - echo "Usage: $(basename $0) [flags] [openid] [username]" - echo "Flags is one of:" - sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 - echo - echo "This command stores the states of the downloads in .$0.status" - echo "For more information check the website: http://esgf.org/wiki/ESGF_wget" -} - -#defaults -debug=0 -clean_work=1 - -#parse flags -while getopts ':c:pfF:o:w:isuUndvqhHI:T' OPT; do - case $OPT in - H) skip_security=1 && use_http_sec=1;; # : Authenticate with OpenID (username,) and password, without the need for a certificate. - T) force_TLSv1=1;; # : Forces wget to use TLSv1. - c) ESG_CREDENTIALS="$OPTARG";; # : use this certificate for authentication. - f) force=1;; # : force certificate retrieval (defaults to only once per day); for certificate-less authentication (see -H option), this flag will force login and refresh cookies. - F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) - o) openId="$OPTARG";; #: Provide OpenID instead of interactively asking for it. - I) username_supplied="$OPTARG";; # : Explicitly set user ID. By default, the user ID is extracted from the last component of the OpenID URL. Use this flag to override this behaviour. - w) output="$OPTARG";; # : Write embedded files into a file and exit - i) insecure=1;; # : set insecure mode, i.e. don't check server certificate - s) skip_security=1 && use_cookies_for_http_basic_auth_start=1;; # : completely skip security. It will only work if the accessed data is not secured at all. -- works only if the accessed data is unsecured or a certificate exists or cookies are saved (latter applies to -H option only). - u) update=1;; # : Issue the search again and see if something has changed. - U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) - n) dry_run=1;; # : Don't download any files, just report. 
- p) clean_work=0;; # : preserve data that failed checksum - d) verbose=1;debug=1;; # : display debug information - v) verbose=1;; # : be more verbose - q) quiet=1;; # : be less verbose - h) usage && exit 0;; # : displays this help - \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; - \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; - esac -done -shift $(($OPTIND - 1)) - -#setup input as desired by the user -if [[ "$input_file" ]]; then - if [[ "$input_file" == '-' ]]; then - download_files="$(cat)" #read from STDIN - exec 0$output - exit -fi - - -#assure we have everything we need -check_commands - -if ((update)); then - echo "Checking the server for changes..." - new_wget="$(wget "$search_url" -qO -)" - compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" - if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then - echo "No changes detected." - else - echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" - counter=0 - while [[ -f $0.old.$counter ]]; do ((counter++)); done - mv $0 $0.old.$counter - echo "$new_wget" > $0 - fi - exit 0 -fi - - -############################################################################## -check_java() { - if ! type java >& /dev/null; then - echo "Java could not be found." >&2 - return 1 - fi - if java -version 2>&1|grep openjdk >/dev/null; then - openjdk=1; - else - openjdk=0; - fi - jversion=($(jversion=$(java -version 2>&1 | awk '/version/ {gsub("\"","");print $3}'); echo ${jversion//./ })) - mVer=${jversion[1]} - if [ $openjdk -eq 1 ]; then - mVer=${jversion[0]} - if ((mVer<5)); then - echo "Openjdk detected. Version 9+ is required for retrieving the certificate." >&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - else - - if ((mVer<5)); then - echo "Java version 1.5+ is required for retrieving the certificate." 
>&2 - echo "Current version seems older: $(java -version | head -n1) " >&2 - return 1 - fi - fi -} - -check_myproxy_logon() { - if ! type myproxy-logon >& /dev/null; then - echo "myproxy-logon could not be found." >&2 - return 1 - fi - echo "myproxy-logon found" >&2 -} - -proxy_to_java() { - local proxy_user proxy_pass proxy_server proxy_port - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$http_proxy) - local JAVA_PROXY= - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttp.proxyPort=$proxy_port" - eval $(sed 's#^\(https\?://\)\?\(\([^:@]*\)\(:\([^@]*\)\)\?@\)\?\([^:/]*\)\(:\([0-9]*\)\)\?.*#proxy_user=\3;proxy_pass=\5;proxy_server=\6;proxy_port=\8#'<<<$https_proxy) - [[ "$proxy_server" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyHost=$proxy_server" - [[ "$proxy_port" ]] && JAVA_PROXY=$JAVA_PROXY" -Dhttps.proxyPort=$proxy_port" - - echo "$JAVA_PROXY" -} - -# get certificates from github -get_certificates() { - # don't if this was already done today - [[ -z $force && "$(find $ESG_CERT_DIR -type d -mtime -1 2>/dev/null)" ]] && return 0 - echo -n "Retrieving Federation Certificates..." >&2 - - if ! wget -O $ESG_HOME/esg-truststore.ts --no-check-certificate https://github.com/ESGF/esgf-dist/raw/master/installer/certs/esg-truststore.ts; then - echo "Could not fetch esg-truststore"; - return 1 - fi - - if ! wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar -O - -q | tar x -C $ESG_HOME; then - #certificates tarred into esg_trusted_certificates. (if it breaks, let the user know why - wget --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/esg_trusted_certificates.tar - echo "Could't update certs!" >&2 - return 1 - else - #if here everythng went fine. 
Replace old cert with this ones - [[ -d $ESG_CERT_DIR ]] && rm -r $ESG_CERT_DIR || mkdir -p $(dirname $ESG_CERT_DIR) - mv $ESG_HOME/esg_trusted_certificates $ESG_CERT_DIR - touch $ESG_CERT_DIR - echo "done!" >&2 - fi - -} - -# Retrieve ESG credentials -unset pass -get_credentials() { - if check_java - then - use_java=1 - else - use_java=0 - echo "No suitable java for obtaining certificate - checking for myproxy-logon instead" >&2 - check_myproxy_logon || exit 1 - fi - #get all certificates - get_certificates - - if [[ -z "$(find $MYPROXY_GETCERT -type f -mtime -1 2>/dev/null)" ]]; then - echo -n "(Downloading $MYPROXY_GETCERT... " - mkdir -p $(dirname $MYPROXY_GETCERT) - if wget -q --no-check-certificate https://raw.githubusercontent.com/ESGF/esgf-dist/master/installer/certs/getcert.jar -O $MYPROXY_GETCERT;then - echo 'done)' - touch $MYPROXY_GETCERT - else - echo 'failed)' - fi - fi - - #if the user already defined one, use it - if [[ -z $openId ]]; then - #try to parse the last valid value if any - [[ -f "$MYPROXY_STATUS" ]] && openId=$(awk -F= '/^OpenID/ {gsub("\\\\", ""); print $2}' $MYPROXY_STATUS) - if [[ -z $openId ]]; then - #no OpenID, we need to ask the user - echo -n "Please give your OpenID (Example: https://myserver/example/username) ? " - else - #Allow the user to change it if desired - echo -n "Please give your OpenID (hit ENTER to accept default: $openId)? " - fi - read -e - [[ "$REPLY" ]] && openId="$REPLY" - else - ((verbose)) && echo "Using user defined OpenID $openId (to change use -o )" - fi - - if grep -q ceda.ac.uk <<<$openId; then - username=${openId##*/} - echo -n "Please give your username if different [$username]: " - read -e - [[ "$REPLY" ]] && username="$REPLY" - fi - - - - if [ $use_java -eq 1 ] - then - local args= - #get password - [[ ! "$pass" ]] && read -sp "MyProxy Password? 
" pass - [[ "$openId" ]] && args=$args" --oid $openId" - [[ "$pass" ]] && args=$args" -P $pass" - [[ "$username" ]] && args=$args" -l $username" - - echo -n $'\nRetrieving Credentials...' >&2 - if ! java $(proxy_to_java) -jar $MYPROXY_GETCERT $args --ca-directory $ESG_CERT_DIR --output $ESG_CREDENTIALS ; then - echo "Certificate could not be retrieved" - exit 1 - fi - echo "done!" >&2 - else - args=`openid_to_myproxy_args $openId $username` || exit 1 - if ! myproxy-logon $args -b -o $ESG_CREDENTIALS - then - echo "Certificate could not be retrieved" - exit 1 - fi - cp $HOME/.globus/certificates/* $ESG_CERT_DIR/ - fi -} - -openid_to_myproxy_args() { - python - </dev/null; then - #check openssl and certificate - if ! openssl x509 -checkend $CERT_EXPIRATION_WARNING -noout -in $ESG_CERT 2>/dev/null; then - echo "The certificate expires in less than $((CERT_EXPIRATION_WARNING / 60 / 60)) hour(s). Renewing..." - get_credentials - else - #ok, certificate is fine - return 0 - fi - fi -} - -# -# Detect ESG credentials -# -find_credentials() { - - #is X509_USER_PROXY or $HOME/.esg/credential.pem - if [[ -f "$ESG_CREDENTIALS" ]]; then - # file found, proceed. - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - elif [[ -f "$X509_USER_CERT" && -f "$X509_USER_KEY" ]]; then - # second try, use these certificates. - ESG_CERT="$X509_USER_CERT" - ESG_KEY="$X509_USER_KEY" - else - # If credentials are not present, just point to where they should go - echo "No ESG Credentials found in $ESG_CREDENTIALS" >&2 - ESG_CERT="$ESG_CREDENTIALS" - ESG_KEY="$ESG_CREDENTIALS" - #they will be retrieved later one - fi - - - #chek openssl and certificate - if (which openssl &>/dev/null); then - if ( openssl version | grep 'OpenSSL 1\.0' ); then - echo '** WARNING: ESGF Host certificate checking might not be compatible with OpenSSL 1.0+' - fi - check_cert || { (($?==1)); exit 1; } - fi - - if [[ $CHECK_SERVER_CERT == "Yes" ]]; then - [[ -d "$ESG_CERT_DIR" ]] || { echo "CA certs not found. 
Aborting."; exit 1; } - PKI_WGET_OPTS="--ca-directory=$ESG_CERT_DIR" - fi - - #some wget version complain if there's no file present - [[ -f $COOKIE_JAR ]] || touch $COOKIE_JAR - - PKI_WGET_OPTS="$PKI_WGET_OPTS --certificate=$ESG_CERT --private-key=$ESG_KEY --save-cookies=$COOKIE_JAR --load-cookies=$COOKIE_JAR --ca-certificate=$ESG_CERT" - -} - -check_chksum() { - local file="$1" - local chk_type=$2 - local chk_value=$3 - local local_chksum=Unknown - - case $chk_type in - md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; - sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; - *) echo "Can't verify checksum." && return 0;; - esac - - #verify - ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 - echo $local_chksum -} - -#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) -md5sum_() { - hash -r - if type md5sum >& /dev/null; then - echo $(md5sum $@) - else - echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') - fi -} - -#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) -sha256sum_() { - hash -r - if type sha256sum >& /dev/null; then - echo $(sha256sum $@) - elif type shasum >& /dev/null; then - echo $(shasum -a 256 $@) - else - echo $(sha2 -q -256 $@) - fi -} - -get_mod_time_() { - if ((MACOSX)); then - #on a mac modtime is stat -f %m - echo "$(stat -f %m $@)" - else - #on linux (cygwin) modtime is stat -c %Y - echo "$(stat -c %Y $@)" - fi - return 0; -} - -remove_from_cache() { - local entry="$1" - local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" - echo "$tmp_file" > "$CACHE_FILE" - unset cached -} - -#Download data from node using cookies and not certificates. -download_http_sec() -{ - #The data to be downloaded. - data=" $url" - filename="$file" - - #Wget args. 
- if ((insecure)) - then - wget_args=" --no-check-certificate --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - else - wget_args=" --ca-directory=$WGET_TRUSTED_CERTIFICATES --cookies=on --keep-session-cookies --save-cookies $COOKIES_FOLDER/wcookies.txt " - fi - - if ((use_cookies_for_http_basic_auth_start)) || ((use_cookies_for_http_basic_auth)) - then - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - fi - - if((force_TLSv1)) - then - wget_args=" $wget_args"" --secure-protocol=TLSv1 " - fi - - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - wget_args="$wget_args $ESGF_WGET_OPTS" - fi - - - #use cookies for the next downloads - use_cookies_for_http_basic_auth=1; - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "wget $wget_args $data\n" - fi - - - #Try to download the data. - command="wget $wget_args -O $filename $data" - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Extract orp service from url ? - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" == 1 )) && - if echo "$http_resp" | grep -q "/esg-orp/" - then - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - orp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - - #Use cookies for transaction with orp. - wget_args=" $wget_args"" --load-cookies $COOKIES_FOLDER/wcookies.txt" - - #Download data using either http basic auth or http login form. - if [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - download_http_sec_open_id - else - download_http_sec_decide_service - fi - else - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." 
\ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo "ERROR : http request to OpenID Relying Party service failed." - failed=1 - fi - fi -} - - -#Function that decides which implementaion of idp to use. -download_http_sec_decide_service() -{ - #find claimed id - - pos=$(echo "$openid_c" | egrep -o '/' | wc -l) - username_c=$(echo "$openid_c" | cut -d'/' -f "$(($pos + 1))") - esgf_uri=$(echo "$openid_c" | egrep -o '/esgf-idp/openid/') - - host=$(echo "$openid_c" | cut -d'/' -f 3) - #test ceda first. - - if [[ -z "$esgf_uri" ]] - then - openid_c_tmp="https://""$host""/openid/" - else - openid_c_tmp="https://""$host""/esgf-idp/openid/" - fi - - command="wget "$openid_c_tmp" --no-check-certificate ${force_TLSv1:+--secure-protocol=TLSv1} -O-" - - if [[ ! -z "$ESGF_WGET_OPTS" ]] - then - command="$command $ESGF_WGET_OPTS" - fi - - #Debug message. - if ((debug)) - then - echo -e "\nExecuting:\n" - echo -e "$command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - if echo "$http_resp" | grep -q "[application/xrds+xml]" \ - && echo "$http_resp" | grep -q "200 OK" \ - && (( cmd_exit_status == 0 )) - then - openid_c=$openid_c_tmp - download_http_sec_open_id - else - if [[ -z "$esgf_uri" ]] - then - echo "ERROR : HTTP request to OpenID Relying Party service failed." - failed=1 - else - download_http_sec_cl_id - fi - fi -} - - -download_http_sec_retry() -{ - echo -e "\nRetrying....\n" - #Retry in case that last redirect did not work, this happens with older version of wget. - command="wget $wget_args $data" - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" 
- - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - if echo "$http_resp" | grep -q "401 Unauthorized" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || echo "$http_resp" | grep -q "Connection timed out." \ - || echo "$http_resp" | grep -q "no-check-certificate" \ - || (( $cmd_exit_status != 0 )) - then - echo -e "\nERROR : Retry failed.\n" - #rm "$filename" - failed=1 - fi #if retry failed. -} - -#Function for downloading data using the claimed id. -download_http_sec_cl_id() -{ - #Http request for sending openid to the orp service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" $wget_args -O- https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - - #Extract orp service from openid ? - #Evaluate response.If redirected to idp service send the credentials. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( redirects == 2 )) && - if echo "$http_resp" | grep -q "login.htm" && (( cmd_exit_status == 0 )) - then - - urls=$(echo "$http_resp" | egrep -o 'https://[^ ]+' | cut -d'/' -f 3) - idp_service=$(echo "$urls" | tr '\n' ' ' | cut -d' ' -f 2) - - command="wget --post-data password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$idp_service/esgf-idp/idp/login.htm" - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "wget $command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. 
- #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 5 )) \ - if echo "$http_resp" | grep -q "text/html" \ - || echo "$http_resp" | grep -q "403: Forbidden" \ - || (( cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi - - else - echo "ERROR : HTTP request to OpenID Provider service failed." - failed=1 - fi #if redirected to idp. -} - - - -download_http_sec_open_id() -{ - #Http request for sending openid to the orp web service. - command="wget --post-data \"openid_identifier=$openid_c&rememberOpenid=on\" --header=\"esgf-idea-agent-type:basic_auth\" --http-user=\"$username_c\" --http-password=\"$password_c\" $wget_args ${quiet:+-q} ${quiet:--v} -O $filename https://$orp_service/esg-orp/j_spring_openid_security_check.htm " - - - #Debug message. - if ((debug)) - then - echo -e "Executing:\n" - echo -e "$command\n" - fi - - #Execution of command. - http_resp=$(eval $command 2>&1) - cmd_exit_status="$?" - - - if ((debug)) - then - echo -e "\nHTTP response:\n $http_resp\n" - fi - - #Evaluate response. - #redirects=$(echo "$http_resp" | egrep -c ' 302 ') - #(( "$redirects" != 7 )) || - if echo "$http_resp" | grep -q "text/html" || (( $cmd_exit_status != 0 )) - then - rm "$filename" - download_http_sec_retry - fi #if error during http basic authentication. - -} - - -download() { - wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v} -c ${force_TLSv1:+--secure-protocol=TLSv1} $PKI_WGET_OPTS" - - while read line - do - # read csv here document into proper variables - eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) - - #Process the file - echo -n "$file ..." - - #get the cached entry if any. - cached="$(grep -e "^$file" "$CACHE_FILE")" - - #if we have the cache entry but no file, clean it. - if [[ ! 
-f $file && "$cached" ]]; then - #the file was removed, clean the cache - remove_from_cache "$file" - unset cached - fi - - #check it wasn't modified - if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then - if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then - echo "Already downloaded and verified" - continue - elif ((update_files)); then - #user want's to overwrite newer files - rm $file - remove_from_cache "$file" - unset cached - else - #file on server is different from what we have. - echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" - continue - fi - fi - unset chksum_err_value chksum_err_count - - while : ; do - # (if we had the file size, we could check before trying to complete) - echo "Downloading" - [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" - if ((dry_run)); then - #all important info was already displayed, if in dry_run mode just abort - #No status will be stored - break - else - if ((use_http_sec)) - then - download_http_sec - if ((failed)) - then - break - fi - else - $wget -O "$file" $url || { failed=1; break; } - fi - fi - - #check if file is there - if [[ -f $file ]]; then - ((debug)) && echo file found - if [[ ! "$chksum" ]]; then - echo "Checksum not provided, can't verify file integrity" - break - fi - result_chksum=$(check_chksum "$file" $chksum_type $chksum) - if [[ "$result_chksum" != "$chksum" ]]; then - echo " $chksum_type failed!" - if ((clean_work)); then - if !((chksum_err_count)); then - chksum_err_value=$result_chksum - chksum_err_count=2 - elif ((checksum_err_count--)); then - if [[ "$result_chksum" != "$chksum_err_value" ]]; then - #this is a real transmission problem - chksum_err_value=$result_chksum - chksum_err_count=2 - fi - else - #ok if here we keep getting the same "different" checksum - echo "The file returns always a different checksum!" 
- echo "Contact the data owner to verify what is happening." - echo - sleep 1 - break - fi - - rm $file - #try again - echo -n " re-trying..." - continue - else - echo " don't use -p or remove manually." - fi - else - echo " $chksum_type ok. done!" - echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE - fi - fi - #done! - break - done - - if ((failed)); then - echo "download failed" - # most common failure is certificate expiration, so check this - #if we have the pasword we can retrigger download - ((!skip_security)) && [[ "$pass" ]] && check_cert - unset failed - fi - -done <<<"$download_files" - -} - -dedup_cache_() { - local file=${1:-${CACHE_FILE}} - ((debug)) && echo "dedup'ing cache ${file} ..." - local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) - ((DEBUG)) && echo "$tmp" - echo "$tmp" > $file - ((debug)) && echo "(cache dedup'ed)" -} - -http_basic_auth_func_info_message() -{ - echo "********************************************************************************" - echo "* *" - echo "* Note that new functionality to allow authentication without the need for *" - echo "* certificates is available with this version of the wget script. To enable, *" - echo "* use the \"-H\" option and enter your OpenID and password when prompted: *" - echo "* *" - echo "* $ "$(basename "$0")" -H [options...] 
*" - echo "* *" - echo "* For a full description of the available options use the help option: *" - echo "* *" - echo "* $ "$(basename "$0")" -h *" - echo "* *" - echo "********************************************************************************" -} - -# -# MAIN -# - -if ((!use_http_sec)) -then - http_basic_auth_func_info_message -fi - -echo "Running $(basename $0) version: $version" -((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" -echo "Use $(basename $0) -h for help."$'\n' - -((debug)) && cat< 1)) || (("$#" == 1)) ) - then - openid_c=$1 - else - read -p "Enter your openid : " openid_c - fi - - - #Read username. - if [[ ! -z "$username_supplied" ]] - then - username_c="$username_supplied" - elif (("$#" == 2)) - then - username_c=$2 - elif [[ "$openid_c" == */openid/ || "$openid_c" == */openid ]] - then - read -p "Enter username : " username_c - fi - - #Read password. - read -s -p "Enter password : " password_c - echo -e "\n" - - fi #use cookies - -fi #use_http_sec - - -#do we have old results? Create the file if not -[ ! 
-f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE - -#clean the force parameter if here (at htis point we already have the certificate) -unset force - -download - -dedup_cache_ - - -echo "done" diff --git a/paleo_scrips/paleo_data_cache/paleo_data_cache.py b/paleo_scrips/paleo_data_cache/paleo_data_cache.py deleted file mode 100644 index 7e69f21..0000000 --- a/paleo_scrips/paleo_data_cache/paleo_data_cache.py +++ /dev/null @@ -1,263 +0,0 @@ -import argparse -import logging -import os -import sys -import glob -from pathlib import Path -from typing import List, Optional - -import numpy as np -import xarray as xr - - -def setup_logging(log_level: str = "INFO", log_file: Optional[str] = None) -> None: - """Set up logging configuration.""" - log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - - handlers = [logging.StreamHandler(sys.stdout)] - if log_file: - handlers.append(logging.FileHandler(log_file)) - - logging.basicConfig( - level=getattr(logging, log_level.upper()), - format=log_format, - handlers=handlers - ) - - -def find_model_folders(data_cache_dir: Path, paleo_period: str) -> List[str]: - """Find model folders containing wget scripts for the specified period.""" - search_pattern = data_cache_dir / "*" / f"{paleo_period}*.sh" - model_folders = glob.glob(str(search_pattern)) - - logging.info(f"Found {len(model_folders)} model folders for period '{paleo_period}'") - for folder in model_folders: - logging.debug(f"Model folder: {folder}") - - return model_folders - - -def download_data(wget_file: str) -> bool: - """Download data using wget script.""" - logging.info(f"Downloading data using script: {wget_file}") - - try: - # Make script executable - os.system(f"chmod +x {wget_file}") - # Execute download script - exit_code = os.system(f"{wget_file}") - - if exit_code == 0: - logging.info("Data download completed successfully") - return True - else: - logging.error(f"Download failed with exit code: 
{exit_code}") - return False - except Exception as e: - logging.error(f"Error during download: {e}") - return False - - -def load_netcdf_files(model_dir: str) -> Optional[xr.Dataset]: - """Load and merge NetCDF files with temperature data.""" - nc_files = glob.glob(f"{model_dir}tas*") - - if not nc_files: - logging.warning(f"No temperature files found in {model_dir}") - return None - - logging.info(f"Loading {len(nc_files)} NetCDF files") - - # Variables to drop if they exist - drop_vars = ["time_bnds", "lat_bnds", "lon_bnds", "height"] - - try: - # First attempt without cftime - ds = xr.open_mfdataset(nc_files, chunks={}).drop_vars(drop_vars, errors="ignore") - logging.info("Successfully loaded dataset without cftime") - except Exception as e: - logging.warning(f"Failed to load without cftime: {e}") - try: - # Second attempt with cftime for non-standard calendars - ds = xr.open_mfdataset(nc_files, use_cftime=True, chunks={}).drop_vars(drop_vars, errors="ignore") - logging.info("Successfully loaded dataset with cftime") - except Exception as e2: - logging.error(f"Failed to load dataset: {e2}") - return None - - return ds - - -def calculate_area_weights(ds: xr.Dataset) -> xr.Dataset: - """Calculate area weights based on latitude.""" - logging.info("Calculating area weights") - weights = np.cos(np.deg2rad(ds.lat)) - weights = weights.expand_dims({"lon": ds.lon}) - weights.name = "areacella" - return weights - - -def process_temperature_data(ds: xr.Dataset, model_dir: str, paleo_period: str) -> None: - """Process temperature data and save annual and monthly statistics.""" - logging.info("Processing temperature data") - - # Calculate area weights - weights = calculate_area_weights(ds) - - # Annual statistics - logging.info("Calculating annual mean and standard deviation") - ds_mean_annual = ds.mean(dim="time") - ds_std_annual = ds.std(dim="time").rename({"tas": "tas_std"}) - - annual_output = f"{model_dir}{paleo_period}_tas_annual.nc" - logging.info(f"Saving annual 
statistics to: {annual_output}") - xr.merge([ds_mean_annual, ds_std_annual, weights.to_dataset(name="weight")]).to_netcdf(annual_output) - - # Monthly statistics - logging.info("Calculating monthly mean and standard deviation") - ds_mean_mon = ds.groupby("time.month").mean() - ds_std_mon = ds.groupby("time.month").std().rename({"tas": "tas_std"}) - - monthly_output = f"{model_dir}{paleo_period}_tas_monthly.nc" - logging.info(f"Saving monthly statistics to: {monthly_output}") - xr.merge([ds_mean_mon, ds_std_mon, weights.to_dataset(name="weight")]).to_netcdf(monthly_output) - - -def cleanup_files(model_dir: str) -> None: - """Remove temporary NetCDF files after processing.""" - nc_files = glob.glob(f"{model_dir}tas*") - logging.info(f"Cleaning up {len(nc_files)} temporary files") - - for file in nc_files: - try: - os.remove(file) - logging.debug(f"Removed: {file}") - except Exception as e: - logging.warning(f"Failed to remove {file}: {e}") - - -def download_eocene_data() -> None: - """Handle special case for Eocene data download.""" - logging.info("Downloading Eocene data via direct wget") - wget_command = ( - 'wget -e robots=off --mirror --no-parent -r --accept "tas_*mean.nc" ' - 'https://dap.ceda.ac.uk/badc/cmip6/data/CMIP6Plus/DeepMIP/deepmip-eocene-p1/' - ) - - exit_code = os.system(wget_command) - if exit_code == 0: - logging.info("Eocene data download completed successfully") - else: - logging.error(f"Eocene data download failed with exit code: {exit_code}") - - -def process_paleo_period(data_cache_dir: Path, paleo_period: str, skip_download: bool = False) -> None: - """Process data for a specific paleoclimate period.""" - logging.info(f"Processing paleoclimate period: {paleo_period}") - - # Special handling for Eocene - if paleo_period == 'eocene': - download_eocene_data() - return - - # Find model folders - model_folders = find_model_folders(data_cache_dir, paleo_period) - - if not model_folders: - logging.error(f"No model folders found for period 
'{paleo_period}'") - return - - # Process each model - for wget_file in model_folders: - model_dir = "/".join(wget_file.split("/")[:-1]) + "/" - model_name = Path(wget_file).parent.name - logging.info(f"Processing model: {model_name}") - - # Download data unless skipped - if not skip_download: - if not download_data(wget_file): - logging.error(f"Skipping model {model_name} due to download failure") - continue - - # Load and process data - ds = load_netcdf_files(model_dir) - if ds is None: - logging.error(f"Skipping model {model_name} due to data loading failure") - continue - - try: - process_temperature_data(ds, model_dir, paleo_period) - logging.info(f"Successfully processed model: {model_name}") - except Exception as e: - logging.error(f"Failed to process model {model_name}: {e}") - continue - finally: - if not skip_download: - cleanup_files(model_dir) - - -def main() -> None: - """Main function with argument parsing and execution.""" - parser = argparse.ArgumentParser( - description="Process paleoclimate temperature data from CMIP6 models", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument( - "--paleo-period", - type=str, - choices=["lgm", "midHolocene", "lig127k", "midPliocene-eoi400", "eocene"], - help="Paleoclimate period to process" - ) - - parser.add_argument( - "--data-cache-dir", - type=Path, - required=True, - help="Path to the paleoclimate data cache directory" - ) - - parser.add_argument( - "--skip-download", - action="store_true", - help="Skip data download step (assume data already exists)" - ) - - parser.add_argument( - "--log-level", - choices=["DEBUG", "INFO", "WARNING", "ERROR"], - default="INFO", - help="Logging level" - ) - - parser.add_argument( - "--log-file", - type=str, - help="Path to log file (default: log to stdout only)" - ) - - args = parser.parse_args() - - # Setup logging - setup_logging(args.log_level, args.log_file) - - # Validate data cache directory - if not args.data_cache_dir.exists(): - 
logging.error(f"Data cache directory does not exist: {args.data_cache_dir}") - sys.exit(1) - - logging.info("Starting paleoclimate data processing") - logging.info(f"Period: {args.paleo_period}") - logging.info(f"Data cache directory: {args.data_cache_dir}") - logging.info(f"Skip download: {args.skip_download}") - - try: - process_paleo_period(args.data_cache_dir, args.paleo_period, args.skip_download) - logging.info("Processing completed successfully") - except Exception as e: - logging.error(f"Processing failed: {e}") - sys.exit(1) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/paleo_scrips/prep_paleo_obs.ipynb b/paleo_scrips/prep_paleo_obs.ipynb deleted file mode 100644 index 68028b0..0000000 --- a/paleo_scrips/prep_paleo_obs.ipynb +++ /dev/null @@ -1,444 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 22, - "id": "7de9f289", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import xarray as xr\n", - "import pandas as pd\n", - "\n", - "import os\n", - "import subprocess\n", - "import glob\n", - "\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "markdown", - "id": "e12473da", - "metadata": {}, - "source": [ - "### Collect paleoclimate observations for the following periods:\n", - "- Eocene ([paper](https://www.nature.com/articles/s43247-024-01531-3#Fig2)) ([download](https://data.ceda.ac.uk/badc/ar6_wg1/data/ch_07/ch7_fig19/v20230118))\n", - "- Pliocene ([paper](https://www.nature.com/articles/s43247-024-01531-3#Fig2)) ([download](https://data.ceda.ac.uk/badc/ar6_wg1/data/ch_07/ch7_fig19/v20230118))\n", - "- Last Inter Glacial (127k) ([paper](https://cp.copernicus.org/articles/17/63/2021/#section4&gid=1&pid=1)) (download)\n", - "- Last Glacial Maximum ([paper](https://www.nature.com/articles/s41586-020-2617-x)) ([download](https://github.com/jesstierney/lgmDA))\n", - "- Mid Holocene ([paper](https://www.nature.com/articles/s41597-020-0530-7)) 
([download](https://www.ncei.noaa.gov/access/paleo-search/study/29712))" - ] - }, - { - "cell_type": "markdown", - "id": "1fbab751", - "metadata": {}, - "source": [ - "# Pliocene and Eocene\n", - "\n", - "only include those models that carried out simulations in the range ×4–×8 preindustrial levels of CO2, in accordance with CO2 proxy estimates for the EECO3. The exception is CESM2.1slab, which we include for context and which was run at ×3. ([ref](https://www.nature.com/articles/s43247-024-01531-3#Fig2))\n", - "\n", - "Model name Model generation ECS GMST\n", - "- CCSM3h 8x PMIP3/CMIP5 2.05.0 23.31\n", - "- CESM1.2 CAM5 6xCO2 PMIP4/CMIP6 2.0 10#${ver2[i]})) + then + return 1 + fi + if ((10#${ver1[i]} < 10#${ver2[i]})) + then + return 2 + fi + done + return 0 +} + +check_commands() { + #check wget + local MIN_WGET_VERSION=1.10 + vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION + case $? in + 2) #lower + wget -V + echo + echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 + exit 1 + esac +} + +usage() { + echo "Usage: $(basename $0) [flags]" + echo "Flags is one of:" + sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 + echo + echo "This command stores the states of the downloads in .$0.status" +} + +#defaults +debug=0 +clean_work=1 + +#parse flags +while getopts 'F:w:iuUnSpdvqh' OPT; do + case $OPT in + F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) + w) output="$OPTARG";; # : Write embedded files into a file and exit + i) insecure=1;; # : set insecure mode, i.e. don't check server certificate + u) update=1;; # : Issue the search again and see if something has changed. + U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) + n) dry_run=1;; # : Don't download any files, just report. 
+ S) skip_checksum=1;; # : Skip file checksum + p) clean_work=0;; # : preserve data that failed checksum + d) verbose=1;debug=1;; # : display debug information + v) verbose=1;; # : be more verbose + q) quiet=1;; # : be less verbose + h) usage && exit 0;; # : displays this help + \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; + \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; + esac +done +shift $(($OPTIND - 1)) + +#setup input as desired by the user +if [[ "$input_file" ]]; then + if [[ "$input_file" == '-' ]]; then + download_files="$(cat)" #read from STDIN + exec 0$output + exit +fi + +#assure we have everything we need +check_commands + +if ((update)); then + echo "Checking the server for changes..." + post_data=$(IFS="&" ; echo "${url_params[*]}") + new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" + compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" + if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then + echo "No changes detected." + else + echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" + counter=0 + while [[ -f $0.old.$counter ]]; do ((counter++)); done + mv $0 $0.old.$counter + echo "$new_wget" > $0 + fi + exit 0 +fi + +check_chksum() { + local file="$1" + local chk_type=$2 + local chk_value=$3 + local local_chksum=Unknown + + case $chk_type in + md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; + sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; + *) echo "Can't verify checksum." && return 0;; + esac + + #verify + ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 + echo $local_chksum +} + +#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. 
mac os x) +md5sum_() { + hash -r + if type md5sum >& /dev/null; then + echo $(md5sum $@) + else + echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') + fi +} + +#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) +sha256sum_() { + hash -r + if type sha256sum >& /dev/null; then + echo $(sha256sum $@) + elif type shasum >& /dev/null; then + echo $(shasum -a 256 $@) + else + echo $(sha2 -q -256 $@) + fi +} + +get_mod_time_() { + if ((MACOSX)); then + #on a mac modtime is stat -f %m + echo "$(stat -f %m $@)" + else + #on linux (cygwin) modtime is stat -c %Y + echo "$(stat -c %Y $@)" + fi + return 0; +} + +remove_from_cache() { + local entry="$1" + local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" + echo "$tmp_file" > "$CACHE_FILE" + unset cached +} + +download() { + wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" + + while read line + do + # read csv here document into proper variables + eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) + + # Route into per-model subdirectory: pr_Amon_{MODEL}_lgm_... → raw/models/{MODEL}/ + model=$(echo "$file" | cut -d_ -f3) + file="${TARGET_DIR}/${model}/${file}" + + #Process the file + echo -n "$file ..." + + #get the cached entry if any. + cached="$(grep -e "^$file" "$CACHE_FILE")" + + #if we have the cache entry but no file, clean it. + if [[ ! 
-f $file && "$cached" ]]; then + #the file was removed, clean the cache + remove_from_cache "$file" + unset cached + fi + + #check it wasn't modified + if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then + if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then + echo "Already downloaded and verified" + continue + elif ((update_files)); then + #user want's to overwrite newer files + rm $file + remove_from_cache "$file" + unset cached + else + #file on server is different from what we have. + echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" + continue + fi + fi + unset chksum_err_value chksum_err_count + + while : ; do + # (if we had the file size, we could check before trying to complete) + echo "Downloading" + [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" + if ((dry_run)); then + #all important info was already displayed, if in dry_run mode just abort + #No status will be stored + break + else + $wget -O "$file" $url || { failed=1; break; } + fi + + #check if file is there + if [[ -f $file ]]; then + ((debug)) && echo file found + if ((skip_checksum)); then + echo "Skipping check of file checksum" + break + fi + if [[ ! "$chksum" ]]; then + echo "Checksum not provided, can't verify file integrity" + break + fi + result_chksum=$(check_chksum "$file" $chksum_type $chksum) + if [[ "$result_chksum" != "$chksum" ]]; then + echo " $chksum_type failed!" + if ((clean_work)); then + if !((chksum_err_count)); then + chksum_err_value=$result_chksum + chksum_err_count=2 + elif ((checksum_err_count--)); then + if [[ "$result_chksum" != "$chksum_err_value" ]]; then + #this is a real transmission problem + chksum_err_value=$result_chksum + chksum_err_count=2 + fi + else + #ok if here we keep getting the same "different" checksum + echo "The file returns always a different checksum!" + echo "Contact the data owner to verify what is happening." 
+ echo + sleep 1 + break + fi + + rm $file + #try again + echo -n " re-trying..." + continue + else + echo " don't use -p or remove manually." + fi + else + echo " $chksum_type ok. done!" + echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE + fi + fi + #done! + break + done + + if ((failed)); then + echo "download failed" + unset failed + fi + + done <<<"$download_files" + +} + +dedup_cache_() { + local file=${1:-${CACHE_FILE}} + ((debug)) && echo "dedup'ing cache ${file} ..." + local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) + ((DEBUG)) && echo "$tmp" + echo "$tmp" > $file + ((debug)) && echo "(cache dedup'ed)" +} + +#do we have old results? Create the file if not +[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE + +# +# MAIN +# + +echo "Running $(basename $0) version: $version" +((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" +echo "Use $(basename $0) -h for help."$'\n' + +cat <<'EOF-MESSAGE' +Script created for 50 file(s) +(The count won't match if you manually edit this file!) + +EOF-MESSAGE +sleep 1 + +check_os + +download + +dedup_cache_ + +echo "done" diff --git a/paleo_scripts/download_model_data/lgm_tas.sh b/paleo_scripts/download_model_data/lgm_tas.sh new file mode 100755 index 0000000..6d4b901 --- /dev/null +++ b/paleo_scripts/download_model_data/lgm_tas.sh @@ -0,0 +1,440 @@ +#!/bin/bash +############################################################################## +# ESGF wget download script +# +# Template version: 0.4 +# Generated by metagrid.esgf-west.org - 2026/05/12 16:49:12 +# Search URL: http://metagrid.esgf-west.org/proxy/wget +# Request method: POST +# +############################################################################### +# first be sure it's bash... 
anything out of bash or sh will break +# and the test will assure we are not using sh instead of bash +if [ $BASH ] && [ `basename $BASH` != bash ]; then + echo "######## This is a bash script! ##############" + echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." + echo "Trying to recover automatically..." + sleep 1 + /bin/bash $0 $@ + exit $? +fi + +version=0.4 +CACHE_FILE=.$(basename $0).status +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TARGET_DIR="${SCRIPT_DIR}/../paleo_data_cache/raw/models" +search_url='http://metagrid.esgf-west.org/proxy/wget' +request_method='POST' +url_params=( + '['CMIP6.PMIP.AWI.AWI-ESM-1-1-LR.lgm.r1i1p1f1.Amon.tas.gn.v20200212|esgf-node.ornl.gov', 'CMIP6.PMIP.INM.INM-CM4-8.lgm.r1i1p1f1.Amon.tas.gr1.v20190802|eagle.alcf.anl.gov', 'CMIP6.PMIP.MIROC.MIROC-ES2L.lgm.r1i1p1f2.Amon.tas.gn.v20191002|eagle.alcf.anl.gov', 'CMIP6.PMIP.MPI-M.MPI-ESM1-2-LR.lgm.r1i1p1f1.Amon.tas.gn.v20190710|esgf-node.ornl.gov', 'CMIP6.PMIP.NCAR.CESM2-FV2.lgm.r1i2p2f1.Amon.tas.gn.v20220915|esgf-node.ornl.gov', 'CMIP6.PMIP.NCAR.CESM2-WACCM-FV2.lgm.r1i2p2f1.Amon.tas.gn.v20220915|esgf-node.ornl.gov']' +) + +#These are the embedded files to be downloaded +download_files="$(cat < 10#${ver2[i]})) + then + return 1 + fi + if ((10#${ver1[i]} < 10#${ver2[i]})) + then + return 2 + fi + done + return 0 +} + +check_commands() { + #check wget + local MIN_WGET_VERSION=1.10 + vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION + case $? in + 2) #lower + wget -V + echo + echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. 
**" >&2 + exit 1 + esac +} + +usage() { + echo "Usage: $(basename $0) [flags]" + echo "Flags is one of:" + sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 + echo + echo "This command stores the states of the downloads in .$0.status" +} + +#defaults +debug=0 +clean_work=1 + +#parse flags +while getopts 'F:w:iuUnSpdvqh' OPT; do + case $OPT in + F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) + w) output="$OPTARG";; # : Write embedded files into a file and exit + i) insecure=1;; # : set insecure mode, i.e. don't check server certificate + u) update=1;; # : Issue the search again and see if something has changed. + U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) + n) dry_run=1;; # : Don't download any files, just report. + S) skip_checksum=1;; # : Skip file checksum + p) clean_work=0;; # : preserve data that failed checksum + d) verbose=1;debug=1;; # : display debug information + v) verbose=1;; # : be more verbose + q) quiet=1;; # : be less verbose + h) usage && exit 0;; # : displays this help + \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; + \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; + esac +done +shift $(($OPTIND - 1)) + +#setup input as desired by the user +if [[ "$input_file" ]]; then + if [[ "$input_file" == '-' ]]; then + download_files="$(cat)" #read from STDIN + exec 0$output + exit +fi + +#assure we have everything we need +check_commands + +if ((update)); then + echo "Checking the server for changes..." + post_data=$(IFS="&" ; echo "${url_params[*]}") + new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" + compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" + if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then + echo "No changes detected." + else + echo "Wget was changed. Dowloading. 
(old renamed to $0.old.#N)" + counter=0 + while [[ -f $0.old.$counter ]]; do ((counter++)); done + mv $0 $0.old.$counter + echo "$new_wget" > $0 + fi + exit 0 +fi + +check_chksum() { + local file="$1" + local chk_type=$2 + local chk_value=$3 + local local_chksum=Unknown + + case $chk_type in + md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; + sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; + *) echo "Can't verify checksum." && return 0;; + esac + + #verify + ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 + echo $local_chksum +} + +#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) +md5sum_() { + hash -r + if type md5sum >& /dev/null; then + echo $(md5sum $@) + else + echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') + fi +} + +#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. mac os x) +sha256sum_() { + hash -r + if type sha256sum >& /dev/null; then + echo $(sha256sum $@) + elif type shasum >& /dev/null; then + echo $(shasum -a 256 $@) + else + echo $(sha2 -q -256 $@) + fi +} + +get_mod_time_() { + if ((MACOSX)); then + #on a mac modtime is stat -f %m + echo "$(stat -f %m $@)" + else + #on linux (cygwin) modtime is stat -c %Y + echo "$(stat -c %Y $@)" + fi + return 0; +} + +remove_from_cache() { + local entry="$1" + local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" + echo "$tmp_file" > "$CACHE_FILE" + unset cached +} + +download() { + wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" + + while read line + do + # read csv here document into proper variables + eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) + + # Route into per-model subdirectory: tas_Amon_{MODEL}_lgm_... 
→ raw/models/{MODEL}/ + model=$(echo "$file" | cut -d_ -f3) + file="${TARGET_DIR}/${model}/${file}" + + #Process the file + echo -n "$file ..." + + #get the cached entry if any. + cached="$(grep -e "^$file" "$CACHE_FILE")" + + #if we have the cache entry but no file, clean it. + if [[ ! -f $file && "$cached" ]]; then + #the file was removed, clean the cache + remove_from_cache "$file" + unset cached + fi + + #check it wasn't modified + if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then + if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then + echo "Already downloaded and verified" + continue + elif ((update_files)); then + #user want's to overwrite newer files + rm $file + remove_from_cache "$file" + unset cached + else + #file on server is different from what we have. + echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" + continue + fi + fi + unset chksum_err_value chksum_err_count + + while : ; do + # (if we had the file size, we could check before trying to complete) + echo "Downloading" + [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" + if ((dry_run)); then + #all important info was already displayed, if in dry_run mode just abort + #No status will be stored + break + else + $wget -O "$file" $url || { failed=1; break; } + fi + + #check if file is there + if [[ -f $file ]]; then + ((debug)) && echo file found + if ((skip_checksum)); then + echo "Skipping check of file checksum" + break + fi + if [[ ! "$chksum" ]]; then + echo "Checksum not provided, can't verify file integrity" + break + fi + result_chksum=$(check_chksum "$file" $chksum_type $chksum) + if [[ "$result_chksum" != "$chksum" ]]; then + echo " $chksum_type failed!" 
+ if ((clean_work)); then + if !((chksum_err_count)); then + chksum_err_value=$result_chksum + chksum_err_count=2 + elif ((checksum_err_count--)); then + if [[ "$result_chksum" != "$chksum_err_value" ]]; then + #this is a real transmission problem + chksum_err_value=$result_chksum + chksum_err_count=2 + fi + else + #ok if here we keep getting the same "different" checksum + echo "The file returns always a different checksum!" + echo "Contact the data owner to verify what is happening." + echo + sleep 1 + break + fi + + rm $file + #try again + echo -n " re-trying..." + continue + else + echo " don't use -p or remove manually." + fi + else + echo " $chksum_type ok. done!" + echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE + fi + fi + #done! + break + done + + if ((failed)); then + echo "download failed" + unset failed + fi + + done <<<"$download_files" + +} + +dedup_cache_() { + local file=${1:-${CACHE_FILE}} + ((debug)) && echo "dedup'ing cache ${file} ..." + local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) + ((DEBUG)) && echo "$tmp" + echo "$tmp" > $file + ((debug)) && echo "(cache dedup'ed)" +} + +#do we have old results? Create the file if not +[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE + +# +# MAIN +# + +echo "Running $(basename $0) version: $version" +((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" +echo "Use $(basename $0) -h for help."$'\n' + +cat <<'EOF-MESSAGE' +Script created for 60 file(s) +(The count won't match if you manually edit this file!) 
+ +EOF-MESSAGE +sleep 1 + +check_os + +download + +dedup_cache_ + +echo "done" diff --git a/paleo_scripts/download_model_data/lig127k_pr.sh b/paleo_scripts/download_model_data/lig127k_pr.sh new file mode 100755 index 0000000..0b60119 --- /dev/null +++ b/paleo_scripts/download_model_data/lig127k_pr.sh @@ -0,0 +1,718 @@ +#!/bin/bash +############################################################################## +# ESGF wget download script +# +# Template version: 0.4 +# Generated by metagrid.esgf-west.org - 2026/05/12 17:40:08 +# Search URL: http://metagrid.esgf-west.org/proxy/wget +# Request method: POST +# +############################################################################### +# first be sure it's bash... anything out of bash or sh will break +# and the test will assure we are not using sh instead of bash +if [ $BASH ] && [ `basename $BASH` != bash ]; then + echo "######## This is a bash script! ##############" + echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." + echo "Trying to recover automatically..." + sleep 1 + /bin/bash $0 $@ + exit $? 
+fi + +version=0.4 +CACHE_FILE=.$(basename $0).status +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TARGET_DIR="${SCRIPT_DIR}/../paleo_data_cache/raw/models" +search_url='http://metagrid.esgf-west.org/proxy/wget' +request_method='POST' +url_params=( + '['CMIP6.PMIP.AWI.AWI-ESM-1-1-LR.lig127k.r1i1p1f1.Amon.pr.gn.v20200212|esgf-node.ornl.gov', 'CMIP6.PMIP.CAS.FGOALS-f3-L.lig127k.r1i1p1f1.Amon.pr.gr.v20191025|esgf-node.ornl.gov', 'CMIP6.PMIP.CAS.FGOALS-g3.lig127k.r1i1p1f1.Amon.pr.gn.v20191030|esgf-node.ornl.gov', 'CMIP6.PMIP.CNRM-CERFACS.CNRM-CM6-1.lig127k.r1i1p1f2.Amon.pr.gr.v20200212|esgf-node.ornl.gov', 'CMIP6.PMIP.CSIRO.ACCESS-ESM1-5.lig127k.r1i1p1f1.Amon.pr.gn.v20191206|esgf-node.ornl.gov', 'CMIP6.PMIP.EC-Earth-Consortium.EC-Earth3-LR.lig127k.r1i1p1f1.Amon.pr.gr.v20200409|esgf-node.ornl.gov', 'CMIP6.PMIP.INM.INM-CM4-8.lig127k.r1i1p1f1.Amon.pr.gr1.v20190802|esgf-node.ornl.gov', 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.lig127k.r1i1p1f1.Amon.pr.gr.v20180926|esgf-node.ornl.gov', 'CMIP6.PMIP.MIROC.MIROC-ES2L.lig127k.r1i1p1f2.Amon.pr.gn.v20191118|esgf-node.ornl.gov', 'CMIP6.PMIP.NASA-GISS.GISS-E2-1-G.lig127k.r1i1p1f1.Amon.pr.gn.v20190916|esgf-node.ornl.gov', 'CMIP6.PMIP.NCAR.CESM2.lig127k.r1i1p1f1.Amon.pr.gn.v20190923|esgf-node.ornl.gov', 'CMIP6.PMIP.NCC.NorESM1-F.lig127k.r1i1p1f1.Amon.pr.gn.v20190920|esgf-node.ornl.gov', 'CMIP6.PMIP.NCC.NorESM2-LM.lig127k.r1i1p1f1.Amon.pr.gn.v20191108|esgf-node.ornl.gov', 'CMIP6.PMIP.NERC.HadGEM3-GC31-LL.lig127k.r1i1p1f1.Amon.pr.gn.v20210114|esgf-node.ornl.gov', 'CMIP6.PMIP.NUIST.NESM3.lig127k.r1i1p1f1.Amon.pr.gn.v20190909|esgf-node.ornl.gov']' +) + +#These are the embedded files to be downloaded +download_files="$(cat < 10#${ver2[i]})) + then + return 1 + fi + if ((10#${ver1[i]} < 10#${ver2[i]})) + then + return 2 + fi + done + return 0 +} + +check_commands() { + #check wget + local MIN_WGET_VERSION=1.10 + vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION + case $? 
in + 2) #lower + wget -V + echo + echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 + exit 1 + esac +} + +usage() { + echo "Usage: $(basename $0) [flags]" + echo "Flags is one of:" + sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 + echo + echo "This command stores the states of the downloads in .$0.status" +} + +#defaults +debug=0 +clean_work=1 + +#parse flags +while getopts 'F:w:iuUnSpdvqh' OPT; do + case $OPT in + F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) + w) output="$OPTARG";; # : Write embedded files into a file and exit + i) insecure=1;; # : set insecure mode, i.e. don't check server certificate + u) update=1;; # : Issue the search again and see if something has changed. + U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) + n) dry_run=1;; # : Don't download any files, just report. + S) skip_checksum=1;; # : Skip file checksum + p) clean_work=0;; # : preserve data that failed checksum + d) verbose=1;debug=1;; # : display debug information + v) verbose=1;; # : be more verbose + q) quiet=1;; # : be less verbose + h) usage && exit 0;; # : displays this help + \?) echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; + \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; + esac +done +shift $(($OPTIND - 1)) + +#setup input as desired by the user +if [[ "$input_file" ]]; then + if [[ "$input_file" == '-' ]]; then + download_files="$(cat)" #read from STDIN + exec 0$output + exit +fi + +#assure we have everything we need +check_commands + +if ((update)); then + echo "Checking the server for changes..." 
+ post_data=$(IFS="&" ; echo "${url_params[*]}") + new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" + compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" + if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then + echo "No changes detected." + else + echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" + counter=0 + while [[ -f $0.old.$counter ]]; do ((counter++)); done + mv $0 $0.old.$counter + echo "$new_wget" > $0 + fi + exit 0 +fi + +check_chksum() { + local file="$1" + local chk_type=$2 + local chk_value=$3 + local local_chksum=Unknown + + case $chk_type in + md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; + sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; + *) echo "Can't verify checksum." && return 0;; + esac + + #verify + ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 + echo $local_chksum +} + +#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) +md5sum_() { + hash -r + if type md5sum >& /dev/null; then + echo $(md5sum $@) + else + echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') + fi +} + +#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) +sha256sum_() { + hash -r + if type sha256sum >& /dev/null; then + echo $(sha256sum $@) + elif type shasum >& /dev/null; then + echo $(shasum -a 256 $@) + else + echo $(sha2 -q -256 $@) + fi +} + +get_mod_time_() { + if ((MACOSX)); then + #on a mac modtime is stat -f %m + echo "$(stat -f %m $@)" + else + #on linux (cygwin) modtime is stat -c %Y + echo "$(stat -c %Y $@)" + fi + return 0; +} + +remove_from_cache() { + local entry="$1" + local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" + echo "$tmp_file" > "$CACHE_FILE" + unset cached +} + +download() { + wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" + + while read line + do + # read csv here document into proper variables + eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) + + # Route into per-model subdirectory: pr_Amon_{MODEL}_lig127k_... → raw/models/{MODEL}/ + model=$(echo "$file" | cut -d_ -f3) + file="${TARGET_DIR}/${model}/${file}" + + #Process the file + echo -n "$file ..." + + #get the cached entry if any. + cached="$(grep -e "^$file" "$CACHE_FILE")" + + #if we have the cache entry but no file, clean it. + if [[ ! -f $file && "$cached" ]]; then + #the file was removed, clean the cache + remove_from_cache "$file" + unset cached + fi + + #check it wasn't modified + if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then + if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then + echo "Already downloaded and verified" + continue + elif ((update_files)); then + #user want's to overwrite newer files + rm $file + remove_from_cache "$file" + unset cached + else + #file on server is different from what we have. + echo "WARNING: The remote file was changed (probably a new version is available). 
Use -U to Update/overwrite" + continue + fi + fi + unset chksum_err_value chksum_err_count + + while : ; do + # (if we had the file size, we could check before trying to complete) + echo "Downloading" + [[ ! -d "$(dirname "$file")" ]] && mkdir -p "$(dirname "$file")" + if ((dry_run)); then + #all important info was already displayed, if in dry_run mode just abort + #No status will be stored + break + else + $wget -O "$file" $url || { failed=1; break; } + fi + + #check if file is there + if [[ -f $file ]]; then + ((debug)) && echo file found + if ((skip_checksum)); then + echo "Skipping check of file checksum" + break + fi + if [[ ! "$chksum" ]]; then + echo "Checksum not provided, can't verify file integrity" + break + fi + result_chksum=$(check_chksum "$file" $chksum_type $chksum) + if [[ "$result_chksum" != "$chksum" ]]; then + echo " $chksum_type failed!" + if ((clean_work)); then + if !((chksum_err_count)); then + chksum_err_value=$result_chksum + chksum_err_count=2 + elif ((checksum_err_count--)); then + if [[ "$result_chksum" != "$chksum_err_value" ]]; then + #this is a real transmission problem + chksum_err_value=$result_chksum + chksum_err_count=2 + fi + else + #ok if here we keep getting the same "different" checksum + echo "The file returns always a different checksum!" + echo "Contact the data owner to verify what is happening." + echo + sleep 1 + break + fi + + rm $file + #try again + echo -n " re-trying..." + continue + else + echo " don't use -p or remove manually." + fi + else + echo " $chksum_type ok. done!" + echo "$file" $(get_mod_time_ "$file") $chksum >> $CACHE_FILE + fi + fi + #done! + break + done + + if ((failed)); then + echo "download failed" + unset failed + fi + + done <<<"$download_files" + +} + +dedup_cache_() { + local file=${1:-${CACHE_FILE}} + ((debug)) && echo "dedup'ing cache ${file} ..." 
+ local tmp=$(LC_ALL='C' sort -r -k1,2 $file | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2) + ((DEBUG)) && echo "$tmp" + echo "$tmp" > $file + ((debug)) && echo "(cache dedup'ed)" +} + +#do we have old results? Create the file if not +[ ! -f $CACHE_FILE ] && echo "#filename mtime checksum" > $CACHE_FILE && chmod 666 $CACHE_FILE + +# +# MAIN +# + +echo "Running $(basename $0) version: $version" +((verbose)) && echo "we use other tools in here, don't try to user their proposed 'options' directly" +echo "Use $(basename $0) -h for help."$'\n' + +cat <<'EOF-MESSAGE' +Script created for 338 file(s) +(The count won't match if you manually edit this file!) + +EOF-MESSAGE +sleep 1 + +check_os + +download + +dedup_cache_ + +echo "done" diff --git a/paleo_scrips/paleo_data_cache/EC-Earth3-LR/lig127k_wget_script.sh b/paleo_scripts/download_model_data/lig127k_tas.sh similarity index 65% rename from paleo_scrips/paleo_data_cache/EC-Earth3-LR/lig127k_wget_script.sh rename to paleo_scripts/download_model_data/lig127k_tas.sh index 0b11aa6..19c0be2 100755 --- a/paleo_scrips/paleo_data_cache/EC-Earth3-LR/lig127k_wget_script.sh +++ b/paleo_scripts/download_model_data/lig127k_tas.sh @@ -3,15 +3,15 @@ # ESGF wget download script # # Template version: 0.4 -# Generated by nimbus.llnl.gov - 2025/08/05 16:37:21 -# Search URL: http://nimbus.llnl.gov/wget +# Generated by metagrid.esgf-west.org - 2026/05/12 16:56:40 +# Search URL: http://metagrid.esgf-west.org/proxy/wget # Request method: POST -# +# ############################################################################### # first be sure it's bash... anything out of bash or sh will break # and the test will assure we are not using sh instead of bash if [ $BASH ] && [ `basename $BASH` != bash ]; then - echo "######## This is a bash script! ##############" + echo "######## This is a bash script! ##############" echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." echo "Trying to recover automatically..." 
sleep 1 @@ -21,14 +21,20 @@ fi version=0.4 CACHE_FILE=.$(basename $0).status -search_url='http://nimbus.llnl.gov/wget' +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TARGET_DIR="${SCRIPT_DIR}/../paleo_data_cache/raw/models" +search_url='http://metagrid.esgf-west.org/proxy/wget' request_method='POST' url_params=( - 'CMIP6.PMIP.EC-Earth-Consortium.EC-Earth3-LR.lig127k.r1i1p1f1.Amon.tas.gr.v20200409|eagle.alcf.anl.gov' + '['CMIP6.PMIP.AWI.AWI-ESM-1-1-LR.lig127k.r1i1p1f1.Amon.tas.gn.v20200212|eagle.alcf.anl.gov', 'CMIP6.PMIP.CAS.FGOALS-f3-L.lig127k.r1i1p1f1.Amon.tas.gr.v20191025|eagle.alcf.anl.gov', 'CMIP6.PMIP.CAS.FGOALS-g3.lig127k.r1i1p1f1.Amon.tas.gn.v20191030|esgf-node.ornl.gov', 'CMIP6.PMIP.CNRM-CERFACS.CNRM-CM6-1.lig127k.r1i1p1f2.Amon.tas.gr.v20200212|esgf-node.ornl.gov', 'CMIP6.PMIP.CSIRO.ACCESS-ESM1-5.lig127k.r1i1p1f1.Amon.tas.gn.v20191206|esgf-node.ornl.gov', 'CMIP6.PMIP.EC-Earth-Consortium.EC-Earth3-LR.lig127k.r1i1p1f1.Amon.tas.gr.v20200409|eagle.alcf.anl.gov', 'CMIP6.PMIP.INM.INM-CM4-8.lig127k.r1i1p1f1.Amon.tas.gr1.v20190802|eagle.alcf.anl.gov', 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.lig127k.r1i1p1f1.Amon.tas.gr.v20180926|esgf-node.ornl.gov', 'CMIP6.PMIP.MIROC.MIROC-ES2L.lig127k.r1i1p1f2.Amon.tas.gn.v20191118|eagle.alcf.anl.gov', 'CMIP6.PMIP.NASA-GISS.GISS-E2-1-G.lig127k.r1i1p1f1.Amon.tas.gn.v20190916|eagle.alcf.anl.gov', 'CMIP6.PMIP.NCAR.CESM2.lig127k.r1i1p1f1.Amon.tas.gn.v20190923|esgf-node.ornl.gov', 'CMIP6.PMIP.NCC.NorESM1-F.lig127k.r1i1p1f1.Amon.tas.gn.v20190920|eagle.alcf.anl.gov', 'CMIP6.PMIP.NCC.NorESM2-LM.lig127k.r1i1p1f1.Amon.tas.gn.v20191108|eagle.alcf.anl.gov', 'CMIP6.PMIP.NERC.HadGEM3-GC31-LL.lig127k.r1i1p1f1.Amon.tas.gn.v20210114|eagle.alcf.anl.gov', 'CMIP6.PMIP.NUIST.NESM3.lig127k.r1i1p1f1.Amon.tas.gn.v20190909|esgf-node.ornl.gov']' ) #These are the embedded files to be downloaded download_files="$(cat < $0 fi - exit 0 + exit 0 fi check_chksum() { @@ -441,25 +571,29 @@ remove_from_cache() { download() { wget="wget 
${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" - + while read line do # read csv here document into proper variables eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) + # Route into per-model subdirectory: tas_Amon_{MODEL}_lig127k_... → raw/models/{MODEL}/ + model=$(echo "$file" | cut -d_ -f3) + file="${TARGET_DIR}/${model}/${file}" + #Process the file echo -n "$file ..." #get the cached entry if any. cached="$(grep -e "^$file" "$CACHE_FILE")" - + #if we have the cache entry but no file, clean it. if [[ ! -f $file && "$cached" ]]; then #the file was removed, clean the cache remove_from_cache "$file" unset cached fi - + #check it wasn't modified if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then @@ -471,7 +605,7 @@ download() { remove_from_cache "$file" unset cached else - #file on server is different from what we have. + #file on server is different from what we have. echo "WARNING: The remote file was changed (probably a new version is available). Use -U to Update/overwrite" continue fi @@ -487,7 +621,7 @@ download() { #No status will be stored break else - $wget -O "$file" $url || { failed=1; break; } + $wget -O "$file" $url || { failed=1; break; } fi #check if file is there @@ -522,7 +656,7 @@ download() { sleep 1 break fi - + rm $file #try again echo -n " re-trying..." @@ -538,12 +672,12 @@ download() { #done! break done - + if ((failed)); then echo "download failed" unset failed fi - + done <<<"$download_files" } @@ -569,7 +703,7 @@ echo "Running $(basename $0) version: $version" echo "Use $(basename $0) -h for help."$'\n' cat <<'EOF-MESSAGE' -Script created for 210 file(s) +Script created for 338 file(s) (The count won't match if you manually edit this file!) 
EOF-MESSAGE diff --git a/paleo_scripts/download_model_data/midholocene_pr.sh b/paleo_scripts/download_model_data/midholocene_pr.sh new file mode 100755 index 0000000..2d08b3c --- /dev/null +++ b/paleo_scripts/download_model_data/midholocene_pr.sh @@ -0,0 +1,789 @@ +#!/bin/bash +############################################################################## +# ESGF wget download script +# +# Template version: 0.4 +# Generated by metagrid.esgf-west.org - 2026/05/12 17:46:29 +# Search URL: http://metagrid.esgf-west.org/proxy/wget +# Request method: POST +# +############################################################################### +# first be sure it's bash... anything out of bash or sh will break +# and the test will assure we are not using sh instead of bash +if [ $BASH ] && [ `basename $BASH` != bash ]; then + echo "######## This is a bash script! ##############" + echo "Change the execution bit 'chmod u+x $0' or start with 'bash $0' instead of sh." + echo "Trying to recover automatically..." + sleep 1 + /bin/bash $0 $@ + exit $? 
+fi + +version=0.4 +CACHE_FILE=.$(basename $0).status +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TARGET_DIR="${SCRIPT_DIR}/../paleo_data_cache/raw/models" +search_url='http://metagrid.esgf-west.org/proxy/wget' +request_method='POST' +url_params=( + '['CMIP6.PMIP.AWI.AWI-ESM-1-1-LR.midHolocene.r1i1p1f1.Amon.pr.gn.v20200212|esgf-node.ornl.gov', 'CMIP6.PMIP.CAS.FGOALS-f3-L.midHolocene.r1i1p1f1.Amon.pr.gr.v20191025|esgf-node.ornl.gov', 'CMIP6.PMIP.CAS.FGOALS-g3.midHolocene.r1i1p1f1.Amon.pr.gn.v20191024|esgf-node.ornl.gov', 'CMIP6.PMIP.CAS.FGOALS-g3.midHolocene.r2i1p1f1.Amon.pr.gn.v20191024|esgf-node.ornl.gov', 'CMIP6.PMIP.CSIRO.ACCESS-ESM1-5.midHolocene.r1i1p1f1.Amon.pr.gn.v20210422|esgf-node.ornl.gov', 'CMIP6.PMIP.EC-Earth-Consortium.EC-Earth3-LR.midHolocene.r1i1p1f1.Amon.pr.gr.v20200409|esgf-node.ornl.gov', 'CMIP6.PMIP.INM.INM-CM4-8.midHolocene.r1i1p1f1.Amon.pr.gr1.v20190802|esgf-node.ornl.gov', 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midHolocene.r1i1p1f1.Amon.pr.gr.v20180926|esgf-node.ornl.gov', 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midHolocene.r1i1p1f2.Amon.pr.gr.v20180926|esgf-node.ornl.gov', 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midHolocene.r1i1p1f3.Amon.pr.gr.v20180926|esgf-node.ornl.gov', 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midHolocene.r1i1p1f4.Amon.pr.gr.v20191118|esgf-node.ornl.gov', 'CMIP6.PMIP.MIROC.MIROC-ES2L.midHolocene.r1i1p1f2.Amon.pr.gn.v20191002|esgf-node.ornl.gov', 'CMIP6.PMIP.MPI-M.MPI-ESM1-2-LR.midHolocene.r1i1p1f1.Amon.pr.gn.v20190710|esgf-node.ornl.gov', 'CMIP6.PMIP.MRI.MRI-ESM2-0.midHolocene.r1i1p1f1.Amon.pr.gn.v20190919|esgf-node.ornl.gov', 'CMIP6.PMIP.NASA-GISS.GISS-E2-1-G.midHolocene.r1i1p1f1.Amon.pr.gn.v20190916|esgf-node.ornl.gov', 'CMIP6.PMIP.NCAR.CESM2.midHolocene.r1i1p1f1.Amon.pr.gn.v20190923|esgf-node.ornl.gov', 'CMIP6.PMIP.NCC.NorESM1-F.midHolocene.r1i1p1f1.Amon.pr.gn.v20190920|esgf-node.ornl.gov', 'CMIP6.PMIP.NCC.NorESM2-LM.midHolocene.r1i1p1f1.Amon.pr.gn.v20191108|esgf-node.ornl.gov', 
'CMIP6.PMIP.NERC.HadGEM3-GC31-LL.midHolocene.r1i1p1f1.Amon.pr.gn.v20210111|esgf-node.ornl.gov', 'CMIP6.PMIP.NUIST.NESM3.midHolocene.r1i1p1f1.Amon.pr.gn.v20190813|esgf-node.ornl.gov']' +) + +#These are the embedded files to be downloaded +download_files="$(cat < 10#${ver2[i]})) + then + return 1 + fi + if ((10#${ver1[i]} < 10#${ver2[i]})) + then + return 2 + fi + done + return 0 +} + +check_commands() { + #check wget + local MIN_WGET_VERSION=1.10 + vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION + case $? in + 2) #lower + wget -V + echo + echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 + exit 1 + esac +} + +usage() { + echo "Usage: $(basename $0) [flags]" + echo "Flags is one of:" + sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 + echo + echo "This command stores the states of the downloads in .$0.status" +} + +#defaults +debug=0 +clean_work=1 + +#parse flags +while getopts 'F:w:iuUnSpdvqh' OPT; do + case $OPT in + F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) + w) output="$OPTARG";; # : Write embedded files into a file and exit + i) insecure=1;; # : set insecure mode, i.e. don't check server certificate + u) update=1;; # : Issue the search again and see if something has changed. + U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) + n) dry_run=1;; # : Don't download any files, just report. + S) skip_checksum=1;; # : Skip file checksum + p) clean_work=0;; # : preserve data that failed checksum + d) verbose=1;debug=1;; # : display debug information + v) verbose=1;; # : be more verbose + q) quiet=1;; # : be less verbose + h) usage && exit 0;; # : displays this help + \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; + \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; + esac +done +shift $(($OPTIND - 1)) + +#setup input as desired by the user +if [[ "$input_file" ]]; then + if [[ "$input_file" == '-' ]]; then + download_files="$(cat)" #read from STDIN + exec 0$output + exit +fi + +#assure we have everything we need +check_commands + +if ((update)); then + echo "Checking the server for changes..." + post_data=$(IFS="&" ; echo "${url_params[*]}") + new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" + compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" + if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then + echo "No changes detected." + else + echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" + counter=0 + while [[ -f $0.old.$counter ]]; do ((counter++)); done + mv $0 $0.old.$counter + echo "$new_wget" > $0 + fi + exit 0 +fi + +check_chksum() { + local file="$1" + local chk_type=$2 + local chk_value=$3 + local local_chksum=Unknown + + case $chk_type in + md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; + sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; + *) echo "Can't verify checksum." && return 0;; + esac + + #verify + ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 + echo $local_chksum +} + +#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) +md5sum_() { + hash -r + if type md5sum >& /dev/null; then + echo $(md5sum $@) + else + echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') + fi +} + +#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) +sha256sum_() { + hash -r + if type sha256sum >& /dev/null; then + echo $(sha256sum $@) + elif type shasum >& /dev/null; then + echo $(shasum -a 256 $@) + else + echo $(sha2 -q -256 $@) + fi +} + +get_mod_time_() { + if ((MACOSX)); then + #on a mac modtime is stat -f %m + echo "$(stat -f %m $@)" + else + #on linux (cygwin) modtime is stat -c %Y + echo "$(stat -c %Y $@)" + fi + return 0; +} + +remove_from_cache() { + local entry="$1" + local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" + echo "$tmp_file" > "$CACHE_FILE" + unset cached +} + +download() { + wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" + + while read line + do + # read csv here document into proper variables + eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) + + # Route into per-model subdirectory: pr_Amon_{MODEL}_midHolocene_... → raw/models/{MODEL}/ + model=$(echo "$file" | cut -d_ -f3) + file="${TARGET_DIR}/${model}/${file}" + + #Process the file + echo -n "$file ..." + + #get the cached entry if any. + cached="$(grep -e "^$file" "$CACHE_FILE")" + + #if we have the cache entry but no file, clean it. + if [[ ! -f $file && "$cached" ]]; then + #the file was removed, clean the cache + remove_from_cache "$file" + unset cached + fi + + #check it wasn't modified + if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then + if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then + echo "Already downloaded and verified" + continue + elif ((update_files)); then + #user want's to overwrite newer files + rm $file + remove_from_cache "$file" + unset cached + else + #file on server is different from what we have. + echo "WARNING: The remote file was changed (probably a new version is available). 
# Rewrite the download cache so that it holds a single entry per file name.
#
# Arguments:
#   $1 - cache file to rewrite in place (defaults to $CACHE_FILE)
#
# Each cache line is "<filename> <mtime> <checksum>". The reverse sort on
# fields 1-2 places, for every file name, the line with the greatest mtime
# (byte-wise comparison under the C locale) first; awk then keeps only the
# first line seen per file name and the final sort orders the survivors by
# mtime.
dedup_cache_() {
    local file=${1:-${CACHE_FILE}}
    ((debug)) && echo "dedup'ing cache ${file} ..."
    local tmp=$(LC_ALL='C' sort -r -k1,2 "$file" | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2)
    # The flag set by -d is lowercase "debug"; the previous test of $DEBUG
    # could never be true, so the deduplicated content was never shown.
    ((debug)) && echo "$tmp"
    echo "$tmp" > "$file"
    ((debug)) && echo "(cache dedup'ed)"
}
+fi + +version=0.4 +CACHE_FILE=.$(basename $0).status +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +TARGET_DIR="${SCRIPT_DIR}/../paleo_data_cache/raw/models" +search_url='http://metagrid.esgf-west.org/proxy/wget' +request_method='POST' +url_params=( + '['CMIP6.PMIP.AWI.AWI-ESM-1-1-LR.midHolocene.r1i1p1f1.Amon.tas.gn.v20200212|esgf-node.ornl.gov', 'CMIP6.PMIP.CAS.FGOALS-f3-L.midHolocene.r1i1p1f1.Amon.tas.gr.v20191025|esgf-node.ornl.gov', 'CMIP6.PMIP.CAS.FGOALS-g3.midHolocene.r1i1p1f1.Amon.tas.gn.v20191024|esgf-node.ornl.gov', 'CMIP6.PMIP.CAS.FGOALS-g3.midHolocene.r2i1p1f1.Amon.tas.gn.v20191024|esgf-node.ornl.gov', 'CMIP6.PMIP.CSIRO.ACCESS-ESM1-5.midHolocene.r1i1p1f1.Amon.tas.gn.v20210422|esgf-node.ornl.gov', 'CMIP6.PMIP.EC-Earth-Consortium.EC-Earth3-LR.midHolocene.r1i1p1f1.Amon.tas.gr.v20200409|esgf-node.ornl.gov', 'CMIP6.PMIP.INM.INM-CM4-8.midHolocene.r1i1p1f1.Amon.tas.gr1.v20190802|esgf-node.ornl.gov', 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midHolocene.r1i1p1f1.Amon.tas.gr.v20180926|esgf-node.ornl.gov', 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midHolocene.r1i1p1f2.Amon.tas.gr.v20180926|esgf-node.ornl.gov', 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midHolocene.r1i1p1f3.Amon.tas.gr.v20180926|esgf-node.ornl.gov', 'CMIP6.PMIP.IPSL.IPSL-CM6A-LR.midHolocene.r1i1p1f4.Amon.tas.gr.v20191118|esgf-node.ornl.gov', 'CMIP6.PMIP.MIROC.MIROC-ES2L.midHolocene.r1i1p1f2.Amon.tas.gn.v20191002|esgf-node.ornl.gov', 'CMIP6.PMIP.MPI-M.MPI-ESM1-2-LR.midHolocene.r1i1p1f1.Amon.tas.gn.v20190710|esgf-node.ornl.gov', 'CMIP6.PMIP.MRI.MRI-ESM2-0.midHolocene.r1i1p1f1.Amon.tas.gn.v20190919|esgf-node.ornl.gov', 'CMIP6.PMIP.NASA-GISS.GISS-E2-1-G.midHolocene.r1i1p1f1.Amon.tas.gn.v20190916|esgf-node.ornl.gov', 'CMIP6.PMIP.NCAR.CESM2.midHolocene.r1i1p1f1.Amon.tas.gn.v20190923|esgf-node.ornl.gov', 'CMIP6.PMIP.NCC.NorESM1-F.midHolocene.r1i1p1f1.Amon.tas.gn.v20190920|esgf-node.ornl.gov', 'CMIP6.PMIP.NCC.NorESM2-LM.midHolocene.r1i1p1f1.Amon.tas.gn.v20191108|esgf-node.ornl.gov', 
'CMIP6.PMIP.NERC.HadGEM3-GC31-LL.midHolocene.r1i1p1f1.Amon.tas.gn.v20210111|esgf-node.ornl.gov', 'CMIP6.PMIP.NUIST.NESM3.midHolocene.r1i1p1f1.Amon.tas.gn.v20190813|esgf-node.ornl.gov']' +) + +#These are the embedded files to be downloaded +download_files="$(cat < 10#${ver2[i]})) + then + return 1 + fi + if ((10#${ver1[i]} < 10#${ver2[i]})) + then + return 2 + fi + done + return 0 +} + +check_commands() { + #check wget + local MIN_WGET_VERSION=1.10 + vercomp $(wget -V | sed -n 's/^.* \([1-9]\.[0-9.]*\) .*$/\1/p') $MIN_WGET_VERSION + case $? in + 2) #lower + wget -V + echo + echo "** ERROR: wget version is too old. Use version $MIN_WGET_VERSION or greater. **" >&2 + exit 1 + esac +} + +usage() { + echo "Usage: $(basename $0) [flags]" + echo "Flags is one of:" + sed -n '/^while getopts/,/^done/ s/^\([^)]*\)[^#]*#\(.*$\)/\1 \2/p' $0 + echo + echo "This command stores the states of the downloads in .$0.status" +} + +#defaults +debug=0 +clean_work=1 + +#parse flags +while getopts 'F:w:iuUnSpdvqh' OPT; do + case $OPT in + F) input_file="$OPTARG";; # : read input from file instead of the embedded one (use - to read from stdin) + w) output="$OPTARG";; # : Write embedded files into a file and exit + i) insecure=1;; # : set insecure mode, i.e. don't check server certificate + u) update=1;; # : Issue the search again and see if something has changed. + U) update_files=1;; # : Update files from server overwriting local ones (detect with -u) + n) dry_run=1;; # : Don't download any files, just report. + S) skip_checksum=1;; # : Skip file checksum + p) clean_work=0;; # : preserve data that failed checksum + d) verbose=1;debug=1;; # : display debug information + v) verbose=1;; # : be more verbose + q) quiet=1;; # : be less verbose + h) usage && exit 0;; # : displays this help + \?) 
echo "Unknown option '$OPTARG'" >&2 && usage && exit 1;; + \:) echo "Missing parameter for flag '$OPTARG'" >&2 && usage && exit 1;; + esac +done +shift $(($OPTIND - 1)) + +#setup input as desired by the user +if [[ "$input_file" ]]; then + if [[ "$input_file" == '-' ]]; then + download_files="$(cat)" #read from STDIN + exec 0$output + exit +fi + +#assure we have everything we need +check_commands + +if ((update)); then + echo "Checking the server for changes..." + post_data=$(IFS="&" ; echo "${url_params[*]}") + new_wget="$(wget --post-data "$post_data" "$search_url" -qO -)" + compare_cmd="grep -vE '^(# Generated by|# Search URL|search_url=)'" + if diff -q <(eval $compare_cmd<<<"$new_wget") <(eval $compare_cmd $0) >/dev/null; then + echo "No changes detected." + else + echo "Wget was changed. Dowloading. (old renamed to $0.old.#N)" + counter=0 + while [[ -f $0.old.$counter ]]; do ((counter++)); done + mv $0 $0.old.$counter + echo "$new_wget" > $0 + fi + exit 0 +fi + +check_chksum() { + local file="$1" + local chk_type=$2 + local chk_value=$3 + local local_chksum=Unknown + + case $chk_type in + md5) local_chksum=$(md5sum_ $file | cut -f1 -d" ");; + sha256) local_chksum=$(sha256sum_ $file|awk '{print $1}'|cut -d ' ' -f1);; + *) echo "Can't verify checksum." && return 0;; + esac + + #verify + ((debug)) && echo "local:$local_chksum vs remote:$chk_value" >&2 + echo $local_chksum +} + +#Our own md5sum function call that takes into account machines that don't have md5sum but do have md5 (i.e. mac os x) +md5sum_() { + hash -r + if type md5sum >& /dev/null; then + echo $(md5sum $@) + else + echo $(md5 $@ | sed -n 's/MD5[ ]*\(.*\)[^=]*=[ ]*\(.*$\)/\2 \1/p') + fi +} + +#Our own sha256sum function call that takes into account machines that don't have sha256sum but do have sha2 (i.e. 
mac os x) +sha256sum_() { + hash -r + if type sha256sum >& /dev/null; then + echo $(sha256sum $@) + elif type shasum >& /dev/null; then + echo $(shasum -a 256 $@) + else + echo $(sha2 -q -256 $@) + fi +} + +get_mod_time_() { + if ((MACOSX)); then + #on a mac modtime is stat -f %m + echo "$(stat -f %m $@)" + else + #on linux (cygwin) modtime is stat -c %Y + echo "$(stat -c %Y $@)" + fi + return 0; +} + +remove_from_cache() { + local entry="$1" + local tmp_file="$(grep -ve "^$entry" "$CACHE_FILE")" + echo "$tmp_file" > "$CACHE_FILE" + unset cached +} + +download() { + wget="wget ${insecure:+--no-check-certificate} ${quiet:+-q} ${quiet:--v}" + + while read line + do + # read csv here document into proper variables + eval $(awk -F "' '" '{$0=substr($0,2,length($0)-2); $3=tolower($3); print "file=\""$1"\";url=\""$2"\";chksum_type=\""$3"\";chksum=\""$4"\""}' <(echo $line) ) + + # Route into per-model subdirectory: tas_Amon_{MODEL}_midHolocene_... → raw/models/{MODEL}/ + model=$(echo "$file" | cut -d_ -f3) + file="${TARGET_DIR}/${model}/${file}" + + #Process the file + echo -n "$file ..." + + #get the cached entry if any. + cached="$(grep -e "^$file" "$CACHE_FILE")" + + #if we have the cache entry but no file, clean it. + if [[ ! -f $file && "$cached" ]]; then + #the file was removed, clean the cache + remove_from_cache "$file" + unset cached + fi + + #check it wasn't modified + if [[ -n "$cached" && "$(get_mod_time_ $file)" == $(echo "$cached" | cut -d ' ' -f2) ]]; then + if [[ "$chksum" == "$(echo "$cached" | cut -d ' ' -f3)" ]]; then + echo "Already downloaded and verified" + continue + elif ((update_files)); then + #user want's to overwrite newer files + rm $file + remove_from_cache "$file" + unset cached + else + #file on server is different from what we have. + echo "WARNING: The remote file was changed (probably a new version is available). 
# Rewrite the download cache so that it holds a single entry per file name.
#
# Arguments:
#   $1 - cache file to rewrite in place (defaults to $CACHE_FILE)
#
# Each cache line is "<filename> <mtime> <checksum>". The reverse sort on
# fields 1-2 places, for every file name, the line with the greatest mtime
# (byte-wise comparison under the C locale) first; awk then keeps only the
# first line seen per file name and the final sort orders the survivors by
# mtime.
dedup_cache_() {
    local file=${1:-${CACHE_FILE}}
    ((debug)) && echo "dedup'ing cache ${file} ..."
    local tmp=$(LC_ALL='C' sort -r -k1,2 "$file" | awk '!($1 in a) {a[$1];print $0}' | sort -k2,2)
    # The flag set by -d is lowercase "debug"; the previous test of $DEBUG
    # could never be true, so the deduplicated content was never shown.
    ((debug)) && echo "$tmp"
    echo "$tmp" > "$file"
    ((debug)) && echo "(cache dedup'ed)"
}
2019 — LIG boreal precipitation proxy + (manual download required — Science.org blocks automated downloads) + tierney_hansen Tierney THansenMethod.csv — Hansen-method deep-time reconstruction + +Usage: + python download_paleo_observations.py + python download_paleo_observations.py --dataset lgmda + python download_paleo_observations.py --dataset lig127k osman2021 + python download_paleo_observations.py --dry-run + python download_paleo_observations.py --list +""" + +import argparse +import logging +import shutil +import subprocess +import sys +import zipfile +from pathlib import Path + +RAW_DIR = Path(__file__).parent / "paleo_data_cache" / "raw" + +# --------------------------------------------------------------------------- +# URL registries +# --------------------------------------------------------------------------- + +LGMDA_FILES = { + "lgmDA_lgm_ATM_monthly_climo.nc": "https://github.com/jesstierney/lgmDA/raw/refs/heads/master/version2.1/lgmDA_lgm_ATM_monthly_climo.nc", + "lgmDA_hol_ATM_monthly_climo.nc": "https://github.com/jesstierney/lgmDA/raw/refs/heads/master/version2.0/lgmDA_hol_ATM_monthly_climo.nc", +} + +TEMP12K_BASE_URL = ( + "https://www.ncei.noaa.gov/pub/data/paleo/reconstructions/kaufman2020/" +) +TEMP12K_FILES = ["temp12k_alldata.nc"] + +TEMP12K_V1_BASE_URL = "https://www.ncei.noaa.gov/pub/data/paleo/reconstructions/climate12k/temperature/version1.0.0/" +TEMP12K_V1_FILES = [ + "Temp12k_v1_0_0.pkl", + "Temp12k_v1_essential_metadata_NOAA.csv", + "Temp12k_v1_record_list_NOAA.csv", +] + +OSMAN2021_BASE_URL = ( + "https://www.ncei.noaa.gov/pub/data/paleo/reconstructions/osman2021/" +) +OSMAN2021_FILES = [ + "LGMR_GMST_climo.nc", + "LGMR_GMST_ens.nc", + "LGMR_SAT_climo.nc", + "LGMR_SST_climo.nc", +] + +SISAL_V3_BASE_URL = "https://www.ncei.noaa.gov/pub/data/paleo/speleothem/SISAL-v3/" +SISAL_V3_FILES = [ + "sisalv3_database_mysql_csv.zip", + "sisalv3_codes.zip", +] + +LIG127K_ZIP_URL = ( + 
def _wget_simple(url: str, dest: Path, dry_run: bool = False) -> None:
    """Download ``url`` to ``dest`` unless ``dest`` already holds data.

    The payload is written to a sibling ``<name>.part`` file and moved onto
    ``dest`` only after wget exits successfully. Writing straight to ``dest``
    would leave a truncated, non-empty file behind after an interrupted
    transfer, and the size check below would then skip it on every later run.

    Args:
        url: Remote location passed to wget.
        dest: Final path of the downloaded file.
        dry_run: When true, only log what would be downloaded.

    Raises:
        subprocess.CalledProcessError: wget exited with a non-zero status.
        FileNotFoundError: the wget executable is not available.
    """
    if dest.exists() and dest.stat().st_size > 0:
        logging.info(f" [skip] {dest.name}")
        return
    if dry_run:
        logging.info(f" [dry-run] would download {dest.name}")
        return
    logging.info(f" Downloading {dest.name} ...")
    dest.parent.mkdir(parents=True, exist_ok=True)
    partial = dest.with_name(dest.name + ".part")
    try:
        subprocess.run(["wget", "-q", "-O", str(partial), url], check=True)
        partial.replace(dest)
    finally:
        # No-op after a successful move; removes the fragment after a failure.
        partial.unlink(missing_ok=True)


def _extract_zip_robust(zip_path: Path, dest_dir: Path) -> Path:
    """Extract a zip into ``dest_dir`` and return the extracted root directory.

    The top-level directory name is read from the archive listing instead of
    being assumed. The ``__MACOSX`` folder that macOS archivers add is
    metadata, not payload, so it is ignored when looking for the root;
    otherwise it is either mistaken for the root or hides the single real one.

    Args:
        zip_path: Archive to extract.
        dest_dir: Directory receiving the archive content.

    Returns:
        ``dest_dir / <top>`` when the archive has exactly one payload
        top-level directory, otherwise ``dest_dir`` so that the caller can
        search below it.
    """
    metadata_dirs = {"__MACOSX"}
    with zipfile.ZipFile(zip_path) as zf:
        top_dirs = {
            Path(name).parts[0] for name in zf.namelist() if "/" in name
        } - metadata_dirs
        zf.extractall(dest_dir)

    if len(top_dirs) == 1:
        return dest_dir / top_dirs.pop()
    # Zero or several top-level dirs: return dest_dir and let the caller search
    return dest_dir
def _download_sisal_v3(obs_dir: Path, dry_run: bool) -> None:
    """Fetch the SISAL v3 speleothem archives into ``obs_dir / "sisal_v3"``.

    The target folder is only created for a real run; every file listed in
    ``SISAL_V3_FILES`` is then handed to ``_wget_simple`` together with the
    ``dry_run`` flag.
    """
    target_dir = obs_dir / "sisal_v3"
    if dry_run is False or not dry_run:
        target_dir.mkdir(exist_ok=True)
    for archive_name in SISAL_V3_FILES:
        source_url = SISAL_V3_BASE_URL + archive_name
        _wget_simple(source_url, target_dir / archive_name, dry_run)
def _download_scussolini2019(obs_dir: Path, dry_run: bool) -> None:
    """Scussolini et al. 2019 — LIG boreal precipitation proxy (manual download required).

    Nothing is fetched here. The function only normalises the name of a
    manually downloaded spreadsheet, or tells the user how to obtain it.

    Args:
        obs_dir: Directory holding the raw observation files.
        dry_run: When true, report the pending rename without touching disk.
    """
    scussolini_dest = obs_dir / "scussolini2019_lig_precip_proxy.xlsx"
    scussolini_orig = obs_dir / "aax7047_external_database_s1.xlsx"

    if scussolini_dest.exists() and scussolini_dest.stat().st_size > 0:
        logging.info(" [skip] scussolini2019_lig_precip_proxy.xlsx")
        return

    if scussolini_orig.exists() and scussolini_orig.stat().st_size > 0:
        if dry_run:
            # A dry run must not modify the cache: report and stop.
            logging.info(
                " [dry-run] would rename aax7047_external_database_s1.xlsx → scussolini2019_lig_precip_proxy.xlsx"
            )
            return
        # replace() also overwrites an empty placeholder left at the target.
        scussolini_orig.replace(scussolini_dest)
        logging.info(
            " Renamed aax7047_external_database_s1.xlsx → scussolini2019_lig_precip_proxy.xlsx"
        )
        return

    logging.warning(
        "\n"
        " ACTION REQUIRED: scussolini2019_lig_precip_proxy.xlsx must be downloaded manually.\n"
        " Science.org blocks automated downloads for this file.\n"
        "\n"
        " 1. Open this URL in a browser:\n"
        " https://www.science.org/doi/suppl/10.1126/sciadv.aax7047/suppl_file/aax7047_external_database_s1.xlsx\n"
        " 2. Save the file (downloads as aax7047_external_database_s1.xlsx).\n"
        " 3. Move it to:\n"
        f" {scussolini_dest}\n"
        " Then re-run this script to rename it automatically.\n"
    )
def list_datasets() -> None:
    """Print each registered dataset key next to its short description."""
    report = ["Available datasets (--dataset ):"]
    # Registry values are (description, downloader); only the text is shown.
    report.extend(
        f" {key:<18} {entry[0]}" for key, entry in DATASET_REGISTRY.items()
    )
    print("\n".join(report))
def main() -> None:
    """Command-line entry point of the observational download script.

    Sets up logging to stdout, parses ``--dataset`` / ``--dry-run`` /
    ``--list``, expands the ``all`` keyword into every registered dataset
    (keeping first-seen order, without duplicates) and exits with status 0
    only when every requested download succeeded.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    cli = argparse.ArgumentParser(
        description="Download raw paleoclimate observational datasets.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    cli.add_argument(
        "--dataset",
        nargs="+",
        default=["all"],
        metavar="NAME",
        help=(
            "One or more dataset names to download, or 'all' (default). "
            "Run --list to see available names."
        ),
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Report what would be downloaded without downloading",
    )
    cli.add_argument(
        "--list",
        action="store_true",
        help="List available datasets and exit",
    )
    options = cli.parse_args()

    if options.list:
        list_datasets()
        return

    # dict.fromkeys keeps insertion order and drops repeats, so "all" followed
    # by an explicit name does not download that dataset twice.
    requested = dict.fromkeys(
        name
        for token in options.dataset
        for name in (DATASET_REGISTRY if token == "all" else (token,))
    )

    all_ok = download_datasets(list(requested), options.dry_run)
    sys.exit(0 if all_ok else 1)
+ +Temperature benchmarks: + LGM: lgmDA absolute temperatures (Tierney et al. 2020) + lgmDA anomaly (LGM - Holocene DA) + LGMR SAT anomaly (Osman et al. 2021) + Bartlein et al. 2011 pollen-based MAT anomaly + midHolocene: Bartlein et al. 2011 pollen-based MAT anomaly + lig127k: Otto-Bliesner et al. 2021 proxy temperature anomalies + +Precipitation benchmarks: + LGM: Bartlein et al. 2011 pollen-based MAP anomaly + midHolocene: Bartlein et al. 2011 pollen-based MAP anomaly + lig127k: Scussolini et al. 2019 semi-quantitative precipitation changes + +CRPS uses proxy/DA uncertainty as the width of a Gaussian forecast distribution, +scoring the model value as the "observation". This directly penalises models +whose paleo response falls outside proxy uncertainty bounds. + +For anomaly-based comparisons the model paleo temperature is differenced against +the lgmDA Holocene (PI) field as a spatially resolved modern reference. Pass +--use-picontrol to load the model's own piControl from the main ClimateBench +DataFinder pipeline instead (requires processed piControl data). + +Usage: + cd paleo_scripts + python paleo_benchmark.py --model AWI-ESM-1-1-LR --period lgm + python paleo_benchmark.py --model all --period all + python paleo_benchmark.py --model MIROC-ES2L --period lgm --use-picontrol + python paleo_benchmark.py --model all --period lgm --obs-source lgmDA + python paleo_benchmark.py --model all --period lgm --obs-source Bartlein2011 --variable tas + python paleo_benchmark.py --model all --period all --save-to-cloud + +Results saved to: + ../results/paleo/{period}_paleo_benchmark_results.csv + +References: + Tierney et al. (2020) Nature 584, 569–573 [lgmDA] + Osman et al. (2021) Nature 599, 485–490 [LGMR] + Bartlein et al. (2011) Clim Dyn 37, 775–802 [pollen reconstructions] + Otto-Bliesner et al. (2021) Clim Past 17, 63–88 [LIG proxies] + Scussolini et al. 
(2019) Science Advances [LIG precip] +""" + +import argparse +import logging +import sys +from pathlib import Path + +import numpy as np +import pandas as pd +import xarray as xr +from scipy.special import erf + +sys.path.append("..") +from utils import save_results_csv, standardize_dims + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- +PALEO_DIR = Path(__file__).parent +OBS_DIR = PALEO_DIR / "paleo_data_cache" / "processed" / "observations" +MODEL_PROC_DIR = PALEO_DIR / "paleo_data_cache" / "processed" / "models" + +logger = logging.getLogger(__name__) +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + force=True, +) + + +# --------------------------------------------------------------------------- +# CRPS and spatial metrics +# --------------------------------------------------------------------------- + + +def _crps_gaussian(obs: np.ndarray, mu: np.ndarray, sigma: np.ndarray) -> np.ndarray: + """CRPS for Gaussian N(mu, sigma) scored against point observations. + + Positive-orientated: lower CRPS = better forecast. + Formula from Gneiting & Raftery (2007): + CRPS = sigma * [z*(2*Phi(z)-1) + 2*phi(z) - 1/sqrt(pi)] + where z = (obs - mu) / sigma. + """ + sigma = np.maximum(np.abs(sigma), 1e-6) + z = (obs - mu) / sigma + phi = np.exp(-0.5 * z**2) / np.sqrt(2 * np.pi) + Phi = 0.5 * (1.0 + erf(z / np.sqrt(2.0))) + return sigma * (z * (2.0 * Phi - 1.0) + 2.0 * phi - 1.0 / np.sqrt(np.pi)) + + +def _spatial_metrics( + model_vals: np.ndarray, + proxy_mu: np.ndarray, + proxy_sigma: np.ndarray, + weights: np.ndarray | None = None, +) -> dict: + """Compute RMSE, MAE, and CRPS across valid proxy sites/grid cells. 
+ + Args: + model_vals: model values interpolated/regridded to proxy locations + proxy_mu: proxy reconstruction mean (same units as model_vals) + proxy_sigma: proxy reconstruction uncertainty (1-sigma) + weights: optional area weights (e.g. cos-lat); uniform if None + + Returns: + dict with keys: n_sites, rmse, mae, mean_crps, crps_skill + """ + flat_model = np.asarray(model_vals).ravel() + flat_mu = np.asarray(proxy_mu).ravel() + flat_sigma = np.asarray(proxy_sigma).ravel() + flat_w = np.ones_like(flat_mu) if weights is None else np.asarray(weights).ravel() + + valid = np.isfinite(flat_model) & np.isfinite(flat_mu) & np.isfinite(flat_sigma) + if valid.sum() == 0: + return dict( + n_sites=0, rmse=np.nan, mae=np.nan, mean_crps=np.nan, crps_skill=np.nan + ) + + m = flat_model[valid] + mu = flat_mu[valid] + sig = flat_sigma[valid] + w = flat_w[valid] + w = w / w.sum() + + diff = m - mu + rmse = float(np.sqrt(np.sum(w * diff**2))) + mae = float(np.sum(w * np.abs(diff))) + + crps_vals = _crps_gaussian(m, mu, sig) + mean_crps = float(np.sum(w * crps_vals)) + + # Skill relative to a "climatological" forecast: N(mean(proxy), std(proxy)) + clim_mu = float(np.sum(w * mu)) + clim_sig = float(np.sqrt(np.sum(w * (mu - clim_mu) ** 2))) + if clim_sig < 1e-6: + clim_sig = float(np.mean(sig)) + crps_ref = float(np.mean(_crps_gaussian(m, clim_mu, clim_sig))) + crps_skill = float(1.0 - mean_crps / crps_ref) if crps_ref > 1e-9 else np.nan + + return dict( + n_sites=int(valid.sum()), + rmse=round(rmse, 4), + mae=round(mae, 4), + mean_crps=round(mean_crps, 4), + crps_skill=round(crps_skill, 4), + ) + + +# --------------------------------------------------------------------------- +# Regridding helpers +# --------------------------------------------------------------------------- + + +def _to_celsius(da: xr.DataArray) -> xr.DataArray: + """Convert K → °C if values look like Kelvin (mean > 100).""" + if float(da.mean()) > 100: + return da - 273.15 + return da + + +def _regrid( + source: 
def _load_model_tas(model: str, period: str) -> xr.DataArray | None:
    """Load annual-mean tas from the processed monthly climatology.

    Reads ``{period}_tas_monthly_climo.nc`` from the model's processed
    directory, averages over the ``month`` dimension, standardises the
    dimension names and converts Kelvin to °C when the values look like
    Kelvin.

    Returns:
        DataArray in °C, or None (with a warning) if the file is missing.
    """
    nc = MODEL_PROC_DIR / model / f"{period}_tas_monthly_climo.nc"
    if not nc.exists():
        logger.warning(
            f" [skip] No processed {period} tas for {model} — run process_paleo_models.py first"
        )
        return None
    # Reduce and load inside the context manager so the file handle is
    # released here instead of staying open for the rest of the run (this
    # loader is called once per model and period).
    with xr.open_dataset(nc) as ds:
        da = ds["tas"].mean("month").load()
    da = standardize_dims(da.to_dataset(name="tas"))["tas"]
    return _to_celsius(da)
def _load_lgmda_pi_tas() -> xr.DataArray | None:
    """Load the lgmDA Holocene (PI) annual-mean tas used as modern reference.

    Reads the ``pi_tas`` variable of ``lgmDA_v2.1_holocene_tas.nc``; if it
    carries a ``month`` dimension it is averaged to an annual mean. Kelvin
    is converted to °C when the values look like Kelvin.

    Returns:
        DataArray in °C, or None (with a warning) if the file is missing.
    """
    nc = OBS_DIR / "multi_period" / "lgmDA_v2.1_holocene_tas.nc"
    if not nc.exists():
        logger.warning(
            " lgmDA_v2.1_holocene_tas.nc not found — cannot compute model anomalies"
        )
        return None
    # Pull the field into memory before leaving the block so the file handle
    # can be closed; the original left the dataset open.
    with xr.open_dataset(nc) as ds:
        pi = ds["pi_tas"]
        if "month" in pi.dims:
            pi = pi.mean(dim="month")
        pi = pi.load()
    return _to_celsius(pi)
+ """ + pi_on_model = _regrid(pi_ref, model_tas.lat.values, model_tas.lon.values) + return model_tas - pi_on_model + + +# --------------------------------------------------------------------------- +# Per-dataset benchmark functions +# --------------------------------------------------------------------------- + + +def _result_row(model, period, dataset, variable, metrics: dict) -> dict: + return dict( + model=model, period=period, dataset=dataset, variable=variable, **metrics + ) + + +def bench_lgmda_absolute( + model_tas: xr.DataArray, model: str, period: str +) -> list[dict]: + """Absolute temperature comparison: model lgm tas vs lgmDA lgm_tas.""" + nc = OBS_DIR / "lgm" / "lgmDA_v2.1_tas.nc" + if not nc.exists(): + return [] + ds = xr.open_dataset(nc) + lgmda_lgm = _to_celsius((ds["pi_tas"] + ds["tas"]).mean(dim="month")) + lgmda_std = ds["tas_std"].mean(dim="month") + + model_on_lgmda = _regrid(model_tas, lgmda_lgm.lat.values, lgmda_lgm.lon.values) + cos_w = np.cos(np.deg2rad(lgmda_lgm.lat.values)) + w2d = np.tile(cos_w[:, None], (1, len(lgmda_lgm.lon))) + + metrics = _spatial_metrics( + model_on_lgmda.values, lgmda_lgm.values, lgmda_std.values, w2d + ) + logger.info( + f" lgmDA absolute tas: n={metrics['n_sites']} RMSE={metrics['rmse']:.2f}°C CRPS={metrics['mean_crps']:.3f} skill={metrics['crps_skill']:.3f}" + ) + return [_result_row(model, period, "lgmDA", "tas_absolute_C", metrics)] + + +def bench_lgmda_anomaly( + model_anom: xr.DataArray, model: str, period: str +) -> list[dict]: + """Anomaly comparison: model − lgmDA_pi vs lgmDA (lgm_tas − pi_tas).""" + nc = OBS_DIR / "lgm" / "lgmDA_v2.1_tas.nc" + if not nc.exists(): + return [] + ds = xr.open_dataset(nc) + proxy_anom = ds["tas"].mean(dim="month") + proxy_sigma = ds["tas_std"].mean(dim="month") + + model_on_lgmda = _regrid(model_anom, proxy_anom.lat.values, proxy_anom.lon.values) + cos_w = np.cos(np.deg2rad(proxy_anom.lat.values)) + w2d = np.tile(cos_w[:, None], (1, len(proxy_anom.lon))) + + metrics = 
def bench_lgmr_sat(model_anom: xr.DataArray, model: str, period: str) -> list[dict]:
    """Score a model anomaly field against LGMR SAT (Osman et al. 2021).

    The model anomaly is interpolated onto the LGMR grid and compared with
    the ``tas`` / ``tas_std`` variables of the processed file using cos-lat
    area weights.

    Returns:
        A one-row list, or ``[]`` when the processed LGMR file is missing.
    """
    obs_path = OBS_DIR / "lgm" / "LGMR_SAT_tas.nc"
    if not obs_path.exists():
        return []

    obs = xr.open_dataset(obs_path)
    lgmr_mean, lgmr_sigma = obs["tas"], obs["tas_std"]
    grid_lat = lgmr_mean.lat.values
    grid_lon = lgmr_mean.lon.values

    model_on_grid = _regrid(model_anom, grid_lat, grid_lon)

    # One cos(lat) weight per latitude row, repeated across all longitudes.
    # NOTE(review): assumes the fields are ordered (lat, lon) — confirm.
    row_weight = np.cos(np.deg2rad(grid_lat))
    area_w = np.repeat(row_weight[:, None], len(lgmr_mean.lon), axis=1)

    metrics = _spatial_metrics(
        model_on_grid.values, lgmr_mean.values, lgmr_sigma.values, area_w
    )
    logger.info(
        f" LGMR SAT anomaly: n={metrics['n_sites']} RMSE={metrics['rmse']:.2f}°C CRPS={metrics['mean_crps']:.3f} skill={metrics['crps_skill']:.3f}"
    )
    return [_result_row(model, period, "LGMR_SAT", "tas_anomaly_K", metrics)]
def bench_bartlein_pr(
    model_pr_anom: xr.DataArray | None, period: str, model: str
) -> list[dict]:
    """Pollen-based MAP anomaly comparison (Bartlein et al. 2011).

    pr in mm/yr; pr_std is standard error.

    Note the argument order (``period`` before ``model``), which differs from
    the other ``bench_*`` functions that take ``model`` first.

    Returns a one-row list, or ``[]`` when the model anomaly is None, the
    processed file for ``period`` does not exist, or it has no ``pr``
    variable.
    """
    if model_pr_anom is None:
        return []
    nc = OBS_DIR / period / "Bartlein2011_pr.nc"
    if not nc.exists():
        return []
    ds = xr.open_dataset(nc)
    if "pr" not in ds:
        return []
    proxy_mu = ds["pr"]
    proxy_sigma = ds["pr_std"]
    # Significance flag: prefer pr_sig_val, then pr_sig.
    # NOTE(review): if neither exists the fallback is an all-zero field, so
    # the `!= 0` test below masks out every cell and the row reports
    # n_sites=0 — confirm this is intended rather than "keep all cells".
    sig_var = ds["pr_sig_val"] if "pr_sig_val" in ds else ds.get("pr_sig", proxy_mu * 0)
    # Keep only cells whose significance flag is finite and non-zero.
    sig_mask = np.isfinite(sig_var.values) & (sig_var.values != 0)

    # Masked-out cells become NaN and are dropped inside _spatial_metrics.
    proxy_mu_masked = proxy_mu.where(
        xr.DataArray(sig_mask, dims=proxy_mu.dims, coords=proxy_mu.coords)
    )
    proxy_sig_masked = proxy_sigma.where(
        xr.DataArray(sig_mask, dims=proxy_sigma.dims, coords=proxy_sigma.coords)
    )

    # Bring the model anomaly onto the proxy grid before comparing.
    model_on_bart = _regrid(model_pr_anom, proxy_mu.lat.values, proxy_mu.lon.values)
    # cos(lat) weights, one per latitude row, tiled across longitudes.
    cos_w = np.cos(np.deg2rad(proxy_mu.lat.values))
    w2d = np.tile(cos_w[:, None], (1, len(proxy_mu.lon)))

    metrics = _spatial_metrics(
        model_on_bart.values, proxy_mu_masked.values, proxy_sig_masked.values, w2d
    )
    logger.info(
        f" Bartlein MAP anomaly ({period}): n={metrics['n_sites']} RMSE={metrics['rmse']:.1f}mm/yr CRPS={metrics['mean_crps']:.3f} skill={metrics['crps_skill']:.3f}"
    )
    return [_result_row(model, period, "Bartlein2011", "pr_anomaly_mmyr", metrics)]
def bench_scussolini_lig(
    model_pr_anom: xr.DataArray | None, model: str, period: str
) -> list[dict]:
    """Score model LIG precipitation anomalies at Scussolini et al. (2019) sites.

    Only sites with a finite quantitative ΔP value and reliability ≥ 1 are
    scored. The proxy uncertainty follows the reliability class: 150 mm/yr
    where reliability ≥ 2, otherwise 300 mm/yr. When the file has no
    ``pr_reliability`` variable every site is treated as reliability 1.

    Returns:
        A one-row list, or ``[]`` when the model anomaly, the processed file,
        its ``pr`` variable, or every usable site is missing.
    """
    if model_pr_anom is None:
        return []
    obs_path = OBS_DIR / "lig127k" / "Scussolini2019_pr.nc"
    if not obs_path.exists():
        return []
    obs = xr.open_dataset(obs_path)
    if "pr" not in obs:
        logger.warning(" Scussolini: no pr variable in NetCDF")
        return []

    site_lat = obs["lat"].values
    site_lon = obs["lon"].values
    delta_p = obs["pr"].values
    if "pr_reliability" in obs:
        reliability = obs["pr_reliability"].values
    else:
        reliability = np.ones(len(site_lat))

    keep = np.isfinite(delta_p) & (reliability >= 1)
    if not keep.any():
        logger.warning(" Scussolini: no quantitative precipitation sites available")
        return []

    # Higher reliability class → tighter assumed proxy uncertainty.
    sigma = np.where(reliability[keep] >= 2, 150.0, 300.0)
    model_at_sites = _interp_to_points(model_pr_anom, site_lat[keep], site_lon[keep])

    metrics = _spatial_metrics(model_at_sites, delta_p[keep], sigma)
    logger.info(
        f" Scussolini LIG precip: n={metrics['n_sites']} RMSE={metrics['rmse']:.1f}mm/yr CRPS={metrics['mean_crps']:.3f} skill={metrics['crps_skill']:.3f}"
    )
    return [_result_row(model, period, "Scussolini2019", "pr_anomaly_mmyr", metrics)]
# ---------------------------------------------------------------------------
# Obs source registry — maps period → source → {variable → benchmark functions}
# Used for --obs-source filtering
#
# The per-period runners below read this registry only for the default list
# of source names (its keys) when --obs-source is not given; the benchmark
# functions themselves are called directly by those runners.
# bench_temp12k is a stub that returns no rows.
# ---------------------------------------------------------------------------

OBS_SOURCE_REGISTRY: dict[str, dict[str, dict[str, tuple]]] = {
    "lgm": {
        "lgmDA": {"tas": (bench_lgmda_absolute, bench_lgmda_anomaly)},
        "LGMR_SAT": {"tas": (bench_lgmr_sat,)},
        "Bartlein2011": {"tas": (bench_bartlein_tas,), "pr": (bench_bartlein_pr,)},
    },
    "midHolocene": {
        "Bartlein2011": {"tas": (bench_bartlein_tas,), "pr": (bench_bartlein_pr,)},
        "Temp12k": {"tas": (bench_temp12k,)},
    },
    "lig127k": {
        "OttoBliesner2021": {"tas": (bench_ottobliesner_lig,)},
        "Scussolini2019": {"pr": (bench_scussolini_lig,)},
    },
}
def _run_midholocene(
    model: str,
    use_picontrol: bool,
    obs_sources: list[str] | None,
    variables: list[str],
) -> list[dict]:
    """Run every configured mid-Holocene benchmark for one model.

    Temperature and precipitation are handled independently. The
    precipitation anomaly is computed by ``_compute_pr_anom`` and never uses
    the temperature PI reference, so a missing tas file or a missing tas PI
    reference no longer suppresses the precipitation benchmark.

    Args:
        model: Model name (sub-directory of the processed model cache).
        use_picontrol: Use the model's own piControl as the PI reference;
            falls back to the lgmDA Holocene field if it cannot be loaded,
            as ``_run_lgm`` does.
        obs_sources: Source names to benchmark against; None or empty means
            all sources registered for midHolocene.
        variables: Variables to benchmark ("tas" and/or "pr").

    Returns:
        List of result rows (possibly empty).
    """
    rows: list[dict] = []
    sources = obs_sources or list(OBS_SOURCE_REGISTRY["midHolocene"])

    if "tas" in variables:
        model_tas = _load_model_tas(model, "midHolocene")
        if model_tas is not None:
            pi_ref = _load_picontrol_tas(model) if use_picontrol else None
            if pi_ref is None:
                if use_picontrol:
                    logger.warning(" Falling back to lgmDA PI reference")
                pi_ref = _load_lgmda_pi_tas()

            if pi_ref is None:
                logger.warning(
                    " No PI reference — skipping anomaly benchmarks for midHolocene"
                )
            else:
                model_anom = _compute_model_anom(model_tas, pi_ref)
                if "Bartlein2011" in sources:
                    rows += bench_bartlein_tas(model_anom, "midHolocene", model)
                if "Temp12k" in sources:
                    rows += bench_temp12k(model_anom, model, "midHolocene")

    if "pr" in variables and "Bartlein2011" in sources:
        model_pr = _load_model_pr(model, "midHolocene")
        model_pr_anom = (
            _compute_pr_anom(model_pr, model, "midHolocene", use_picontrol)
            if model_pr is not None
            else None
        )
        rows += bench_bartlein_pr(model_pr_anom, "midHolocene", model)

    return rows
def _compute_pr_anom(
    model_pr: xr.DataArray | None,
    model: str,
    period: str,
    use_picontrol: bool,
) -> xr.DataArray | None:
    """Compute model precipitation anomaly (mm/yr) relative to PI reference.

    The only PI reference available for precipitation is the model's own
    piControl, loaded through the main benchmark ``DataFinder``. The
    piControl time mean is scaled by 86400 * 365.25, regridded to the
    ``model_pr`` grid and subtracted.

    Args:
        model_pr: Paleo-period precipitation; None short-circuits to None.
        model: Model name passed to ``DataFinder``.
        period: Period label, used in log messages only.
        use_picontrol: Whether a piControl reference may be loaded.

    Returns:
        The anomaly field, or None when no reference is available. The
        reason is logged as a warning.
    """
    if model_pr is None:
        return None

    if not use_picontrol:
        logger.warning(
            f" No precipitation PI reference for {period} — skipping pr anomaly benchmarks. "
            "Pass --use-picontrol to enable."
        )
        return None

    try:
        # NOTE(review): directory is spelled "benchmark_scrips" — confirm it
        # matches the repository layout.
        scripts_dir = str(PALEO_DIR.parent / "benchmark_scrips")
        if scripts_dir not in sys.path:
            # This runs once per model and period; avoid growing sys.path.
            sys.path.append(scripts_dir)
        from benchmark_utils import DataFinder

        df = DataFinder(model=model, variable="pr", start_year=1850, end_year=2000)
        pi_ds = df.load_experiment_ds(experiment="piControl", ensemble_mean=True)
        pi_pr = standardize_dims(pi_ds)["pr"].mean(dim="time") * 86400 * 365.25
        pi_pr_on_model = _regrid(pi_pr, model_pr.lat.values, model_pr.lon.values)
        return model_pr - pi_pr_on_model
    except Exception as e:
        # Deliberately broad: a missing piControl must not abort the run.
        logger.warning(f" Could not load piControl pr for {model}: {e}")

    # The flag was already given, so do not advise the user to pass it.
    logger.warning(
        f" No precipitation PI reference for {model}/{period} — skipping pr anomaly benchmarks."
    )
    return None
benchmark (default: all)", + ) + parser.add_argument( + "--obs-source", + nargs="+", + default=None, + metavar="SOURCE", + help=( + "Observation source(s) to benchmark against. " + "LGM: lgmDA, LGMR_SAT, Bartlein2011. " + "midHolocene: Bartlein2011, Temp12k. " + "lig127k: OttoBliesner2021, Scussolini2019. " + "Default: all sources for the selected period." + ), + ) + parser.add_argument( + "--variable", + nargs="+", + default=["all"], + choices=["tas", "pr", "all"], + help="Variable(s) to benchmark: tas, pr, or all (default: all)", + ) + parser.add_argument( + "--use-picontrol", + action="store_true", + default=False, + help="Load model piControl from main ClimateBench DataFinder for anomaly computation", + ) + parser.add_argument( + "--save-to-cloud", + action="store_true", + default=False, + help="Save results to GCS bucket 'climatebench'", + ) + parser.add_argument( + "--overwrite", + action="store_true", + default=False, + help="Overwrite existing results CSV instead of appending", + ) + args = parser.parse_args() + + # Resolve model list + if args.model == "all": + available = [ + p.name + for p in MODEL_PROC_DIR.iterdir() + if p.is_dir() and any(p.glob("*_tas_monthly_climo.nc")) + ] + if not available: + logger.error( + "No processed model data found in paleo_data_cache/processed/models/. " + "Run process_paleo_models.py first." 
+ ) + sys.exit(1) + model_list = sorted(available) + else: + model_list = [args.model] + + # Resolve period list + period_list = list(PERIOD_RUNNERS) if args.period == "all" else [args.period] + + # Resolve variable list + variable_list = ["tas", "pr"] if "all" in args.variable else args.variable + + logger.info(f"Models: {model_list}") + logger.info(f"Periods: {period_list}") + logger.info(f"Variables: {variable_list}") + logger.info(f"Sources: {args.obs_source or 'all'}") + logger.info( + f"PI reference: {'piControl (DataFinder)' if args.use_picontrol else 'lgmDA Holocene'}" + ) + + main( + models=model_list, + periods=period_list, + use_picontrol=args.use_picontrol, + save_to_cloud=args.save_to_cloud, + overwrite=args.overwrite, + obs_sources=args.obs_source, + variables=variable_list, + ) diff --git a/paleo_scripts/process_paleo_models.py b/paleo_scripts/process_paleo_models.py new file mode 100644 index 0000000..3c67cd5 --- /dev/null +++ b/paleo_scripts/process_paleo_models.py @@ -0,0 +1,220 @@ +""" +Process raw CMIP6 paleoclimate model data into monthly climatologies. + +For each model/period/variable combination, all raw Amon NetCDF chunks are +concatenated, a 12-month climatology is computed, and the result is written to: + + paleo_data_cache/processed/models/{MODEL}/{period}_{variable}_monthly_climo.nc + +Annual mean is computed on the fly by callers (mean over the month dimension). 
+ +Usage: + python process_paleo_models.py + python process_paleo_models.py --model AWI-ESM-1-1-LR --period lgm + python process_paleo_models.py --model AWI-ESM-1-1-LR --period lgm --variable pr + python process_paleo_models.py --model all --period all --variable all --overwrite + python process_paleo_models.py --model all --period lgm --delete-raw + python process_paleo_models.py --log-level DEBUG +""" + +import argparse +import logging +import sys +from datetime import date +from pathlib import Path + +import xarray as xr + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from utils import standardize_dims + +PALEO_DIR = Path(__file__).parent +RAW_MODELS = PALEO_DIR / "paleo_data_cache" / "raw" / "models" +PROC_MODELS = PALEO_DIR / "paleo_data_cache" / "processed" / "models" + +PROCESSING_DATE = date.today().isoformat() + +KNOWN_PERIODS = ["lgm", "lig127k", "midHolocene", "midPliocene-eoi400"] +KNOWN_VARIABLES = ["tas", "pr"] + + +# --------------------------------------------------------------------------- +# Core processing +# --------------------------------------------------------------------------- + + +def _discover_periods(model_dir: Path) -> list[str]: + """Return periods that have at least one raw file in model_dir.""" + periods = set() + for f in model_dir.glob("*.nc"): + parts = f.name.split("_") + # filename pattern: {var}_Amon_{model}_{period}_... + if len(parts) >= 4: + periods.add(parts[3]) + return sorted(periods) + + +def _process_one( + model: str, period: str, variable: str, overwrite: bool, delete_raw: bool = False +) -> bool: + """Process a single model/period/variable. 
def main() -> None:
    """Command-line entry point: build monthly climatologies from raw files.

    Resolves the requested models (``all`` = every sub-directory of the raw
    model cache), variables and periods (``all`` = discovered per model from
    the raw file names), runs ``_process_one`` for each combination and logs
    how many outputs were written versus skipped.
    """
    cli = argparse.ArgumentParser(
        description="Process CMIP6 paleo model data into monthly climatologies."
    )
    cli.add_argument(
        "--model",
        nargs="+",
        default=["all"],
        help="Model name(s) or 'all' to discover from raw/models/ subdirectories",
    )
    cli.add_argument(
        "--period",
        nargs="+",
        default=["all"],
        choices=KNOWN_PERIODS + ["all"],
        help="Period(s) or 'all'",
    )
    cli.add_argument(
        "--variable",
        nargs="+",
        default=["all"],
        choices=KNOWN_VARIABLES + ["all"],
        help="Variable(s) or 'all'",
    )
    for flag, text in (
        ("--overwrite", "Reprocess even if output already exists"),
        ("--delete-raw", "Delete raw source files after successful processing"),
    ):
        cli.add_argument(flag, action="store_true", help=text)
    cli.add_argument(
        "--log-level", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"]
    )
    opts = cli.parse_args()

    logging.basicConfig(
        level=getattr(logging, opts.log_level), format="%(levelname)s %(message)s"
    )

    # Resolve models
    if "all" not in opts.model:
        models = opts.model
    else:
        if not RAW_MODELS.exists():
            logging.error(f"Raw models directory not found: {RAW_MODELS}")
            sys.exit(1)
        models = sorted(entry.name for entry in RAW_MODELS.iterdir() if entry.is_dir())

    # Resolve variables
    variables = KNOWN_VARIABLES if "all" in opts.variable else opts.variable
    discover = "all" in opts.period

    written = 0
    skipped = 0

    for model in models:
        model_dir = RAW_MODELS / model
        if not model_dir.exists():
            logging.warning(f"[skip] {model} — directory not found in raw/models/")
            continue

        # Periods are either the explicit list or discovered from file names.
        periods = _discover_periods(model_dir) if discover else opts.period
        if discover and not periods:
            logging.warning(f"[skip] {model} — no raw NetCDF files found")
            continue

        outcomes = [
            _process_one(model, period, variable, opts.overwrite, opts.delete_raw)
            for period in periods
            for variable in variables
        ]
        n_written = sum(1 for outcome in outcomes if outcome)
        written += n_written
        skipped += len(outcomes) - n_written

    logging.info(f"Done — {written} file(s) written, {skipped} skipped.")
def _write_nc(ds: xr.Dataset, path: Path, attrs: dict) -> None:
    """Write ``ds`` to ``path`` as NetCDF with standardised global attributes.

    The dataset's global attributes are replaced by ``attrs``; a
    ``processing_date`` entry is added when the caller did not supply one.
    Parent directories are created as needed and an existing file at ``path``
    is removed before writing.

    Args:
        ds: Dataset to write. Its ``attrs`` are overwritten.
        path: Destination file; expected to live under ``PALEO_DIR`` (the log
            message reports the path relative to it).
        attrs: Global attributes to store. Not modified by this function.
    """
    # Work on a copy so adding the default does not mutate the caller's dict.
    global_attrs = dict(attrs)
    global_attrs.setdefault("processing_date", PROCESSING_DATE)
    ds.attrs = global_attrs
    path.parent.mkdir(parents=True, exist_ok=True)
    path.unlink(missing_ok=True)
    ds.to_netcdf(path)
    logging.info(f" Saved {path.relative_to(PALEO_DIR)}")
open(path, "w") as f: + for line in comment_lines: + f.write(f"# {line}\n") + df.to_csv(f, index=False) + logging.info(f" Saved {path.relative_to(PALEO_DIR)} ({len(df)} rows)") + + +# --------------------------------------------------------------------------- +# IPCC AR6 Figure 7.19 +# --------------------------------------------------------------------------- + + +def _process_ipcc_ar6(raw: Path, proc: Path) -> None: + """Multi-period global mean temperature anomalies → multi_period/ipcc_ar6_fig7_19.csv""" + p = raw / "Figure7_19_obs.csv" + if not p.exists(): + logging.warning(" [skip] ipcc_ar6 — Figure7_19_obs.csv not found") + return + + df = pd.read_csv(p, header=2) + df.columns = ["time_period", "tas_min_anom", "tas_anom", "tas_max_anom"] + df["units"] = "K" + + _write_csv( + df, + proc / "multi_period" / "ipcc_ar6_fig7_19.csv", + [ + "source: IPCC AR6 Figure 7.19", + "source_url: https://dap.ceda.ac.uk/badc/ar6_wg1/data/ch_07/ch7_fig19/", + "variable: tas anomaly relative to pre-industrial", + "units: K", + "periods: Eocene, Pliocene, LGM, and others", + f"processing_date: {PROCESSING_DATE}", + ], + ) + + +# --------------------------------------------------------------------------- +# Tierney 2020 deep-time reconstruction +# --------------------------------------------------------------------------- + + +def _process_tierney2020(raw: Path, proc: Path) -> None: + """Deep-time global mean TAS timeseries → multi_period/tierney2020_global_tas.csv""" + p = raw / "THansenMethod.csv" + if not p.exists(): + logging.warning(" [skip] tierney2020 — THansenMethod.csv not found") + return + + df = pd.read_csv(p) + df.columns = ["age_Ma", "tas_degC"] + df["units"] = "degC" + + _write_csv( + df, + proc / "multi_period" / "tierney2020_global_tas.csv", + [ + "source: Tierney et al. 
(2020) Hansen-method deep-time reconstruction", + "source_url: https://github.com/jesstierney/PastClimates", + "variable: global mean surface temperature", + "units: degC (relative to pre-industrial)", + "age_Ma: millions of years before present", + f"processing_date: {PROCESSING_DATE}", + ], + ) + + +# --------------------------------------------------------------------------- +# lgmDA — LGM data assimilation (Tierney et al. 2020) +# --------------------------------------------------------------------------- + + +def _process_lgmda(raw: Path, proc: Path) -> None: + """lgmDA → lgm/lgmDA_v2.1_tas.nc with pi_tas, tas (anomaly), tas_std. + + Also writes multi_period/lgmDA_v2.1_holocene_tas.nc as a standalone + Holocene PI reference for use in anomaly computation across all periods. + """ + path_hol = raw / "lgmDA_hol_ATM_monthly_climo.nc" + path_lgm = raw / "lgmDA_lgm_ATM_monthly_climo.nc" + if not path_hol.exists() or not path_lgm.exists(): + logging.warning(" [skip] lgmda — raw files not found") + return + + def _load_lgmda(path: Path) -> xr.Dataset: + ds = xr.open_dataset(path).load() + return ( + ds.swap_dims({"nmonth": "nMonth"}) + .set_index({"nLat": "lat", "nLon": "lon", "nMonth": "month"}) + .rename({"nLat": "lat", "nLon": "lon", "nMonth": "month"}) + ) + + ds_hol = _load_lgmda(path_hol) + ds_lgm = _load_lgmda(path_lgm) + + # --- lgm/lgmDA_v2.1_tas.nc --- + # pi_tas: Holocene monthly climatology (absolute) + # tas: LGM − Holocene monthly anomaly + # tas_std: LGM posterior standard deviation + lgm_ds = xr.Dataset( + { + "pi_tas": ds_hol["tas"], # Holocene monthly clim + "tas": ds_lgm["tas"] - ds_hol["tas"], # LGM anomaly + "tas_std": ds_lgm["tas_std"], # LGM uncertainty + } + ) + _write_nc( + lgm_ds, + proc / "lgm" / "lgmDA_v2.1_tas.nc", + { + "source": "Tierney et al. 
(2020)", + "doi": "10.1038/s41586-020-2617-x", + "source_url": "https://github.com/jesstierney/lgmDA", + "variable": "tas", + "units": "K", + "period": "lgm", + "anomaly_ref": "Holocene (lgmDA v2.0)", + "pi_tas_description": "Holocene (PI) monthly mean surface air temperature (absolute)", + "tas_description": "LGM − Holocene monthly surface air temperature anomaly", + "tas_std_description": "LGM posterior 1-sigma uncertainty", + }, + ) + + # --- multi_period/lgmDA_v2.1_holocene_tas.nc --- + # Standalone Holocene reference file used as PI baseline for all periods + hol_ds = ds_hol[["tas", "tas_std"]].rename( + {"tas": "pi_tas", "tas_std": "pi_tas_std"} + ) + _write_nc( + hol_ds, + proc / "multi_period" / "lgmDA_v2.1_holocene_tas.nc", + { + "source": "Tierney et al. (2020)", + "doi": "10.1038/s41586-020-2617-x", + "source_url": "https://github.com/jesstierney/lgmDA", + "variable": "tas", + "units": "K", + "period": "Holocene (PI reference)", + "description": "Holocene monthly mean surface air temperature — PI reference for anomaly computation", + }, + ) + + +# --------------------------------------------------------------------------- +# LGMR SAT (Osman et al. 
2021) +# --------------------------------------------------------------------------- + + +def _process_lgmr_sat(raw: Path, proc: Path) -> None: + """LGMR SAT → lgm/LGMR_SAT_tas.nc (LGM-window mean, lat/lon grid).""" + p = raw / "osman2021" / "LGMR_SAT_climo.nc" + if not p.exists(): + logging.warning(" [skip] lgmr_sat — LGMR_SAT_climo.nc not found") + return + + ds = xr.open_dataset(p).load() + + # Average over the LGM age window + lgm_mask = (ds.age >= LGM_AGE_MIN) & (ds.age <= LGM_AGE_MAX) + ds_lgm = ds.sel(age=lgm_mask).mean(dim="age") + n_ages = int(lgm_mask.sum()) + logging.info( + f" LGMR SAT: averaged over {n_ages} age slices ({LGM_AGE_MIN}–{LGM_AGE_MAX} BP)" + ) + + out_ds = xr.Dataset({"tas": ds_lgm["sat"], "tas_std": ds_lgm["sat_std"]}) + _write_nc( + out_ds, + proc / "lgm" / "LGMR_SAT_tas.nc", + { + "source": "Osman et al. (2021)", + "doi": "10.1038/s41586-021-03984-4", + "source_url": "https://www.ncei.noaa.gov/pub/data/paleo/reconstructions/osman2021/", + "variable": "tas", + "units": "degC", + "period": "lgm", + "anomaly_ref": "modern (LGMR reanalysis internal reference)", + "lgm_age_window_BP": f"{LGM_AGE_MIN}–{LGM_AGE_MAX}", + }, + ) + + +# --------------------------------------------------------------------------- +# LGMR SST (Osman et al. 
2021) +# --------------------------------------------------------------------------- + + +def _process_lgmr_sst(raw: Path, proc: Path) -> None: + """LGMR SST → lgm/LGMR_SST_tos.nc (2D curvilinear lat/lon grid).""" + p = raw / "osman2021" / "LGMR_SST_climo.nc" + if not p.exists(): + logging.warning(" [skip] lgmr_sst — LGMR_SST_climo.nc not found") + return + + ds = xr.open_dataset(p).load() + + # Average over the LGM age window + lgm_mask = (ds.age >= LGM_AGE_MIN) & (ds.age <= LGM_AGE_MAX) + ds_lgm = ds.sel(age=lgm_mask).mean(dim="age") + + # The SST grid has 2D lat/lon; promote them to proper coordinates + lat_2d = ds_lgm["lat"].values + lon_2d = ds_lgm["lon"].values + ds_lgm = ds_lgm.drop_vars(["lat", "lon"]) + ds_lgm = ds_lgm.rename({"lat": "y", "lon": "x"}) + ny, nx = lat_2d.shape + ds_lgm = ds_lgm.assign_coords(y=np.arange(ny), x=np.arange(nx)) + ds_lgm = ds_lgm.assign_coords( + lat=xr.DataArray(lat_2d, dims=["y", "x"]), + lon=xr.DataArray(lon_2d, dims=["y", "x"]), + ) + if "nEns" in ds_lgm: + ds_lgm = ds_lgm.drop_vars("nEns") + + # Roll x so longitude starts near 0° (raw grid starts at ~320°) + roll_by = int(np.argmin(ds_lgm["lon"].values[ny // 2, :])) + ds_lgm = ds_lgm.roll(x=-roll_by, roll_coords=False) + ds_lgm = ds_lgm.assign_coords(x=np.arange(nx)) + + out_ds = xr.Dataset({"tos": ds_lgm["sst"], "tos_std": ds_lgm["sst_std"]}) + _write_nc( + out_ds, + proc / "lgm" / "LGMR_SST_tos.nc", + { + "source": "Osman et al. (2021)", + "doi": "10.1038/s41586-021-03984-4", + "source_url": "https://www.ncei.noaa.gov/pub/data/paleo/reconstructions/osman2021/", + "variable": "tos", + "units": "degC", + "period": "lgm", + "anomaly_ref": "modern (LGMR reanalysis internal reference)", + "grid": "curvilinear 2D lat/lon (y, x dimensions)", + "lgm_age_window_BP": f"{LGM_AGE_MIN}–{LGM_AGE_MAX}", + }, + ) + + +# --------------------------------------------------------------------------- +# Bartlein et al. 
2011 +# --------------------------------------------------------------------------- + +_BARTLEIN_PERIOD_MAP = { + "06ka": ("midHolocene", "MIDH"), + "21ka": ("lgm", "LGM"), +} + + +def _extract_bartlein_zip(raw: Path) -> Optional[Path]: + """Extract the Bartlein zip to a temp-like subdirectory; return path or None.""" + zp = raw / "bartlein2011_pollen_climate_recon.zip" + if not zp.exists(): + logging.warning(" [skip] bartlein2011 — zip not found") + return None + dest = raw / "bartlein2011" + if dest.exists(): + # Already extracted + return dest + dest.mkdir() + with zipfile.ZipFile(zp) as zf: + zf.extractall(dest) + return dest + + +def _process_bartlein2011(raw: Path, proc: Path) -> None: + """Bartlein → lgm/Bartlein2011_{tas,pr}.nc and midHolocene/Bartlein2011_{tas,pr}.nc""" + bart_dir = _extract_bartlein_zip(raw) + if bart_dir is None: + return + + per_period: dict[str, dict[str, xr.Dataset]] = {} + + for ka, (period_dir, period_label) in _BARTLEIN_PERIOD_MAP.items(): + mat_file = bart_dir / f"mat_delta_{ka}_ALL_grid_2x2.nc" + map_file = bart_dir / f"map_delta_{ka}_ALL_grid_2x2.nc" + + if not mat_file.exists() or not map_file.exists(): + logging.warning(f" [skip] bartlein2011 {ka} — NC files not found in zip") + continue + + ds_mat = xr.open_dataset(mat_file) + ds_map = xr.open_dataset(map_file) + + tas_ds = xr.Dataset( + { + "tas": ds_mat["mat_anm_mean"], + "tas_std": ds_mat["mat_se_mean"], + "tas_sig_val": ds_mat["mat_sig"], + } + ) + pr_ds = xr.Dataset( + { + "pr": ds_map["map_anm_mean"], + "pr_std": ds_map["map_se_mean"], + "pr_sig_val": ds_map["map_sig"], + } + ) + + common_attrs = { + "source": "Bartlein et al. 
(2011)", + "doi": "10.1007/s00382-010-0904-1", + "source_url": "https://static-content.springer.com/esm/art%3A10.1007%2Fs00382-010-0904-1/", + "period": period_dir, + "processing_date": PROCESSING_DATE, + } + + _write_nc( + tas_ds, + proc / period_dir / "Bartlein2011_tas.nc", + { + **common_attrs, + "variable": "tas", + "units": "K (anomaly relative to pre-industrial)", + "tas_description": "Mean Annual Temperature anomaly (mat_anm_mean)", + "tas_std_description": "Standard error of MAT anomaly (mat_se_mean)", + "tas_sig_val_description": "Significance flag: non-zero = significant", + }, + ) + _write_nc( + pr_ds, + proc / period_dir / "Bartlein2011_pr.nc", + { + **common_attrs, + "variable": "pr", + "units": "mm/yr (anomaly relative to pre-industrial)", + "pr_description": "Mean Annual Precipitation anomaly (map_anm_mean)", + "pr_std_description": "Standard error of MAP anomaly (map_se_mean)", + "pr_sig_val_description": "Significance flag: non-zero = significant", + }, + ) + + +# --------------------------------------------------------------------------- +# Temp12k (Kaufman et al. 
2020) +# --------------------------------------------------------------------------- + + +def _process_temp12k(raw: Path, proc: Path) -> None: + """Temp12k → midHolocene/Temp12k_tas.nc (latitudinal band reconstructions).""" + p = raw / "temp12k_alldata.nc" + if not p.exists(): + logging.warning(" [skip] temp12k — temp12k_alldata.nc not found") + return + + ds = xr.open_dataset(p).load() + + ds_all = ds.set_coords(["age", "latband_ranges"]).swap_dims( + {"latbands": "latband_ranges"} + ) + ds_latbnds = ds_all.drop_vars( + [ + "scc_globalmean", + "dcc_globalmean", + "gam_globalmean", + "cps_globalmean", + "pai_globalmean", + ] + ) + ds_glob = ds_all[ + [ + "scc_globalmean", + "dcc_globalmean", + "gam_globalmean", + "cps_globalmean", + "pai_globalmean", + ] + ] + ds_glob = ds_glob.expand_dims({"latband_ranges": ["90S_to_90N"]}).rename( + { + "scc_globalmean": "scc_latbands", + "dcc_globalmean": "dcc_latbands", + "gam_globalmean": "gam_latbands", + "cps_globalmean": "cps_latbands", + "pai_globalmean": "pai_latbands", + } + ) + ds_all = xr.concat([ds_latbnds, ds_glob], dim="latband_ranges") + + dataset_list = [] + for var in ds_all.data_vars: + if var != "latband_weights": + ds_temp = ds_all[var].expand_dims( + {"reconstruct_method": [var.split("_")[0]]} + ) + dataset_list.append(ds_temp.to_dataset(name="tas_anom")) + + ds_new = xr.concat(dataset_list, dim="reconstruct_method") + ds_new = xr.merge([ds_new, ds_all[["latband_weights"]]]) + + _write_nc( + ds_new, + proc / "midHolocene" / "Temp12k_tas.nc", + { + "source": "Kaufman et al. 
(2020)", + "doi": "10.1038/s41597-020-0530-7", + "source_url": "https://www.ncei.noaa.gov/pub/data/paleo/reconstructions/kaufman2020/", + "variable": "tas", + "units": "K (anomaly relative to pre-industrial)", + "period": "midHolocene", + "description": "Holocene latitudinal band surface temperature reconstructions (5 methods)", + "methods": "scc, dcc, gam, cps, pai", + }, + ) + + +# --------------------------------------------------------------------------- +# Otto-Bliesner et al. 2021 (LIG127k) +# --------------------------------------------------------------------------- + + +def _process_ottobliesner2021(raw: Path, proc: Path) -> None: + """Otto-Bliesner → lig127k/OttoBliesner2021_tas.nc (site-dimension NetCDF).""" + lig_dir = raw / "lig127k" + tables = [ + "Table S2. Annual - NH Oceans, Europe, and Greenland (40-90N)_CP-2019-174.xlsx", + "Table S3. Annual - Low latitudes (40S-40N)_CP-2019-174.xlsx", + "Table S4. Annual - SH Oceans and Antarctica (40-90S)_CP-2019-174.xlsx", + ] + columns_needed = ["Latitude", "Longitude", "Anom-1SD", "Anom", "Anom+1SD"] + + frames = [] + for t in tables: + p = lig_dir / t + if p.exists(): + df = pd.read_excel(p, header=2)[columns_needed] + frames.append(df) + else: + logging.warning(f" Otto-Bliesner: {t} not found") + + if not frames: + logging.warning(" [skip] ottobliesner2021 — no table files found") + return + + df = pd.concat(frames, ignore_index=True).dropna(subset=["Anom"]) + n = len(df) + + # 1-sigma = average of upper and lower 1SD bounds + tas = df["Anom"].values.astype(float) + tas_std = ((df["Anom+1SD"] - df["Anom-1SD"]) / 2.0).values.astype(float) + + ds = xr.Dataset( + { + "tas": xr.DataArray(tas, dims=["site"]), + "tas_std": xr.DataArray(tas_std, dims=["site"]), + }, + coords={ + "lat": xr.DataArray(df["Latitude"].values.astype(float), dims=["site"]), + "lon": xr.DataArray(df["Longitude"].values.astype(float), dims=["site"]), + }, + ) + _write_nc( + ds, + proc / "lig127k" / "OttoBliesner2021_tas.nc", + { + 
"source": "Otto-Bliesner et al. (2021)", + "doi": "10.5194/cp-17-63-2021", + "source_url": "https://cp.copernicus.org/articles/17/63/2021/", + "variable": "tas", + "units": "K (anomaly relative to pre-industrial)", + "period": "lig127k", + "n_sites": n, + "tas_description": "Annual mean temperature anomaly (Anom column)", + "tas_std_description": "1-sigma = (Anom+1SD − Anom−1SD) / 2", + "tables_used": "S2 (NH), S3 (Tropics), S4 (SH)", + }, + ) + + +# --------------------------------------------------------------------------- +# Scussolini et al. 2019 (LIG precipitation) +# --------------------------------------------------------------------------- + + +def _process_scussolini2019(raw: Path, proc: Path) -> None: + """Scussolini → lig127k/Scussolini2019_pr.nc (site-dimension NetCDF).""" + p = raw / "scussolini2019_lig_precip_proxy.xlsx" + if not p.exists(): + logging.warning( + " [skip] scussolini2019 — file not found. " + "Run download_paleo_observations.py --dataset scussolini2019 and follow instructions." 
+ ) + return + + df = pd.read_excel(p, sheet_name="Proxy_Database", header=0) + + lat_col = "LatºN" + lon_col = "LonºE" + pr_col = "Quantitative signal of ΔP (mm)" + rel_col = "Reliability score" + + # Keep only rows with valid lat/lon + df = df.dropna(subset=[lat_col, lon_col]) + n_total = len(df) + + # pr_col may be missing/NaN for sites without quantitative estimates + if pr_col not in df.columns: + logging.warning(" [skip] scussolini2019 — quantitative ΔP column not found") + return + + pr = df[pr_col].values.astype(float) + reliability = ( + df[rel_col].values.astype(float) if rel_col in df.columns else np.ones(n_total) + ) + + ds = xr.Dataset( + { + "pr": xr.DataArray(pr, dims=["site"]), + "pr_reliability": xr.DataArray(reliability, dims=["site"]), + }, + coords={ + "lat": xr.DataArray(df[lat_col].values.astype(float), dims=["site"]), + "lon": xr.DataArray(df[lon_col].values.astype(float), dims=["site"]), + }, + ) + n_quant = int(np.isfinite(pr).sum()) + _write_nc( + ds, + proc / "lig127k" / "Scussolini2019_pr.nc", + { + "source": "Scussolini et al. (2019)", + "doi": "10.1126/sciadv.aax7047", + "source_url": "https://www.science.org/doi/10.1126/sciadv.aax7047", + "variable": "pr", + "units": "mm (annual precipitation anomaly relative to present)", + "period": "lig127k", + "n_sites_total": n_total, + "n_sites_quantitative": n_quant, + "pr_description": "Quantitative annual precipitation anomaly (ΔP mm); NaN = qualitative only", + "pr_reliability_description": ( + "Reliability score (0–2): 0=low, 1=moderate, 2=high. 
" + "Benchmark uses σ=300 mm/yr for score 1, σ=150 mm/yr for score ≥2" + ), + }, + ) + + +# --------------------------------------------------------------------------- +# Source registry +# --------------------------------------------------------------------------- + +SOURCE_REGISTRY: dict[str, tuple[str, callable]] = { + "ipcc_ar6": ( + "IPCC AR6 Fig 7.19 multi-period global mean anomalies", + _process_ipcc_ar6, + ), + "tierney2020": ( + "Tierney et al. 2020 deep-time global mean TAS", + _process_tierney2020, + ), + "lgmda": ( + "lgmDA v2.1 — LGM data assimilation (Tierney et al. 2020)", + _process_lgmda, + ), + "lgmr_sat": ( + "LGMR SAT — LGM surface air temp (Osman et al. 2021)", + _process_lgmr_sat, + ), + "lgmr_sst": ( + "LGMR SST — LGM sea surface temp (Osman et al. 2021)", + _process_lgmr_sst, + ), + "bartlein2011": ( + "Bartlein et al. 2011 pollen-based LGM/mid-Hol recon", + _process_bartlein2011, + ), + "temp12k": ( + "Temp12k — Holocene lat-band reconstruction (Kaufman 2020)", + _process_temp12k, + ), + "ottobliesner2021": ( + "Otto-Bliesner et al. 2021 LIG127k proxy temperatures", + _process_ottobliesner2021, + ), + "scussolini2019": ( + "Scussolini et al. 
def process_observations(
    source_names: list[str],
    delete_raw: bool = False,
) -> None:
    """Run the registered processors for ``source_names`` and optionally delete the raw data.

    Each processor from ``SOURCE_REGISTRY`` is called with ``RAW_DIR`` and
    ``OBS_PROC``. An exception in one source is logged with its traceback and
    does not stop the remaining sources.

    Args:
        source_names: Keys of ``SOURCE_REGISTRY`` to process, in order.
        delete_raw: Remove the whole ``RAW_DIR`` tree afterwards. This is
            skipped when any source raised, so the inputs needed for a retry
            are not lost. Note that the tree is removed as a whole, including
            raw data of sources that were not listed in ``source_names``.
    """
    OBS_PROC.mkdir(parents=True, exist_ok=True)
    for period_dir in ("lgm", "midHolocene", "lig127k", "multi_period"):
        (OBS_PROC / period_dir).mkdir(exist_ok=True)

    failed: list[str] = []
    for name in source_names:
        description, fn = SOURCE_REGISTRY[name]
        logging.info(f"\n[{name}] {description}")
        try:
            fn(RAW_DIR, OBS_PROC)
        except Exception as exc:
            # Boundary handler: record the failure and keep going.
            logging.error(f" Error processing {name}: {exc}", exc_info=True)
            failed.append(name)

    if not delete_raw:
        return
    if failed:
        logging.warning(
            f" Keeping {RAW_DIR} — processing failed for: {', '.join(failed)}"
        )
        return
    if not RAW_DIR.exists():
        logging.info(f" Nothing to delete — {RAW_DIR} does not exist")
        return
    shutil.rmtree(RAW_DIR)
    logging.info(f" Deleted {RAW_DIR}")
" + "Choices: " + ", ".join(SOURCE_REGISTRY) + ), + ) + parser.add_argument( + "--log-level", + choices=["DEBUG", "INFO", "WARNING", "ERROR"], + default="INFO", + ) + parser.add_argument("--log-file", type=str) + parser.add_argument( + "--delete-raw", + action="store_true", + help="Delete the raw/observations directory after processing.", + ) + args = parser.parse_args() + setup_logging(args.log_level, args.log_file) + + # Resolve source list + seen: dict[str, None] = {} + for token in args.source: + if token == "all": + for key in SOURCE_REGISTRY: + seen[key] = None + elif token in SOURCE_REGISTRY: + seen[token] = None + else: + logging.error( + f"Unknown source '{token}'. Choices: {', '.join(SOURCE_REGISTRY)}" + ) + sys.exit(1) + source_names = list(seen) + + logging.info(f"\n{'='*60}\n Processing paleoclimate observations\n{'='*60}") + logging.info(f" Sources: {source_names}") + process_observations(source_names, delete_raw=args.delete_raw) + + +if __name__ == "__main__": + main()