# File: "1. python code for entropy weight method(EWM) analysis (6).txt"
# (105 lines, 86 loc, 3.59 KB)
# Entropy weight method (EWM) analysis script.
from __future__ import annotations
from pathlib import Path
import numpy as np
import pandas as pd
# ======================
# Configuration
# ======================
# Excel workbook that holds the city/year panel data.
DATA_PATH = r"C:\python\spatial\data1_spatial(1).xlsx"

# Worksheet selector handed to ``pd.read_excel(sheet_name=...)``.
SHEET_NAME = 0

# Names of the identifier columns expected in the workbook.
CITY_COL = "city_code"
YEAR_COL = "year"

# Indicator columns grouped by subsystem. The key order also fixes the
# order of the ``U_<subsystem>`` columns in the output file.
SUBSYSTEMS = {
    "transport": [
        "T1",
        "T2",
    ],
    "population": [
        "Ln(P1)",
        "P2",
        "P3",
    ],
    "industry": [
        "I1",
        "I2",
    ],
}

# Name of the CSV written next to DATA_PATH.
OUTPUT_FILENAME = "subsystem_indices_all_cities.csv"
# ======================
# Functions
# ======================
def minmax_norm(x: pd.Series) -> pd.Series:
    """Rescale *x* linearly so its smallest value maps to 0 and its largest to 1.

    NaN entries are ignored when locating the minimum and maximum.

    Args:
        x: Values to rescale; converted to ``float`` before use.

    Returns:
        A float Series on the same index as *x*:

        * empty or all-NaN input is returned unchanged (as float), because
          there is no range to scale against;
        * when the minimum and maximum are (numerically) equal, every entry
          is 0.0, including positions that were NaN;
        * otherwise ``(x - min) / (max - min)``, with NaN positions left NaN.
    """
    x = x.astype(float)
    values = x.to_numpy()
    observed = values[~np.isnan(values)]

    # np.nanmin / np.nanmax raise ValueError on an empty array and emit a
    # RuntimeWarning on an all-NaN one, so handle "nothing to scale" up front.
    if observed.size == 0:
        return x

    mn, mx = observed.min(), observed.max()
    if np.isclose(mx, mn):
        # Zero range: avoid dividing by (almost) zero.
        return pd.Series(np.zeros(len(x)), index=x.index)
    return (x - mn) / (mx - mn)
def entropy_weights(Z: pd.DataFrame, eps: float = 1e-12) -> pd.Series:
    """Return entropy-method weights for the columns of *Z*.

    Each column is turned into proportions ``P = Z / column_sum``; its entropy
    is ``e = -sum(P * ln P) / ln(n)`` and its weight is proportional to the
    divergence ``d = 1 - e``.

    Args:
        Z: Rows are observations, columns are indicators (expected to be
            non-negative, e.g. min-max normalised values).
        eps: Lower clip applied to the proportions inside the logarithm so
            that ``log(0)`` is never evaluated.

    Returns:
        A Series indexed by ``Z.columns`` whose entries sum to 1. Equal
        weights are returned when there is at most one row or when no column
        carries any information (total divergence is ~0).
    """
    n, m = Z.shape
    if n <= 1:
        # Entropy is undefined for a single observation (ln(1) == 0).
        return pd.Series(np.ones(m) / m, index=Z.columns)

    col_sums = Z.sum(axis=0)
    # A column summing to zero (constant after min-max scaling, or all NaN)
    # cannot be turned into proportions and carries no information.
    no_info = col_sums == 0.0

    P = Z.div(col_sums.mask(no_info), axis=1).fillna(0.0)
    k = 1.0 / np.log(n)
    e = -k * (P * np.log(P.clip(lower=eps))).sum(axis=0)

    # Without this, a zero-sum column would get e == 0 and hence the maximum
    # divergence d == 1, i.e. the largest weight for a useless indicator.
    # Treat it as maximally uniform instead (e == 1, d == 0).
    e = e.mask(no_info, 1.0)

    d = (1.0 - e).clip(lower=0.0)
    if np.isclose(d.sum(), 0.0):
        return pd.Series(np.ones(m) / m, index=Z.columns)
    return d / d.sum()
# ======================
# Main Processing
# ======================
def _year_subsystem_indices(group: pd.DataFrame, indicator_cols: list[str]) -> pd.DataFrame:
    """Compute the ``U_<subsystem>`` indices for the rows of a single year."""
    g = group.copy()
    Z = pd.DataFrame(index=g.index)

    for col in indicator_cols:
        if col not in g.columns:
            # Indicators absent from the workbook are skipped.
            continue
        # 1. Impute missing values with this year's median.
        g[col] = g[col].fillna(g[col].median())
        # 2. Normalise within the year.
        Z[col] = minmax_norm(g[col])

    # 3. Entropy-weighted sum of the normalised indicators per subsystem.
    for sys_name, cols in SUBSYSTEMS.items():
        present = [c for c in cols if c in Z.columns]
        if present:
            w = entropy_weights(Z[present])
            g[f"U_{sys_name}"] = (Z[present] * w).sum(axis=1)

    out_cols = [CITY_COL, YEAR_COL] + [
        f"U_{s}" for s in SUBSYSTEMS if f"U_{s}" in g.columns
    ]
    return g[out_cols]


def main():
    """Load the workbook, compute subsystem indices per year, and save a CSV.

    Reads ``DATA_PATH`` / ``SHEET_NAME``, strips whitespace from the column
    names, and processes the rows year by year. The combined result is written
    as ``OUTPUT_FILENAME`` in the directory of ``DATA_PATH`` and the first
    rows are printed.

    Problems are reported on stdout rather than raised: a missing year or
    city column, an empty dataset, or any exception raised while loading,
    computing, or writing.
    """
    try:
        df = pd.read_excel(DATA_PATH, sheet_name=SHEET_NAME)
        df.columns = df.columns.astype(str).str.strip()

        # Both identifier columns are needed for the output; check them up
        # front so a missing one is reported clearly instead of surfacing as
        # a KeyError in the middle of processing.
        for required in (YEAR_COL, CITY_COL):
            if required not in df.columns:
                print(f"Error: '{required}' not found.")
                return

        df[YEAR_COL] = df[YEAR_COL].astype(int)

        # Loop-invariant: the flat list of every configured indicator.
        indicator_cols = [c for cols in SUBSYSTEMS.values() for c in cols]

        yearly_results = [
            _year_subsystem_indices(group, indicator_cols)
            for _, group in df.groupby(YEAR_COL)
        ]
        if not yearly_results:
            # pd.concat raises on an empty list; report the real cause.
            print("Error: no data rows to process.")
            return

        # Combine and save.
        final_df = pd.concat(yearly_results, ignore_index=True)
        out_path = Path(DATA_PATH).parent / OUTPUT_FILENAME
        final_df.to_csv(out_path, index=False, encoding="utf-8-sig")

        print(f"Success! Processed {len(final_df)} rows (all years/cities).")
        print(f"Results saved to: {out_path}")
        # Use .to_string() to see every row in the console if desired.
        print(final_df.head(10))
    except Exception as e:
        # Top-level boundary of the script: report instead of a traceback.
        print(f"An error occurred: {e}")
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()