Skip to content

Commit 48f909c

Browse files
author
hvalfangst
committed
Implemented one HTTP-triggered and one Blob-triggered Azure Function for the purpose of ETL using Pandas
1 parent 299ae0e commit 48f909c

File tree

3 files changed

+87
-0
lines changed

3 files changed

+87
-0
lines changed

function_app.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import logging
2+
import json
3+
import pandas as pd
4+
import azure.functions as func
5+
from io import StringIO
6+
from sklearn.preprocessing import LabelEncoder
7+
8+
# Create the Azure Functions app object; the decorated functions below register themselves on it
app = func.FunctionApp()

# Set up DEBUG-level logging and a module-scoped logger
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
14+
15+
16+
def _income_correlation(df, column):
    """Return the correlation between ``df[column]`` and ``df['Income']``.

    The value is taken from the off-diagonal cell of the 2x2 matrix produced
    by ``DataFrame.corr()``. When pandas reports the correlation as missing
    (NaN), ``None`` is returned instead so that the value serialises to JSON
    ``null`` rather than the non-standard ``NaN`` token.
    """
    value = df[[column, 'Income']].corr().iloc[0, 1]
    return None if pd.isna(value) else float(value)


@app.blob_trigger(arg_name="inbound", path="hvalfangstcontainer/in/input.csv", connection="")
@app.blob_output(arg_name="outbound", path="hvalfangstcontainer/out/statistics.json", connection="")
def blob_trigger(inbound: func.InputStream, outbound: func.Out[str]):
    """Compute income correlations from the inbound CSV blob and emit them as JSON.

    The CSV is read from ``inbound``, its 'Gender' and 'State' columns are
    label encoded, and the correlation of 'Gender', 'Experience' and 'State'
    with 'Income' is written to ``outbound`` as an indented JSON document.

    Args:
        inbound: Stream holding the CSV content; it must contain the columns
            'Gender', 'State', 'Experience' and 'Income'.
        outbound: Output binding that receives the statistics JSON string.

    Returns:
        None on success. If anything fails, the error is logged with its
        traceback and the string ``"Error: <message>"`` is returned.
    """
    try:
        # 'utf-8-sig' decodes exactly like 'utf-8' but also drops a leading
        # byte-order mark, which would otherwise end up in the first header name
        csv_content = inbound.read().decode("utf-8-sig")

        # Convert CSV content to a pandas DataFrame
        df = pd.read_csv(StringIO(csv_content))

        # Label encode the categorical columns, using a fresh encoder for each
        for column in ('Gender', 'State'):
            df[column] = LabelEncoder().fit_transform(df[column])

        # Calculate correlations against 'Income'
        statistics = {
            "gender_to_income_corr": _income_correlation(df, 'Gender'),
            "experience_to_income_corr": _income_correlation(df, 'Experience'),
            "state_to_income_corr": _income_correlation(df, 'State'),
        }

        # allow_nan=False guarantees that only strictly valid JSON is produced
        statistics_json = json.dumps(statistics, indent=2, allow_nan=False)

        # Upload statistics JSON file to storage account container blob
        outbound.set(statistics_json)
        logging.info("- - - - - |File 'statistics.json' was uploaded| - - - - - ")

    except Exception as e:
        # logging.exception records the traceback in addition to the message
        logging.exception("An error occurred: %s", e)
        return f"Error: {str(e)}"
53+
54+
55+
@app.route(route="upload_csv", auth_level=func.AuthLevel.ANONYMOUS)
@app.blob_output(arg_name="outbound", path="hvalfangstcontainer/in/input.csv", connection="")
def upload_csv(req: func.HttpRequest, outbound: func.Out[str]) -> func.HttpResponse:
    """Store the request body as the CSV input blob.

    The raw request body is decoded as UTF-8 and handed to the ``outbound``
    blob binding unchanged.

    Args:
        req: Incoming HTTP request whose body is the CSV content.
        outbound: Output binding that receives the decoded body.

    Returns:
        An HTTP response with status 200 and a confirmation message on
        success, status 400 when the body is empty or whitespace only, or
        status 500 with ``"Error: <message>"`` when decoding or the upload
        fails.
    """
    try:
        # Parse raw bytes derived from request body to string
        string_body = req.get_body().decode("utf-8")

        # Reject an empty payload instead of overwriting the blob with nothing
        if not string_body.strip():
            logging.error("An error occurred: request body is empty")
            return func.HttpResponse("Error: request body is empty", status_code=400)

        # Upload parsed string body, which conforms to CSV format
        outbound.set(string_body)
        logging.info("- - - - - |Successfully uploaded CSV content| - - - - - ")
        return func.HttpResponse("Successfully uploaded CSV content", status_code=200)

    except Exception as e:
        # Report the failure with an error status rather than a 200 response
        logging.exception("An error occurred: %s", e)
        return func.HttpResponse(f"Error: {str(e)}", status_code=500)

host.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"version": "2.0",
3+
"logging": {
4+
"applicationInsights": {
5+
"samplingSettings": {
6+
"isEnabled": true,
7+
"excludedTypes": "Request"
8+
}
9+
}
10+
},
11+
"extensionBundle": {
12+
"id": "Microsoft.Azure.Functions.ExtensionBundle",
13+
"version": "[3.*, 4.0.0)"
14+
}
15+
}

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
azure-functions==1.17.0
2+
pandas~=2.1.4
3+
scikit-learn~=1.3.2

0 commit comments

Comments
 (0)