-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSQL_Aplied3.sql
More file actions
252 lines (216 loc) · 7.89 KB
/
SQL_Aplied3.sql
File metadata and controls
252 lines (216 loc) · 7.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# 3주차 수업 정리
USE classicmodels;
select * from orders;
# 1. 2주차 때 배운 날짜 함수 활용 (with 물류 관련 쿼리)
# 물류 관련 마트 -> 당일 배송, 로켓 배송, 새벽 배송 등등의 배송들
# 각 배송들의 실제 성과를 비교하자
# "배송일을 예측하는 게 수익에 큰 영향을 미쳐"
## 1-1. 월 별로 비교 분석(2주차 코드)
select
date_format(o.orderDate, '%Y-%m') as ym,
sum(case when order_total >= 50000 then 1 else 0 end) as cnt_high,
sum(case when order_total <= 10000 and order_total >= 3000 then 1 else 0 end) as cnt_medium,
sum(case when order_total < 3000 then 1 else 0 end)as cnt_low
from
orders as o
join
(
select
orderNumber,
sum(priceEach * quantityOrdered) as order_total
from
orderdetails
group by
orderNumber
) as oa
on oa.orderNumber=o.orderNumber
group by date_format(o.orderDate, '%Y-%m')
order by date_format(o.orderDate, '%Y-%m');
## 1-2. 아래의 표처럼 pivoting 하 (날짜 별로)
## 2003-01 / 2003-02 / 2002-03 ... 2003-12
## D
## D+1
## D+2
## D+3
## ...
## D+7
### th1. 월별로 집계가 필요하다.
### date_format(order_date, '%Y-%m') as ym
### th2. 일별 배송 딜레이 계산해야
### datediff(shippedDate, orderDate) as delay_days
#### th1~2 까지의 테이블 구조
#### ym delay_days
#### 2003-02 1
### th3. delay_days를 0이면 D, 1이면 D+1 ... D+7
### case when datediff(shippedDate, orderDate) <=0 then 'D'
#### th1~3 까지의 테이블 구조
#### ym / delay_buket
#### 2003-03 D
### th4. "원하는 인스턴스 값"(예를 들어, 2003-03에서 D가 몇 개?")이 없음
### -> COUNT() 하자
### group by ym, delay_bucket
### th1~4 까지의 테이블 구조
#### ym dealy_bucket cnt
#### 2003-01 D+1 10
#### 2003-01 D+2 5
#### 2003-02 D+1 7
### th5. ym이 컬럼에, bucket이 행에 들어가야 해
### -> PIVOT 하자 (위의 테이블 구조에서 가로 세로가 바뀌어야해)
### sum(case when)
## ym, D/D+1/.../D+7, pivot 필요하다
### 1) ym, D/D+1/.../D+7
select
date_format(orderDate, '%Y-%m') as ym,
case
when datediff(shippedDate, orderDate) <=0 then 'D'
when datediff(shippedDate, orderDate) =1 then 'D+1'
when datediff(shippedDate, orderDate) =2 then 'D+2'
when datediff(shippedDate, orderDate) =3 then 'D+3'
when datediff(shippedDate, orderDate) =4 then 'D+4'
when datediff(shippedDate, orderDate) =5 then 'D+5'
when datediff(shippedDate, orderDate) =6 then 'D+6'
when datediff(shippedDate, orderDate) =7 then 'D+7'
else 'D++'
end as delay_bucket
from
orders;
### 2) cnt : group by 하구 count 하자
#### 표준 SQL과 달리 MySQL는 GROUP BY에서 SELECT 별칭(alias)을 허용해
select
date_format(orderDate, '%Y-%m') as ym,
case
when datediff(shippedDate, orderDate) <=0 then 'D'
when datediff(shippedDate, orderDate) =1 then 'D+1'
when datediff(shippedDate, orderDate) =2 then 'D+2'
when datediff(shippedDate, orderDate) =3 then 'D+3'
when datediff(shippedDate, orderDate) =4 then 'D+4'
when datediff(shippedDate, orderDate) =5 then 'D+5'
when datediff(shippedDate, orderDate) =6 then 'D+6'
when datediff(shippedDate, orderDate) =7 then 'D+7'
else 'D++'
end as delay_bucket,
count(*) as cnt
from
orders
group by ym, delay_bucket;
#### GROUP BY에서 AS ym, AS delay_bucket처럼 별칭을 “정의”하는 문법은 안 돼
select
ym,
delay_bucket,
count(*) as cnt
from
orders
group by
date_format(orderDate, '%Y-%m') as ym,
case
when datediff(shippedDate, orderDate) <=0 then 'D'
when datediff(shippedDate, orderDate) =1 then 'D+1'
when datediff(shippedDate, orderDate) =2 then 'D+2'
when datediff(shippedDate, orderDate) =3 then 'D+3'
when datediff(shippedDate, orderDate) =4 then 'D+4'
when datediff(shippedDate, orderDate) =5 then 'D+5'
when datediff(shippedDate, orderDate) =6 then 'D+6'
when datediff(shippedDate, orderDate) =7 then 'D+7'
else 'D++'
end as delay_bucket;
#### CTE/서브쿼리로 파생컬럼을 만든 뒤, 바깥에서 GROUP BY
#### 파생 컬럼을 한 번만 정의하고, 바깥 쿼리에서는 그 컬럼명으로 그룹핑 (표준 OK)
WITH base AS (
select
date_format(orderDate, '%Y-%m') as ym,
case
when datediff(shippedDate, orderDate) <=0 then 'D'
when datediff(shippedDate, orderDate) =1 then 'D+1'
when datediff(shippedDate, orderDate) =2 then 'D+2'
when datediff(shippedDate, orderDate) =3 then 'D+3'
when datediff(shippedDate, orderDate) =4 then 'D+4'
when datediff(shippedDate, orderDate) =5 then 'D+5'
when datediff(shippedDate, orderDate) =6 then 'D+6'
when datediff(shippedDate, orderDate) =7 then 'D+7'
else 'D++'
end as delay_bucket
from orders
)
select
ym,
delay_bucket,
count(*) as cnt
from base
group by ym, delay_bucket
order by ym, delay_bucket;
### 3) pivoting 하자 (2주차 응용해서 컬럼 만들기)
#### 3-1) t를 하나의 테이블이라 생각하고 다시 만들자
select
*
from(
select
date_format(orderDate, '%Y-%m') as ym,
case
when datediff(shippedDate, orderDate) <=0 then 'D'
when datediff(shippedDate, orderDate) =1 then 'D+1'
when datediff(shippedDate, orderDate) =2 then 'D+2'
when datediff(shippedDate, orderDate) =3 then 'D+3'
when datediff(shippedDate, orderDate) =4 then 'D+4'
when datediff(shippedDate, orderDate) =5 then 'D+5'
when datediff(shippedDate, orderDate) =6 then 'D+6'
when datediff(shippedDate, orderDate) =7 then 'D+7'
else 'D++'
end as delay_bucket,
count(*) as cnt
from
orders
group by ym, delay_bucket
) as t
#### 3-2) 응용
select
delay_bucket,
sum(case when ym ='2003-01' then cnt else 0 end) as '2003-01',
sum(case when ym ='2003-02' then cnt else 0 end) as '2003-02',
sum(case when ym ='2003-03' then cnt else 0 end) as '2003-03',
sum(case when ym ='2003-04' then cnt else 0 end) as '2003-04',
sum(case when ym ='2003-05' then cnt else 0 end) as '2003-05',
sum(case when ym ='2003-06' then cnt else 0 end) as '2003-06',
sum(case when ym ='2003-07' then cnt else 0 end) as '2003-07',
sum(case when ym ='2003-08' then cnt else 0 end) as '2003-08',
sum(case when ym ='2003-09' then cnt else 0 end) as '2003-09',
sum(case when ym ='2003-10' then cnt else 0 end) as '2003-10',
sum(case when ym ='2003-11' then cnt else 0 end) as '2003-11',
sum(case when ym ='2003-12' then cnt else 0 end) as '2003-12'
from(
select
date_format(orderDate, '%Y-%m') as ym,
case
when datediff(shippedDate, orderDate) <=0 then 'D'
when datediff(shippedDate, orderDate) =1 then 'D+1'
when datediff(shippedDate, orderDate) =2 then 'D+2'
when datediff(shippedDate, orderDate) =3 then 'D+3'
when datediff(shippedDate, orderDate) =4 then 'D+4'
when datediff(shippedDate, orderDate) =5 then 'D+5'
when datediff(shippedDate, orderDate) =6 then 'D+6'
when datediff(shippedDate, orderDate) =7 then 'D+7'
else 'D++'
end as delay_bucket,
count(*) as cnt
from
orders
group by ym, delay_bucket
) as t
group by delay_bucket # select 에서 sum 하려면 group by 해줘야 해!!!!!!!!!!!! (중요)
order by delay_bucket;
## 물류 쿼리에 대한 요약
### 날짜 함수 사용하려는 목적
### 날짜함수 date_format 연-월
### datediff 배송 지연의 값을 넣음
### case when d, d+1 매핑 변환
### group by cnt 카운팅
### sum(case when) -> 특정 월만 피벗
## 29분 ~ 37분 ... 다시 들어->
# 2. MySQL 문자열 함수
## REPLACE '붙일 값' ''붙여버리면 된다.
select replace(replace(replace(customerName,' ',''),',',''),'+','')
from customers;
select
substring(orderDate,1,7)
from orders;
select concat_ws('/', contactFirstName, contactLastName) from customers;
## regexp_replace (정규식치환)