Math-Methods-Notes-2022/P231.tex at main · fliptanedo/Math-Methods-Notes-2022 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[
  11pt,
	colorful,
	raggedright,
  % boxey,
  % oneside % screws things up because of spacing
  % raggedbottom,
]{tufte-style-thesis-flip}

\usepackage[
  autocite  = superscript,
  backend   = biber,
  citestyle   = numeric-comp,
  style     = numeric,
  sorting   = none,
  bibencoding = utf8,
]{biblatex}

\input{FlipPreamble}
\newtheorem{exercise}{Exercise}[section]
\newtheorem{example}{Example}[section]

% Flip's stuff
% This doesn't work with the default Linux Libertine font :()
% \renewcommand{\dbar}{d\mkern-6mu\mathchar'26\hspace{-.1em}}    % for d/2pi
% Hack: https://tex.stackexchange.com/a/203511
\renewcommand{\dbar}{d\hspace*{-0.08em}\bar{}\hspace*{0.1em}}
% See discussion in stackexchange

% For matrices
\newcommand{\aij}[2]{^{#1}_{\phantom{#1}#2}}
\newcommand{\mat}[3]{#1\aij{#2}{#3}}

% Bold italic math: https://tex.stackexchange.com/a/82747/8094
\DeclareMathAlphabet{\mathbfsf}{\encodingdefault}{\sfdefault}{bx}{n}
\newcommand{\tens}[1]{\mathbfsf{#1}}


%	package inclusions for this specific documentation file. the .cls does not need them.
\usepackage{lipsum}		% the chad package
\usepackage{textgreek}	% for greek
\usepackage{imakeidx}	% for index
\usepackage{multicol}
% \usepackage{bibentry}
\makeindex[columns=3]

\addbibresource{refs.bib} % for biblatex
% NOTE: sometimes biber has problems
% https://tex.stackexchange.com/a/135496/8094 clearing cache helps

% INFO : used in the titlepage, copyright and stuff.
\author{Flip Tanedo}
\title{Physics 231: Methods of Theoretical Physics}
\subtitle{A mathematical methods course for first-year graduate students in physics \& astronomy}
\university{University of California, Riverside}
\lab{Physics 231, Fall 2022}
\logo{figures/FlipAmbigram.png}
\type{Lecture Notes}

\begin{document}

\maketitle

\frontmatter

\chapter{Abstract}
Physics 231 is a crash course on mathematical methods necessary to succeed in the first-year physics graduate curriculum at \acro{UC~R}iverside.
%
The focus is how to solve differential equations using Green's functions. This version is revised for Fall 2022. Last Compiled: \today


\tableofcontents
% \listoffigures
% \listoftables
% \listoflistings


\mainmatter

\chapter{Mathematical Methods}

This is a \emph{crash course} on the mathematical toolkit necessary for graduate courses in electrodynamics, quantum mechanics, and statistical mechanics. The emphasis is physical intuition rather than mathematical rigor. Let us be clear: as a student you are \emph{expected} to be as mathematically rigorous as your discipline requires. Fortunately, there are plenty of excellent textbooks pitched at various levels of rigor and you can find the one most appropriate for you. This course is meant to complement those references, not to replace them.\footnote{In other words, this is \emph{your} \acro{Ph.D}, craft it appropriately.}

Unfortunately for you, it is unlikely that the choice of topics in this course will be either necessary or sufficient for your training. If anyone asks, you should say that the theme of this course is to solve the types of linear differential equations that will show up in your physics coursework (\emph{ugh! Boring!}). The actual choice of topics is meant to highlight big, unifying themes in mathematical physics sprinkled with topics of current research significance.

\section{Green's functions and this course}

Our goal is to solve linear differential equations:
\begin{align}
  \mathcal O f(x) = s(x) \ .
  \label{eq:greens:function:equation}
\end{align}
In this equation, $\mathcal O$ is a \emph{differential operator}\index{differential operator} that encodes some kind of physical dynamics\footnote{A \textbf{differential operator} is just something built out of derivatives that can act on a function. The differential operator may contain coefficients that depend on the variable that we are differentiating with respect to; for example, $\mathcal O = (d/dx)^2 + 3x\,(d/dx)$.}, $s(x)$ is the \emph{source} of those dynamics, and $f(x)$ is the system's physical \emph{response} that we would like to determine. The solution to this equation is:
\begin{align}
  f(x) &= \mathcal O^{-1} s(x) \ .
\end{align}
This statement is trivial and deeply unsatisfying. We will think carefully about what $\mathcal O^{-1}$ actually means and how to calculate it. $\mathcal O^{-1}$ is the \textbf{Green's function}\index{Green's function} for the differential operator $\mathcal O$. %In this course, we use the quest to understand Green's functions to guide our study of mathematical physics.

\begin{exercise}
Consider the differential operator $\mathcal O = (d/dx)^2 + 3x\,(d/dx)$. A colleague tells you that $(d/dx)^2$ is squared, therefore it is not a linear operator. Explain why the colleague is mistaken.
\end{exercise}

To make sense of $\mathcal O^{-1}$, we appeal to linear algebra. A linear transformation---that is, a \textbf{matrix}---$A$ acts on a vector $\vec{v}$ to give equations like
\begin{align}
  A \vec{v} = \vec{w} \ ,
\end{align}
whose solution is
\begin{align}
  \vec{v} = A^{-1} \vec{w} \ .
\end{align}
In this course, we think of linear differential operators $\mathcal O$ as infinite-dimensional matrices. $\mathcal O^{-1}$ is the inverse of this matrix. At this point, you should feel a bit nervous because you remember that the inverse of a $3\times 3$ matrix is tricky\ldots{} to say nothing of the \emph{infinite}-dimensional limit. We remember, however, that calculus is infinite-dimensional linear algebra. Complex analysis extends the real line to the complex plane. In so doing, the \emph{analytic structure} of our theories offers both a method to calculate challenging integrals and its own physical significance.

Our simple example through this journey is the humble harmonic oscillator. In subsequent chapters we extend this to higher-dimensions and, indeed, to \emph{infinite} dimensions. Along the way we reflect on the nature of all of these infinities we bandy about. We close by connecting our study of Green's functions to statistics, non-perturbative methods, and the burgeoning connections of physics to machine learning.

\section{This is not what I expected}


This is a course in mathematical methods for \emph{physicists}.
%
We do not solve \emph{every} class of differential equation that is likely to pop up in your research careers---that would be a course on mathematical methods for \emph{engineers}. (I recommend Carl Bender's 2011 lectures at \acro{PSI} for an insightful course along those lines.\sidecite{pirsa_11110040}) Instead, we methodically dissect a physically motivated example---the harmonic oscillator---to emphasize how we think about mathematical problems.

We weave together ideas that are not often connected explicitly in undergraduate physics courses: linear algebra, differential equations, complex analysis, statistics. I expect that you have had some formal training in these topics so that we may focus on the interconnections between these ideas and how those interconnections come up over and over again in our study of nature.

Do not be surprised if we only mention Bessel functions in passing. Do not think less of our efforts if we do not calculate Wronskians or go beyond a single Riemann sheet. As graduate students, it is \emph{your} responsibility to be able to grab your favorite reference to apply mathematics as needed to \emph{your} research. \emph{This} course is about the larger narrative that is not often shared explicitly in those books. It is about that which makes physicists employable in Silicon Valley while simultaneously terrible at splitting the bill at a restaurant.

\begin{example}
Our cavalier attitude towards mathematical rigor should not make you think that mathematical rigor is not necessary. For a nice, visual example, see ``How to lie using visual proofs'' by 3Blue1Brown.\cite{3Blue1Brown_2022}
% https://youtu.be/VYQVlVoWoPY
\end{example}


\section{The non-mathematical idea  of mathematical niceness}
\label{sec:niceness}

I find it useful to appeal to the notion of a \textbf{nice}\index{nice} mathematical situation. This is not a formal idea. In fact, it is one of many things that mathematicians find ridiculous about me. But as a physicist, the concept of mathematical \emph{niceness} is remarkably helpful.

The physical systems that we spend the most time thinking about are all \emph{nice}.
%
While our mathematical cousins spend years proving every exceptional case to a theorem, we tend to be happy to push onward as long as mathematical results are true for the \emph{nice} cases.
%
In fact, nature often admits an \emph{approximately} nice mathematical description.\footnote{This is not because nature is kind, but rather because we are only clever enough to build simple theories. What is important to appreciate as a student is \emph{why} simple theories can so nearly approximate nature.}

%
Nice mathematical models make tidy predictions. Then we can Taylor expand about these nice predictions to make better predictions.
%
We sometimes chant \emph{perturbation theory}\index{perturbation theory} out loud several times in case someone watching us does not think we are being rigorous enough.\footnote{Sometimes our Taylor expansions have zero radius of convergence. ``\emph{E pur si muove},'' as Galileo would say.}
% We make Taylor expansions without anguishing about the radius of convergence\footnote{\url{https://johncarlosbaez.wordpress.com/2016/09/21/struggles-with-the-continuum-part-6/}} and validate it post-facto because it \emph{works}.


This is not to say that nature cares at all about our physical models.
%
Every once in a while, we \emph{do} have to worry about the exceptional cases because our models fail to accommodate what is \emph{actually} happening in nature.\footnote{Full disclosure? Your \acro{Ph.D} will likely depend on finding a clever solution to one of these cases.} Those scenarios are the most exciting of all: that is when our mathematical formalism grabs us by the collar and says, \emph{listen to me---something important is happening!} This often happens when a calculation tells us that a physical result is infinite.

\begin{exercise}\label{ex:hydrogen:problem}
Consider the potential that an electron feels in the hydrogen atom:
\begin{align}
  V(r) &= -\frac{\alpha}{r} \ .
\end{align}
As the electron--proton separation goes to zero, $r\to 0$, the potential goes to infinity. Classical electrodynamics is telling us that something curious is happening. What actually happens? (And why didn't you ask this question when you were in high school?)
\end{exercise}

We focus on \emph{nice} functions and \emph{nice} operators and \emph{nice} boundary conditions, and so forth. We often only need the \emph{nice} math to make progress on our \emph{nice} physical models. It is worth spending our time learning to work with these \emph{nice} limits. Leave the degenerate cases to the mathematicians for now. Eventually, you will find yourself in a situation where physics demands \emph{not nice} mathematics. In that case---and only when the physics demands it---you will be ready to poke and prod at the mathematical curiosity until the underlying \emph{physics} reason for the not-niceness is apparent. All this is to say: if you object to this course because we do not start with proofs about open sets or convergence, then you are missing the point of an education in physics.

\section{The unbearable arrogance of physicists}
\label{sec:obvious}

Sometimes we, as physicists, have a reputation for arrogance. The most generous interpretation is that we must have some Promethean \emph{chutzpah} to seek to comprehend/invent/discover an underlying mathematical organizing principle for the universe. On the other side of this are the damaging ways in which scientists can mistreat each other in academia. Somewhere in between are footnotes poking fun at mathematicians, or being a bit of a bore at parties. But these are lecture notes about mathematical physics; it is up to you to figure out how to be the best version of you-as-physicist-and-human-being that you can realize.\footnote{There are plenty of excellent pieces to reflect on this. For example, \emph{The Disordered Cosmos}, \emph{The Only Woman in the Room}, and \emph{Beamtimes and Lifetimes}.}

What is within the scope of a set of lecture notes on mathematical physics is the apparent arrogance about the way we speak and write about technical ideas. In particular, the arrogance of phrases like \emph{it is obvious that$\ldots$}. In colloquial conversation, these phrases are smug or aggressive: \emph{look how smart I am that I comprehend this so easily!}\footnote{In my youth I would often curse at my textbooks: \emph{If it's so obvious, then why don't you explain it, you lazy asshole!}} In a graduate course, however, this sentiment means something very specific with pedagogical value. When these notes say that something is \emph{obvious}\index{obvious} or \emph{clear}, what we really mean is the following:
\begin{quote}
If you think about this idea with a certain perspective, then the idea is self-evident in a way that is illuminating. However, there are many other perspectives in which the idea is unclear. If you find the idea unclear, do \emph{not} assume that the idea itself is esoteric or that you are somehow deficient. Instead, take a step back to see if the idea is a natural consequence of a different approach.
\end{quote}
Any time that these notes refer to being \emph{obvious}, it is a checkpoint. When something is not obvious, it is an invitation to reflect and perhaps backtrack a bit. In fact, when something is not obvious, it is an \emph{opportunity} to understand the idea more deeply---for now you have seen how the idea can become apparently complex, but you are being assured that in this complexity there is an underlying simple organizing principle. What can be more tantalizing than that?

There is a complementary idea that students have a secret superpower that they can exercise in the classroom. It is terrifying to ask questions in public---after all, what if your peers decide that you must be \emph{stupid}? There is didactic armor against this. Whenever you are confused, and at the first appropriate moment after you are confused, raise your hand and phrase your question as follows:
\begin{quote}
\emph{Is it obvious that} $\ldots$ ?
\end{quote}
Linguistically, this is a trick of the passive voice: it removes \emph{you} from the query. It does not insist that you are incapable of comprehending something, it simply asks if there is some intuitive understanding that you want to make sure you do not miss. After all, developing your physics intuition is one of the goals of your first-year graduate courses. Any self-respecting instructor will respond sympathetically, either:
\begin{itemize}
  \item \emph{no}, it is not obvious. Perhaps then you work through the idea carefully. Or,
  \item \emph{yes}, it is obvious---but only when we remember some previous key step, which your instructor should then highlight.
\end{itemize}
Either way, the result is wisdom rather than risking how you look in front of your peers.

By the way, \emph{how you look in front of your peers} is not a good reason to do anything. It is almost as bad as not asking questions because you do not want to look stupid in front of your advisor. Here is some free advice: your advisor knows \emph{exactly} how stupid you are. Most likely your advisor does not think you are stupid, but if you are convinced that you are stupid, then rest assured that your advisor knows this and has still chosen to invest their time into you. Make the most of this time: ask questions.


\section{Footnotes}

I tend to be verbose with foot/margin-notes.\footnote{\emph{Pale Fire}, V.~Nabokov.} There are a few types of marginalia:
\begin{itemize}
  \item References, so you do not have to flip back to a bibliography at the end of the document.
  \item Enigmatic hints of how ideas interconnect. This document's main narrative focuses on first-year graduate physics, but because so many of the ideas here reappear in more advanced topics, I cannot help but mention a few of them in passing. These notes seem more mysterious than pedagogical to those who have not studied those topics; please take this as an invitation to dig deeper into the topic if it excites you.
  \item Miscellaneous examples or observations that are not germane to the specific topic in the main text, but are worth highlighting for the eager student.
  \item Miscellaneous personal reflections with no pedagogical value other than to remind the reader that the human being writing these notes was once a beginner student as well.
\end{itemize}

\section{One last piece of advice}

It took me way too long to appreciate the crucial significance of homework and exercises in learning physics. Your job in your \acro{Ph.D} is to answer questions where no previous answer had ever existed in the history of humanity. You will be guided by your advisor and your mentors, but you will be the discover-er of truth. This is a tall order, something like completing a marathon or climbing Everest. And like those physical feats, the only way to succeed in your intellectual pursuit is to \emph{train}. And the best training we have in physics is practice problems. These are problems that are crafted to hone your skills. They are examples that are assured to be \emph{solvable}\footnote{The fact that they are solvable does not mean that you are entitled to a solution set other than the solution that you earn by deriving it yourself.} and with a framework (like a course with peers) to guide you through the challenges. Do not squander the opportunity to \emph{train}. My undergraduate advisor used to say, \emph{you should do every problem in the book---but especially the ones that you cannot do.}


\chapter{Physics versus Mathematics}

Let us make one point clear:
\begin{align}
  \text{Physics} \neq \text{Mathematics} \ .
\end{align}
This is a truth in many different respects\footnote{The astronomer Fritz Zwicky would perhaps call this a \emph{spherical truth}; no matter how you look at it, the statement is still true.}:
\begin{itemize}
  \item Physicists are rooted in experimental results. {Even theorists? \emph{Especially} theorists.}

  \item Physicists Taylor expand to their hearts’ content. Even when it is sometimes not mathematically valid.~\sidecite{Baez_Azimuth_2016}

  \item Physicists pick a basis, use coordinates, and decorate every tensor with indices. {Equations in physics appear intimidating because of the indices decorating our variables. Ironically, physicists are often intimidated by mathematics because of the conspicuous absence of any indices.}

  \item Physicists seek truths about \emph{this} universe.

  \item Physicists have a fast and loose relationship to the concept of infinity and the related concept of the infinitesimal---on this, I recommend Jim Holt's essay ``The Dangerous Idea of the Infinitesimal.''~\sidecite{holt2018einstein}
  At the same time, many of our tools seem to \emph{beg} questions about the infinite.
\end{itemize}
My friends, we are not doing mathematics.

\section{The most important binary relation}

When we write equations, the symbol that separates the left-hand side from the right-hand side is a binary relation. We use binary relations like $=$ or $\neq$. Sometimes to make a point we write $\cong$ or $\equiv$ or $\dot =$ to mean something like `definition’ or `tautologically equivalent to’ or some other variant of \emph{even more equal than equal}.

 \begin{figure}[h]
      \sidecaption{Mathematical symbol fight from \acro{XKCD}.~\cite{xkcd_2343} \acro{CC BY-NC 2.5} \label{fig:xkcd:symbol}} % put this on top
      % \label HAS to be inside the \sidecaption
      \includegraphics[width=\textwidth]{mathematical_symbol_fight_2x.png} % or tikz or anything
  \end{figure}
% xkcd_2343

As physicists, the most important binary relation is none of those things\footnote{I thank Yuval Grossman for teaching me this.}. What we usually care about is $\sim$.\footnote{I use this the same way as $\propto$, which is completely different from `approximately,’ $\approx$.} The symbol $\sim$ tells us how something \emph{scales}. If I double a quantity on the right-hand side, how does the quantity on the left-hand side scale? Does it depend linearly? Quadratically? Non-linearly? The answer encodes something important about the underlying physics of the system. The symbol $\sim$ is the reason why \emph{imagine the cow is a sphere} is a popular punchline in a joke about physicists.


Implicit in this discussion is the pragmatic policy that we will not care about stray factors of 2 in this class. As my advisor used to say, if you are worried about a factor of 2, then you have additional homework to figure out that factor of 2.\footnote{That being said, if you are reading these notes and find an error, do let me know about it.}

\section{Units}

There is another way in which physics is different from mathematics. It is far more prosaic. \emph{Quantities in physics have units}. We do not deal in simply numbers; we deal with kilograms, electron volts, meters. It turns out that dimensional analysis is a big part of what we do as physicists.

\begin{exercise}
Explain, in words, why the quantity $\sin(3~\text{cm})$ is absolute nonsense in any context. What about $\text{exp}(2~\text{kg})$?
\end{exercise}

\chapter{Dimensional Analysis}

% FLIP: add this : https://youtu.be/kkfIXUjkYqE

You may be surprised how far one can go in physics by thinking deeply about dimensional analysis. Here we only get started. To take the next step, you may read more about the Buckingham Pi theorem or applications in physics. I recommend any of the following:
\begin{itemize}
  \item \fullcite{doi:10.1119/1.1987069}
  \item \fullcite{doi:10.1119/1.4902882}
  \item \fullcite{doi:10.1119/1.3535586}
  \item \fullcite{Stevenson:1980ga}.
\end{itemize}
\textbf{Dimensional analysis}\index{dimensional analysis} is simply the idea that by keeping track of the units of physical quantities, we can learn quite a bit about how those quantities must show up in our physical laws.


\section{Converting Units}

Imagine that you have three apples. This is a number (three) and a unit (apple). The meaning of the unit depends on what you're using it to measure. For example, if apples are \$1 each, then you could use an apple as a unit of currency. The way to do this is to simply \emph{multiply by one}:
\begin{align}
  (3\text{ apples}) \times \left(\frac{\text{\$ 1}}{\text{apple}}\right)
  &= \$ 3 \ .
\end{align}
We have used the fact that the exchange rate is simply the statement that
\begin{align}
  1\text{ apple} &= \$1
  & \Rightarrow &&
  1 &= \frac{\$ 1}{1\text{ apple}} \ .
\end{align}
You can do a similar thing for [kilo-]calories or any other conversion rate.

All that matters is that the conversion factor is a constant. The constants of nature make very good `exchange rates.' For example, high-energy physicists use \textbf{natural units}\index{natural units}:
\begin{align}
  \hbar = c = 1 \ .
\end{align}
At face value, this does not make sense. $\hbar$ has units of action, $c$ is a speed, and 1 is dimensionless. In more conventional units,\footnote{For the most part, we are happy with one significant figure in this course.}
\begin{align}
  c &= 3 \times 10^{10}~\text{cm}/\text{s}
  &
  \hbar &= 10^{-34}~\text{kg}~\text{m}^2/\text{s}
  \ .
\end{align}
However, because nature gives us a \emph{fundamental} unit of action and a \emph{fundamental} unit of speed, we may use them as conversion factors (exchange rates). If $c=1$, then
\begin{align}
  1~\text{s} &=  3 \times 10^{10}~\text{cm} \ .
\end{align}
This connects a unit of time to a unit of distance. By measuring time, the constant $c$ automatically gives an associated distance. The physical relevance of the distance is tied to the nature of the fundamental constant: one second (or `light-second') is the distance that a photon travels in one second. Observe that this only works because $c$ is a constant.

\section{Quantifying units}

We use the notation that a physical quantity $Q$ has \textbf{dimension}\index{dimension} $[Q]$ that can be expressed in terms of units of length, mass, and time:
\begin{align}
  [Q] = L^a M^b T^c \ .
\end{align}
The {dimension} is the statement of the powers $a$, $b$, and $c$. You may want to also include units of, say, electric charge. Sticklers may pontificate about whether electric charge formally carries a new unit or not.


\begin{example}
What are the units of force? We remember that $\vec{F} = m\vec{a}$, so
\begin{align}
  [\vec F] &= [m][\vec{a}] = M\times L T^{-2} = L^1 M^1 T^{-2} \ .
  \label{eq:02:force:units}
\end{align}
\end{example}

\begin{exercise}
What are the units of the fine structure constant?
\end{exercise}


When working in \textbf{natural units}, $c=1$ means that units of length and time are the same and $\hbar = 1$ means that units of time and energy (mass) are inversely related. In natural units, one simply writes $[Q]$ to mean the mass-dimension of a quantity. To revert back to conventional units, one simply multiplies by appropriate factors of $1=c$ and $1=\hbar$.

\begin{example}
What are the units of force in natural units? From \eqref{eq:02:force:units} we multiply by one to convert length and time into mass dimensions:
\begin{align}
  [\vec F] &= [c^{-3} \hbar \vec{F}] = M^2 \ .
\end{align}
In natural units we say $[\vec F] = 2$. Recall that energy and mass have the same dimension, which you may recall from the Einstein relation $E^2 = m^2c^4 + p^2c^2$.
\end{example}

\section{Dimensional analysis at work}


\subsection{Sanity Check}

The simplest use of dimensional analysis is to check your work. The following expression is obviously wrong:
\begin{align}
  1 + (3~\text{cm}) \ .
\end{align}
This does not make sense. You cannot sum terms with different dimensions. Similarly, $\sin(3\text{ cm})$ does not make sense. What about $e^{5~\text{cm}}$? This doesn't make sense because
\begin{align}
  e^x = 1 + x + \frac{1}{2!} x^2 +  \cdots
\end{align}
Since each term comes with a different power of $x$, the argument of the exponential must be dimensionless.

\begin{example}
As pointed out by Matta et al.\footnote{\emph{J.~Chem.~Educ.} 2011, 88, 1, 67–70. \url{https://doi.org/10.1021/ed1000476}}, this argument is not quite correct. Each term in the Taylor expansion of a function $f(x)$ maintains the dimensions of $f(x)$, as is obvious when written out carefully:
\begin{align}
  f(x_0+\Delta x) = f(x_0) + \left.\frac{df}{dx}\right|_{x_0}\Delta x + \frac{1}{2}\left.\frac{d^2f}{dx^2}\right|_{x_0}\Delta x^2 + \cdots \ .
\end{align}
The units of the $dx^n$ in the `denominator' of $d^{n}f/dx^n$ are canceled by the units of $\Delta x^n$, no matter what the dimensions of $\Delta x$ are.
%
The real issue is that for many functions, $f(x_0)$ is simply not defined for dimensionful arguments. This is certainly true for trigonometric functions. For the exponential, one may fall back to the limit definition:
\begin{align}
  e^x = \lim_{n\to\infty} \left(1+ \frac{x}{n}\right)^n \ ,
\end{align}
where it is now an issue of different terms having different dimensions. Note that the right-hand side is not a Taylor expansion. The exponential definition above is handy because it makes sense even when $x$ is a matrix or operator.
\end{example}

\begin{exercise}
Sometimes you may think it is useful to keep track of radians (or degrees) as a dimensionful quantity. This, by the way, is a slippery slope because then you may want to think of $\pi$ as some unit of circles... whatever that means. Following the example above, show that (1) each term in the Taylor expansion of $f(x) = \sin(x)$ has the same dimensions, and (2) that there is no issue with trigonometric functions being defined as having `dimensionful' arguments in this way.
\end{exercise}

\begin{exercise}
Consider the energy spectrum of light emitted from some constant source---a distant star, the ongoing annihilation of dark matter in the galactic center, or a high-intensity laser. The spectrum encodes how many photons are emitted per unit time. We can plot this spectrum as a curve on a graph. We can even normalize the curve so that it integrates to one photon. This means we only care about the distribution of energy, not the absolute amount. The horizontal axis of such a plot is the photon energy. What are the units of the vertical axis?
\end{exercise}


\subsection{Solving problems}

Here is a common problem in introductory physics. Assume you have a pendulum of length $\ell$ with some sufficiently small initial displacement $\theta_0$. What is the period, $\tau$, of the pendulum? We draw a picture like Fig.~\ref{fig:simple_pendulum}.
%
\marginfig{figures/lec01_pendulum.pdf}{Sketch of a simple pendulum.}{fig:simple_pendulum}
%
%
From dimensional analysis, we know that the period has dimensions of time, $[\tau] = T$. The problem gives us a length $[\ell]=L$ and the gravitational acceleration, $[g]=LT^{-2}$. Note that $[\theta_0] = 1$ is dimensionless. This means that the only way to form a quantity with dimensions of time is to use $g^{-1/2}$. This leaves us with a leftover $L^{-1/2}$, which we can fix by inserting a square root of $\ell$:
\begin{align}
  \tau \sim g^{-1/2} \ell^{1/2} \ .
\end{align}
If we want to be fancy, we can make this an equal sign by writing a function of the other dimensionless quantities in the problem:
\begin{align}
  \tau = f(\theta_0) \sqrt{\frac{\ell}{g}} \ .
\end{align}

\flip{To do: include problems from R.W.~Robinett \emph{American Journal of Physics} \textbf{83}, 353 (2015); \url{https://doi.org/10.1119/1.4902882}.}


\subsection{Scaling}

A key theme in physics is scaling relations. We present a somewhat contrived example of how this works adapted from section 11 of V.\ I.\ Arnold's \emph{Mathematical Methods of Classical Mechanics}.\footnote{This is one of my favorite differential geometry textbooks because it is disguised as a book on mechanics.} Suppose you have some static, central potential $U(\vec r)$. Maybe it is some planet orbiting a star.
%
\textfig[1]{figures/lec01_orbit.pdf}{An orbital trajectory, $\vec{r}_0(t)$.}{fig:simple_orbit}
%
The force law gives:
\begin{align}
  m
  \ddot{\vec{r}} = - \frac{\partial U}{\partial\vec{r}} \ .
  \label{eq:scaling:eg}
\end{align}
Suppose we are given a solution, $\vec r_0(t)$. Perhaps this is a trajectory that is experimentally verified. Dimensional analysis gives us a way to scale this solution into other solutions. For example, let us scale time by defining a new variable $t'$:
\begin{align}
  t \equiv \alpha t' \ .
\end{align}
Because the potential is static, then only the left-hand side of the force law changes. Even though the right-hand side formally has dimensions of time, $T^{-2}$, it does not transform because those units are carried in a constant, perhaps $G_N$, not a $(d/dt)^2$ like the left-hand side. The left-hand side of the force law gives:
\begin{align}
  m\left(\frac{d}{dt}\right)^2 \vec r_0(t)
  &=
  m\alpha^{-2} \left(\frac{d}{dt'}\right)^2 \vec r_0(\alpha t') \ .
\end{align}
This begs us to define a new mass $m' = m\alpha^{-2}$ so that
\begin{align}
   m' \left(\frac{d}{dt'}\right)^2 {\vec{r}_0}(\alpha t')
  = - \frac{\partial U}{\partial\vec{r}_0} \ .
\end{align}
What this tells us is that we may define a new trajectory, $\vec r_1(t') \equiv \vec{r}_0(\alpha t')$, which is a solution in the same potential that traces the same trajectory but at $\alpha$ times the speed and with mass $m'$. Changing labels $t'\to t$ for a direct comparison:
\begin{align}
   m' \left(\frac{d}{dt}\right)^2 {\vec{r}_1}(t)
  = - \frac{\partial U}{\partial\vec{r}_1} \ ,
\end{align}
which is indeed\footnote{We were able to swap $\vec r_0$ with $\vec r_1$ simply because $U$ only depends on the position.} \eqref{eq:scaling:eg} with a new mass $m'$ and a trajectory $\vec r_1(t') \equiv \vec{r}_0(\alpha t')$. For example, if $\alpha = 2$, then $\vec r_1(t)$ traces the same trajectory at double the velocity with one fourth of the mass.

\begin{exercise}
I missed something in the example above. In order for a planet of mass $m'$ to have trajectory $\vec r_1(t')$, what is the mass of the star compared to the original mass $M_\star$?\footnote{Thanks to Eric Zhang (2021) for pointing this out.}
\end{exercise}

\begin{example}
Business-y people like to quantify effort using words like `person--hour' or `person--years.' This is the idea that a 10 person--hour task would take 10 people one hour to complete, or one person 10 hours to complete, or 5 people two hours to complete, etc.  As you can see, this choice of units implies that effort has a linear scaling in both the number of people and the amount of time needed. Anyone who has worked on a group project knows that this linear scaling is bullshit. Frederick Brooks reflects on this in the 1975 essay, ``The Mythical Man--Month.''\sidecite{Brooks1975}
\end{example}

\subsection{Error Estimates}

This section is based on a lovely \emph{American Journal of Physics} article by Craig Bohren.\sidecite{doi:10.1119/1.1574042}%\footnote{\url{https://doi.org/10.1119/1.1574042}}
Let us go back to another high school physics problem: we drop a ball of mass $m$ from height $h$. See Fig.~\ref{fig:simple_drop}. The task is to find the time $t_0$ for the ball to hit the ground.
%
\marginfig{figures/lec01_drop.pdf}{Dropping a ball of mass $m$.}{fig:simple_drop}

% Suppose you drop a mass $m$ from height $h$ that is initially at rest. How long before this hits the ground?
You can integrate the force equation to get
\begin{align}
  t_0 = \sqrt{\frac{2h}{g}} \ .
\end{align}
This is the \emph{exact} answer \emph{within our model} of the system. The model made several assumptions: the mass is a point mass, the gravitational acceleration is constant at all positions, there is no air resistance, etc. In fact, we \emph{know} that if we do an experiment, our result will almost certainly \emph{not} be $t_0$. All we know is that $t_0$ is probably a good approximation of the actual answer. What we would like to know is: \emph{how good of an approximation is it?}

One way to check this is to do the next-to-leading order (\acro{NLO}) calculation, taking into account a more realistic model and then compare to $t_0$. Of course, ``more realistic'' is also code for ``more complicated.'' Take a moment to appreciate that doing this is \emph{stupid}. Why do we need to do a \emph{hard} calculation to justify doing an \emph{easy} one? If we are going to do the hard calculation anyway, what was the point of ever doing the easy one?

What we really want is an error \emph{estimate}. The error\index{error} is
\begin{align}
  \epsilon &= \frac{t_1 - t_0}{t_0} \ .
\end{align}
This is a dimensionless quantity that determines how far off $t_0$ is from a more realistic calculation, $t_1$. Ideally we should not actually have to do much work to estimate $t_1$.

Let us assume that we are not completely nuts and that we are in a regime where the error is small\footnote{Note the error has to be dimensionless in order for us to be able to call it `small,' otherwise it begs the question of `small with respect to what?'}. Then the error is a function of some dimensionless parameters, $\xi$, in the system. We define these $\xi$ so that as $\xi \to 0$, $\epsilon(\xi) \to 0$. In other words, the approximation gets better as the $\xi$ are made smaller. By Taylor expansion:
\begin{align}
  \epsilon(\xi) = \epsilon(0) + \epsilon'(0) \xi + \mathcal O(\xi^2) \ .
\end{align}
By assumption, $\epsilon(0) = 0$ and $\mathcal O(\xi^2)$ is  small. We can then make a reasonable \emph{assumption} that the dimensionless value $\epsilon'(0)$  is $\mathcal O(1)$. This tells us that the error goes like $\epsilon(\xi) \sim \xi$.

By the way $\mathcal O(1)$ is read ``order one'' and is fancy notation for the order of magnitude. Numbers like 0.6, 2, and $\pi$ are all $\mathcal O(1)$. A number like $4\pi$, on the other hand, is $\mathcal O(10)$.  The assumption that a dimensionless number is $\mathcal O(1)$ is reasonable. When nature gives you a dimensionless parameter that is both (a) important and (b) very different from $\mathcal O(1)$, then there's a good chance that it's trying to tell you something about your model. Good examples of this are the cosmological constant, the strong \acro{CP} phase, and the electroweak hierarchy problem.\footnote{There are also `bad' examples. The ratio of the angular size of the moon to the angular size of the sun is unity to very good approximation. This is quite certainly a coincidence. Our universe appears to be in an epoch where the density of matter, radiation, and dark energy all happen to be in the same ballpark. Our cosmological models imply that this is purely a coincidence. It would be very curious if this were not the case. As an exercise, you can critically explore the use of the anthropic principle in physics.}

Here is how it works in practice. One effect that we miss in our toy calculation of $t_0$ is that the earth is round with radius $R$. This means that assuming a constant $g$ is an approximation. We have two choices for a dimensionless parameter $\xi$:
\begin{align}
  \xi &= \frac{h}{R}
  &\text{or}&&
  \xi &= \frac{R}{h} \ .
\end{align}
There is an obvious choice: $\xi = h/R$, because we know that as $h$ is made smaller (drop the ball closer to the ground) or $R$ becomes bigger (larger radius of Earth) then the constant $g$ approximation gets better. We thus expect that the corrections from the position-dependence of $g$ go like $\mathcal O(h/R)$.

% Exercise: check by explicit calculation, 2017 lec 1
\begin{exercise}
Check by explicit calculation that the correction to the constant $g$ approximation is linear in $h/R$. Start by writing the force law for a point source at distance $r=R+h$ from the center of the Earth. This gives a second order differential equation that is difficult to solve:
\begin{align}
  \ddot{h} = \frac{-g}{\left(1+\frac{h}{R}\right)^2} \ .
\end{align}
Taylor expand to reduce this to an equation of the form
\begin{align}
  \frac{d^2 q}{ds^2} = -1 + 2q \ .
\end{align}
Here we define the natural dimensionless variables, $q = h/R$ and $s = \left(g/R\right)^{1/2} t$. If the choice of $s$ is not obvious, please do everything in terms of $t$ and then observe that one can conveniently absorb a factor of $g/R$ into dimensionless time variables.\footnote{You should find an equation of the form $\ddot q = -(g/R)(1-\cdots)$.} Plug the dimensionless differential equation into \emph{Mathematica} or your favorite symbolic solver to obtain
\begin{align}
  q(s) = c_1 e^{\sqrt{2}s} + c_2 e^{-\sqrt{2} s} + \frac{1}{2} \ .
\end{align}
Argue that the initial condition $\left.\dot h(t)\right|_{t=0} = 0$ implies that the coefficients satisfy $c_1 = c_2$ so that you can combine the exponentials into a hyperbolic cosine.
% If $q_0$ is the value of $q(s)$ at $t=0$, show that $c_1 = (q_0  - 1/2)/2$.
Show that one obtains:
\begin{align}
  \frac{2q(s) - 1}{2q(0) -1} = \cosh(\sqrt{2}s) \ .
\end{align}
Argue why you can Taylor expand the right-hand side about small argument; that is, explain why $s \ll 1$. (Hint: use $h\ll R$.) Writing $q_0 \equiv q(0)$ for the initial value, perform the Taylor expansion of the hyperbolic cosine and set $q(s_1)=0$ to find that the fall time $s_1$, including the leading correction, satisfies
\begin{align}
  s_1^2 = \frac{2q_0}{1-2q_0} \ .
\end{align}
The zeroth order approximation was $s_0 = (g/R)^{1/2} t_0 = \sqrt{2q_0}$. Calculate $(s_1 - s_0)/s_0$ to confirm that this is $\mathcal O(h/R)$.
\end{exercise}

\subsection{Bonus: Allometry}

There is a fun topic called \textbf{allometry}.\index{allometry} This is basically dimensional analysis applied to biology. A typical example is to consider two people who have roughly the same shape but different characteristic lengths, $\ell$ and $L$, Fig.~\ref{fig:lec1_allometry}.
\marginfig{figures/lec01_allometry.pdf}{Two mathematically similar people.}{fig:lec1_allometry}

% \begin{center}
% \includegraphics[width=.4\textwidth]{figures/lec01_allometry.pdf}
% \end{center}

\begin{exercise}
If both people exercised at the same rate, which one loses more absolute weight? By how much? Let us assume that weight loss is primarily from the conversion of organic molecules into carbon dioxide.
\end{exercise}

\begin{exercise}
David Hu won his first IgNobel prize for determining that mammals take about 21 seconds to urinate, largely independently of their size\footnote{I learned about this in his excellent popular science book, \emph{How To Walk on Water and Climb Up Walls}.}. Can you use dimensional analysis to argue why this would be the case? It may be helpful to refer to the paper\sidecite{doi:10.1073/pnas.1402289111}. As you read it, figure out which terms are negligible (and in what limits), identify the assumptions of the mathematical model (scaling of the bladder and urethra), and prove the approximate scaling relation. Make a note to yourself of which steps were non-trivial and where one may have naively mis-modeled the system. By the way, David Hu won a second IgNobel prize for understanding wombats' cubical poop.
\end{exercise}

The above exercise on mammalian urination is a good example of \emph{modeling}.\index{model} As physicists, we must identify and make a mathematical model for the most salient features of a problem. We must also be able to quantify the error from neglecting sub-leading contributions. As a rough model for scaling purposes, we can ignore viscosity and surface tension effects on human-sized mammals. For much smaller mammals, these effects become larger---the authors of the study note that mice tend to urinate droplets---in which case one can ignore the `inertial' $\frac{1}{2} \rho v^2$ term in Bernoulli's equation. For human-sized mammals, we may assume that steady state urination is given by Bernoulli's equation:
\begin{align}
  P + \rho g h = \frac{1}{2}\rho v^2 \ ,
\end{align}
where $P$ is the pressure from the bladder, $h$ is the column height of the urethra, $\rho$ is the mass density of urine, and $v$ is the velocity of the urine at the end of the urethra. Let us simplify to the condition where urination is purely driven by gravity---that is, the bladder does not exert any additional pressure, $P=0$. You can now show that the total urination time scales like the mass of the mammal to the one-sixth power, $\tau \sim M^{1/6}$. That is, the urination time has a very weak scaling dependence on how massive the mammal is.

\begin{exercise}
In August 2021, Ezra Klein interviewed Dr.~C\'eline Gounder about the Delta variant of \acro{COVID-19}.~\sidecite{klein_2021} In the interview, Klein cited the statement that the Delta variant has $\mathcal O(1000)$ times the viral load of prior \acro{COVID} strains. Gounder then interpreted this in the following way: if the \acro{CDC} defined `close contact' for prior strains as 15 minutes of being indoors with an infected individual without a mask, then the equivalent `close contact' time for the Delta variant is around \emph{one second}. What scaling assumptions go into that estimate? Some of these assumptions are not obvious to me: for example, parts of the respiratory system have a fractal-like structure that would lead me to suspect fractal scaling dimensions for surface area. \acro{Remark}: Just because you know dimensional analysis, that does not make you a medical, healthcare, or public policy expert.\footnote{Early in the \acro{COVID-19} pandemic, many physicists became armchair modelers of epidemics. Some of this was driven by hubris about our mathematical intuition. Many of the physicists lost interest when their models aligned poorly with what actually happened.}
\end{exercise}

The following exercises draw from an article by Nicole Meyer-Vernet and Jean-Pierre Rospars in the \emph{American Journal of Physics}\sidecite{doi:10.1119/1.4917310} and the references therein.
 \begin{exercise}
 Estimate the expected velocity of an All Terrain Armored Transport (\acro{AT}-\acro{AT})\footnote{\url{https://starwars.fandom.com/wiki/All_Terrain_Armored_Transport}} of characteristic height $L$. You can assume that the walking behavior is based on a pendulum. \acro{Answer}: $v \sim \sqrt{Lg}/2\pi$.
 \end{exercise}

 \begin{exercise}
 Based on the density $\rho$, the force-per-cross-sectional area $\sigma$, and the maximum rate of energy consumption per unit mass $b$, one may estimate the `sprint' velocity of an animal of length $L$. This sprint velocity is conveniently described in units of `body lengths per time,' $v_\text{spr}/L$.

Remarkably, for over 20 orders of magnitude in animal mass, the value of $v_\text{spr}/L$ is within an order of magnitude of 10/sec:
% \begin{center}
%  \includegraphics[width=.7\textwidth]{figures/allometry_meyer-verneta.png}
% \end{center}
\textfig[1]{figures/allometry_meyer-verneta.png}{Image from Meyer-Vernet and Rospars.~\cite{doi:10.1119/1.4917310}}{fig:allometry_meyer-verneta}


Argue from dimensional analysis that $v_\text{spr}/L \sim b\rho/\sigma$. (This is the easy part.) It turns out that there are simple physical principles for each of these terms to be roughly constant for all life on Earth (this is the more subtle part); see the article for a discussion.
\end{exercise}

\begin{exercise}
The height of trees. How does the maximum height of a tree, $L$, scale with the diameter of its cross section, $d$? For an argument that $L\sim d^{2/3}$ (equivalently, $d \sim L^{3/2}$), see Thomas McMahon's article ``The Mechanical Design of Trees'' in \emph{Scientific American} volume 233 (1975)\footnote{\url{https://www.jstor.org/stable/24949846}}. McMahon was the first to propose a physical explanation for the observed scaling law that the metabolic rate of an animal scales like its body mass to the 3/4 power. A nice bibliography of his work can be found in \emph{Annual Review of Biomedical Engineering}.~\sidecite{doi:10.1146/annurev.bioeng.3.1.0}
\end{exercise}


\part{Linear Algebra}

\chapter{Finite-Dimensional Linear Algebra}

\section{Yet another review of linear algebra}

Linear algebra is part of our physics \acro{DNA}. So why should we patronize ourselves with yet another review of linear algebra?
%
We want to understand Green’s functions as a matrix inverse. The `matrix' in question is the differential operator $\mathcal O$ in \eqref{eq:greens:function:equation}.
%
The identification boils down to the following:
\begin{align}
  \text{differential operator}
  &=
  \infty\text{-dimensional matrix} \ .
\end{align}
This is a poetic equal sign; for example, not every infinite dimensional matrix is a differential operator.\footnote{At least not one with a finite number of terms.} You may know matrices as a block of numbers that act on columns of numbers---\emph{vectors}---to produce another vector.
If differential operators are matrices, what are the vectors on which they act? These matrices act on a space of functions, which turns out to be a vector space:
\begin{align}
  \text{function space} &= \infty\text{-dimensional vector space} \ .
\end{align}
Again, the equal sign is poetic.
Do not be intimidated by terminology like \emph{function space}; this is just an abstract place where functions live. Just recall back to your intuition from \acro{3D} Euclidean vector space, $\mathbb{R}^3$: any 3-vector $\vec{v}$ lives in the vector space $\mathbb{R}^3$. If we transform $\vec{v}$ by a linear transformation ${A}$, you get a new vector  $\vec{w} = {A}\vec{v} \in \mathbb{R}^3$ that is also in the vector space.

%
Weird things can happen when we extend our intuition from finite things to infinite things\footnote{For example, the Hilbert Hotel puzzle.}, but for this course we try to draw as much intuition as we can from finite dimensional linear algebra to apply it to infinite dimensional function spaces.


\section{What is Linear?}

A function\index{function}, $f(x)$, takes some kind of input $x$ and produces some output. When the inputs and outputs are real numbers, $x,f(x)\in \mathbbm{R}$, then we can plot this relation between inputs and outputs on the plane $\mathbbm{R}^2$ by the mapping $(x,f(x))$. The curve $f(x)$ is the set of all points $(x,y) \in \mathbbm{R}^2$ such that $y=f(x)$. Said in yet another way, $y=f(x)$ is a one dimensional slice of $\mathbbm{R}^2$. To emphasize that $f$ takes in real numbers and spits out real numbers, we can write $f: \mathbbm{R}\to\mathbbm{R}$.

Before we end up getting too pedantic about what a curve is, recall that even children can tell you that the equation $f(x) = mx+b$ defines a line in the two dimensional plane. In this relation, $m$ is the slope and $b$ is the intercept of the line with the $y$-axis. We must be more restrictive: we set $b=0$ and impose that a linear relation between two numbers $x$ and $f(x)$ takes the form $f(x)=mx$.

The reason for our apparent pedantry is to generalize the definition of {linearity} to extend beyond the picture of curves on $\mathbbm{R}^2$. A relation $f(x)$ is linear if the following are true:
\begin{align}
  f(\alpha x) &= \alpha f(x)\\
  f(x+y) &= f(x) + f(y) \ .
\end{align}
We assume that $x$ and $y$ are two objects of the same type, and $\alpha$ is simply some number.
\begin{exercise}
Confirm that $f(x)=mx$ is a linear function between real numbers for any value of $m\in \mathbbm{R}$.
\end{exercise}
We can state this all more formally. Suppose there is some collection of objects that we call $V$. Let $x$ and $y$ be two such objects: $x,y\in V$. Further, let $\alpha$ and $\beta$ be two numbers: $\alpha,\beta\in\mathbbm{R}$. Then a function $f:V\to W$ is \textbf{linear}\index{linear} if and only if\footnote{One can also write ``$\Leftrightarrow$'' to mean `if and only if.'}
\begin{align}
  f(\alpha x + \beta y) = \alpha f(x)+\beta f(y) \ .
  \label{eq:def:linear}
\end{align}
Let us dissect this a bit. We stated that $x$ and $y$ have to be the same type of object, members of the class $V$. This echoes our discussion of dimensional analysis: if $x$ are apples and $y$ are Pokemon, then $2.5x+7y$ is nonsensical and so any function of such an object is nonsensical. We defined $\alpha$ and $\beta$ to be numbers\footnote{More generally, these are elements of a \textbf{field}: sets of objects with addition and multiplication defined.}: these just count how many objects in $V$ are being fed into our function. For now we assume that these are real numbers, but we will soon generalize to complex numbers.

We wrote $f:V\to W$, which means that the output of the function $f(x)$ is an object of class $W$. In our toy example $f(x)=mx$, $V=W=\mathbbm{R}$. In general, $W$ and $V$ could be two totally different classes of objects. $W$ could be real numbers, something with units, a mathematical object with more structure, or any other class of objects. No matter what $V$ and $W$ are, the relation \eqref{eq:def:linear} tells us when a function is linear.

\begin{example}
Let $R$ be a function that maps angles $\theta$ to rotation matrices,
\begin{align}
  R(\theta) =
  \begin{pmatrix}
    \phantom{+}\cos\theta & \phantom{+}\sin\theta \\
    -\sin\theta & \phantom{+}\cos\theta
  \end{pmatrix} \ .
  \label{eq:vectors:R:theta}
\end{align}
This map is not linear because
\begin{align}
  R(\theta_1 + \theta_2) \neq R(\theta_1) + R(\theta_2) \ ,
\end{align}
as you can check for the trivial case of $\theta_1 = \theta_2 = 0$.
\end{example}
\begin{example}
The rotation matrix $R(\theta)$ in \eqref{eq:vectors:R:theta} is a linear map from two component vectors to two component vectors. Thus, $R(\theta): \mathbbm{R}^2 \to \mathbbm{R}^2$ is a linear function. But the map $R:\mathbbm{R} \to \mathbbm{R}^{2\times 2}$ that takes an angle $\theta$ and returns a $2\times 2$ matrix is \emph{not} a linear function.
\end{example}
\begin{exercise}
Let $f$ be a function that maps driving speed to the probability of being pulled over by the police or highway patrol. Explain why $f$ cannot be linear.
\end{exercise}

We can diagnose the linearity of a function, even when that function inputs and outputs objects that are more general than simply numbers. Thus far, we have been rather coy about what it means to be an ``object in class $V$.'' This leads us to the notion of a vector space.

\section{Vectors and Vector Spaces}

For our purposes in this course, we consider functions of \textbf{vectors}. Not all objects of interest are vectors, for example, coordinates are decidedly not vectors. The term \emph{position vector} is thus a faux pas.\footnote{Mathematicians chuckle at freshman physics textbooks. The notion of a `position vector' implies identifying some special point, the origin.} On the contrary, infinitesimal differences of coordinates are vectors called \textbf{tangent vectors}. So what are vectors and vector spaces?

We focus on an imprecise, but physically intuitive working definition. For those who prefer a more mathematical discussion that is still tied to physical intuition, see \emph{Geometry, Topology, and Physics} by Nakahara.\sidecite{Nakahara:206619} A \textbf{vector}\index{vector} is an element of a vector space. That is, a \textbf{vector space}\index{vector space} is the `complete' collection of a given type of vector.

We have already assumed that you can add vectors and rescale them by numbers; that is, we can take \textbf{linear combinations}\index{linear combination} of vectors. That one can do this is implicit in our definition of linearity, \eqref{eq:def:linear}. Let $\vec{v}$ and $\vec{w}$ be any two vectors in the vector space $V$; we write this as $\vec{v},\vec{w}\in V$. Further, let $\alpha, \beta$ be numbers. We have the following rules:
\begin{itemize}
  \item The sum $\alpha\vec v + \beta\vec w$ is a vector in $V$.
  \item The zero vector $\vec 0$ is a vector in $V$ such that $\vec 0 + \vec v = \vec v$.
\end{itemize}
Formally there are other rules, but you probably already assumed them.\footnote{Please refer to your favorite linear algebra book.} These include the fact that vector addition is commutative, $\vec v + \vec w = \vec w + \vec v$, and associative, $(\vec v + \vec w) + \vec u = \vec v + (\vec w + \vec u)$.
\begin{example}
The existence of a meaningful zero vector is one example why `position space' or coordinate space is not a vector space. The zero vector $\vec 0$ is the one that leaves other vectors unchanged upon addition: $\vec 0 + \vec x = \vec x$. Clearly this depends on the coordinate system and violates our intuition that physics should be independent of the choice of coordinates. Further, the notion of `adding positions' is physically nonsensical. On the contrary, differences of positions $\Delta\vec{x} = \vec{x}_1-\vec{x}_2$ \emph{are} meaningful. For example, the force between two non-relativistic point particles depends on the vector that is the difference between the particle positions.
\end{example}

% https://twitter.com/FlipTanedo/status/1690107498039230464

\begin{exercise}\label{ex:color:space} \textbf{Color Space}. Colors for digital media have many representations. One popular representation is \acro{RGB} where the intensity of red, green, and blue components are specified. For example, $(25,174,40)$ is a pleasant shade of green. This triplet of numbers certainly looks like a vector: it is an ordered collection of numbers. Is the space of \acro{RGB} colors a vector space? \emph{Answer: ``kind  of.'' Make a list of reasons why and why not.} Like all meaningful questions, this rabbit hole runs deep; for example, see articles on the geometry of colors~\sidecite{weinberg1976geometry} or the overlapping sciences of physics and neurological perception of color~\sidecite{Logvinenko:2022}. To get you started: the reason why we have three primary colors is that we have three types of cone cells that detect color. Our neurological color response is an integral of an electromagnetic spectrum with the sensitivity of each of these cone types. In this way, each cone type defines a basis for the space of color responses.
\end{exercise}

\section{Notation for Vectors}
\label{sec:vector:notation}

Physicists use a few different notations for vectors depending on the context. In these notes we also use whichever notation is most convenient for the task and we are not ashamed to change notation as needed. Students should be nimble to be able to make use of different notations.\footnote{As a student I was once very confused because a textbook introduced the notation $\overleftrightarrow{\partial}$. Only much later would I come to appreciate the perspicacity of that symbol.} It is absolutely critical that even though we can refer to a mathematical object with different notation, the underlying idea is the same. This insight is the key to never getting lost in the forest of special functions (Bessel, Legendre, spherical harmonics, etc.) that appear in physics.

\subsection{Bold/over-arrow notation} Thus far we refer to vectors with a boldfaced and italicized symbol,\footnote{See \acro{ISO 80000-2:2019} from the International Organization for Standardization.} $\vec{v}$. For the author this is a comfortable and familiar notation from school. On a board, we sometimes write the vector with an underline $\underline{v}$ since boldfaced can be difficult to write freehand. Another notation is to use arrows over the symbol, $\overrightarrow{\mathbf{v}}$. %Tensor $\tensor{T}$. ISO 80000-2:2019

\subsection{Ket notation} Bra--ket notation is particularly useful in physics and is familiar from quantum mechanics. Vectors are \emph{kets}, $\ket{v}$. As we review below, `row vectors' are \emph{bras}, $\bra{w}$. This is all a tongue-in-cheek joke because you can combine a bra and a ket to form a \emph{braket}, $\langle w \,|\, v\rangle$, which we identify as the inner product (dot product) of two vectors $\langle w, v\rangle = \vec{w}\cdot\vec{v}$.

\subsection{Index notation} A common metaphorical extension\footnote{\url{https://en.wikipedia.org/wiki/Metaphorical_extension}} in physics is the notation $v^i$. Technically, $v^i$ refers to the $i^\text{th}$ component of the vector $\vec{v}$. However, since physicists often write their equations component-wise, we often slip into the shorthand of using $v^i$ to mean either a given component or the entire vector. The appropriate meaning is usually clear from context. One may formalize this shorthand in so-called abstract index notation or `slot-naming' index notation.\footnote{\url{https://en.wikipedia.org/wiki/Abstract_index_notation}}

\subsection{Index-free notation}
Mathematicians prefer an index-free notation where vectors and tensors are understood as linear maps. As physicists, we usually \emph{like} indices because they often indicate how an object transforms with respect to a physical symmetry transformation. We occasionally write tensorial objects (objects with indices) without indices when it is clear that the indices are implicit.
%
% We identify functions as vectors in an infinite dimensional space. In this case, it is often sufficient to avoid any additional adornment, so we write a function as $f$. The analog of a component of a vector, $v^i$, is the function at a point, $f(x)$.


\section{Basis Vectors}


The number of vectors in a vector space is formally infinite. If $\vec v$ is a vector, then so are $2\vec{v}$ and $3\vec{v}$, not to mention $0.999\vec{v}$, ad nauseam. For finite-dimensional vector spaces, we can pick a finite number of reference vectors and define any other vector with respect to those reference vectors. We call those reference vectors \textbf{basis vectors}
% $\hat{\vec{e}}_{i}$
$\basis{i}$
and the entire set a \textbf{basis}\index{basis} for the vector space. We may express any vector $\vec v$ in the vector space as a linear combination of basis vectors:
\begin{align}
  \vec{v} = \sum_i v^i
  % \hat{\vec{e}}_{(i)}
  \basis{i}
  \ .
\end{align}
\begin{example}
In two dimensional Euclidean space, $\mathbbm{R}^2$, we have a canonical basis
\begin{align}
 % \hat{\vec{e}}_{(1)}
 \basis{1}
 \equiv
  \begin{pmatrix}
    1\\0
  \end{pmatrix}
  &&
 % \hat{\vec{e}}_{(2)}
 \basis{2}
 \equiv
  \begin{pmatrix}
    0\\1
  \end{pmatrix} \ .
\end{align}
\emph{Canonical} is just a fancy way to say `\emph{the obvious choice}.' The first basis vector points in the $x$ direction, the second points in the $y$ direction. We can thus represent a vector $\vec{v}$ as a linear combination,
\begin{align}
  \vec{v} =
  \begin{pmatrix}
    v^1 \\ v^2
  \end{pmatrix}
  =
  v^1
  % \hat{\vec{e}}_{(1)}
  \basis{1}
  +
  v^2
  % \hat{\vec{e}}_{(2)}
  \basis{2}
  \ .
\end{align}
We could have defined a different basis, for example
\begin{align}
 % \hat{\vec{e}}_{(1)}'
 \basis{1}'
 \equiv
  \frac{1}{\sqrt{2}}
  \begin{pmatrix}
    1\\1
  \end{pmatrix}
  &&
 % \hat{\vec{e}}_{(2)}'
 \basis{2}'
 \equiv
  \frac{1}{\sqrt{2}}
  \begin{pmatrix}
    \phantom{+}1\\-1
  \end{pmatrix} \ .
\end{align}
In this primed basis the vector $\vec{v}$ would have different components,
\begin{align}
  \vec{v} = v'^1
  % \hat{\vec{e}}_{(1)}'
  \basis{1}'
  +
  % v'^2\hat{\vec{e}}_{(2)}'
  v'^2
  \basis{2}'
  \ ,
\end{align}
where clearly $v'^i \neq v^i$.
\end{example}
\begin{exercise}
Suppose $v^1 = 3$ and $v^2 = -4.5$ in the unprimed basis from the example above. Find the corresponding primed components $v'^1$ and $v'^2$. You do not have to do this in any fancy systematic way. \emph{Suggestion}: just draw the vectors on $\mathbbm{R}^2$. You do not often get to do this, especially with more abstract vectors. But when you can do something the simple way, you should do it that way and then think about how to generalize `the simple way.'
\end{exercise}

The number of basis vectors required to describe any vector is called the \textbf{dimension}\index{dimension} of the vector space. The dimension of $\mathbbm{R}^2$ is two. If you specify fewer basis vectors than the dimension of the vector space, then there are vectors that you cannot describe. If you specify more basis vectors than the dimension of the vector space, then there is not a unique way to specify the vector components.\footnote{\emph{Then shalt thou count to three, no more, no less. Three shall be the number thou shalt count, and the number of the counting shall be three. Four shalt thou not count, neither count thou two, excepting that thou then proceed to three. Five is right out.} (\emph{Monty Python \& The Holy Grail}, 1975)}
\begin{example}
In $\mathbbm{R}^2$, suppose we specified \emph{three} basis vectors,
\begin{align}
 % \hat{\vec{e}}_{(1)}
 \basis{1}
 \equiv
  \begin{pmatrix}
    1\\0
  \end{pmatrix}
  &&
 % \hat{\vec{e}}_{(2)}
 \basis{2}
 \equiv
  \begin{pmatrix}
    0\\1
  \end{pmatrix}
  &&
 % \hat{\vec{e}}_{(3)}
 \basis{3}
 \equiv
  \frac{-1}{\sqrt{2}}
  \begin{pmatrix}
    1\\1
  \end{pmatrix}
  \ .
\end{align}
The vector $\vec{v} =\hat{\vec{e}}_{(1)} +\hat{\vec{e}}_{(2)}$ can equivalently be written as $\vec{v} = -\sqrt{2}\,\hat{\vec{e}}_{(3)}$.
\end{example}
\begin{exercise}
In the example above, write $\vec{v}$ with respect to the three basis vectors in a way that has not yet been specified. Repeat this exercise until it is obvious that there are an infinite number of ways of writing $\vec{v}$ with respect to the three basis vectors. Contrast this to the case where we restrict to any pair of the basis vectors, in which case the components of $\vec{v}$ are unique.
\end{exercise}

\section{Nice basis vectors}
\label{sec:nice:basis}

You have likely been trained to \emph{assume} that a basis is \emph{nice}, following the notion of assumed \emph{niceness}\footnote{Section~\ref{sec:niceness}.} in physics. As we move towards abstract vector spaces, it is worth explicitly stating these assumptions so that we can make a note of where they may break and what other mathematical structure we need to define them. The two assumptions are linear independence and orthonormality.

\subsection{Linear independence}

Two vectors $\vec{v}$ and $\vec{w}$ are \textbf{linearly independent}\index{linear independence} if they are not proportional to each other: $\vec{v} \neq \alpha \vec{w}$ for any number $\alpha$. The basis vectors for any reasonable basis are linearly independent---any given basis vector $\vec{e}_{(i)}$ cannot be written as a linear combination of the other basis vectors:
\begin{align}
\hat{\vec{e}}_{(i)} \neq \sum_{j\neq i} \alpha^j\, \hat{\vec{e}}_{(j)} \ ,
\end{align}
for any choice of coefficients $\alpha^j$, where the sum is over all basis vectors $\hat{\vec{e}}_{(j)}$ except the $i^\text{th}$ basis vector. It should be obvious\footnote{In the sense of Section~\ref{sec:obvious}.} that a proposed basis with a linearly dependent basis vector
\begin{enumerate}
  \item Does not uniquely define the components of some vectors $\vec{v}$ in the vector space.
  \item Either has more basis vectors than the dimension of the space, or there are vectors in the space that cannot be described by the basis.
\end{enumerate}
Thus every basis vector in any reasonable basis is linearly independent from the other basis vectors.

\subsection{Orthonormality}

Okay, this is actually two different conditions: orthogonal and normal. The basis vectors of a nice basis are \emph{orthogonal} to every other basis vector and \emph{normalized} to have unit length. \textbf{Orthogonal} means that the vectors are perpendicular\index{orthogonal}.

\emph{Eh...} then what do we mean by \emph{perpendicular}? We certainly have some notion of two directions being perpendicular from everyday life: north is perpendicular to west because if we move five steps north we are stationary in the east--west direction. But how do we define this mathematically? In fact, while we are at it: how do we define `unit length' with respect to these vectors?\footnote{From a physics perspective: in what units?}

\begin{example}
Orthogonality and linear independence are related but are not the same. Orthogonal vectors are linearly independent, but linear independence does not imply orthogonality. For example, consider the basis of $\mathbbm{R}^2$:
\begin{align}
 \hat{\vec{e}}_{(1)} \equiv
  \begin{pmatrix}
    1\\0
  \end{pmatrix}
  &&
 \hat{\vec{e}}_{(2)} \equiv
  \begin{pmatrix}
    1\\1
  \end{pmatrix}
  \label{eq:nice:basis:eg:e1e2}
  \ .
\end{align}
These vectors are obviously linearly independent. Assuming the usual Euclidean inner product, they are, however, \emph{not} orthogonal. The observant student will notice that linear independence does not require one to define an inner product, whereas orthogonality is only defined with respect to some inner product.
\end{example}
\begin{exercise}
Let $\vec{v}$ be the vector that is pointing in the $\hat{\vec{x}}$ direction of $\mathbbm{R}^2$.  What are the components of $\vec{v}$ with respect to the basis in \eqref{eq:nice:basis:eg:e1e2}? Similarly, let $\vec{w}$ be the vector that is pointing in the $\hat{\vec{y}}$ direction of $\mathbbm{R}^2$. What are the components of $\vec{w}$ with respect to the basis in \eqref{eq:nice:basis:eg:e1e2}?
\end{exercise}
Unlike linear independence, orthonormality is not a strictly necessary condition for having a basis---though it is hard to imagine a scenario where one would \emph{not} use an orthonormal basis. However, the notions of orthogonality and normalization depend on \emph{additional} mathematical structure that we have to impose/assume/invent for our vector space. Formally, we say that we promote the vector space to a \textbf{metric space}\index{metric space}. The additional structure that we define is a machine that takes two vectors and tells us something about the `distance' between them. We call this machine the \textbf{metric}\index{metric}, \textbf{inner product}\index{inner product}, or \textbf{dot product}\index{dot product}; each phrase refers to the same thing. Once we have a metric, the Gram--Schmidt procedure assures us that we can construct an orthonormal basis from a linearly independent basis, see Exercise~\ref{ex:gram:schmidt}.


At this point we should take a deep breath and state explicitly that we’ve been assuming an orthonormal basis. In this course we will continue to use an orthonormal basis. You may object to this and say that you used to believe in orthonormal bases until you were forced to write down the gradient (or worse, the Laplacian) in spherical coordinates.

\begin{exercise}
Are polar coordinates an orthonormal basis? You can generalize this to spherical or cylindrical coordinates. Take a moment to think about this. Do vectors in polar coordinates even make sense as vectors? Do we have a sensible addition rule? These questions are unfair because they make the assumption that the vector space described by these coordinates is ``the same'' as the coordinate space. This seems perfectly innocuous until you have learned a bit of differential geometry or general relativity. If this piques your interest, I encourage you to read more about this.\footnote{A good reference that presents both the physical intuition and mathematical formalism is Sean Carroll's \emph{Spacetime and Geometry}; Carroll's appendices are an excellent crash course in differential geometry.}
\end{exercise}


There are many things to be said about non-Cartesian (``curvilinear'') coordinate systems and orthonormality. None of them are particularly edifying without a full discussion. With no apologies, I make the following [perhaps perplexing] remarks, illustrated in a figure below:
\begin{enumerate}
\item There is no such thing as a `position vector.' Positions refer to some base space, whereas vectors (like differential operators) act on the tangent space at a point of that base space.
\item A given tangent space is `nice’ and has a nice orthonormal basis.
\item That basis may not be the same for neighboring tangent spaces (perhaps due to coordinates, perhaps due to intrinsic curvature).
\end{enumerate}
In this course these nuances will not come up. In the rest of your life you will still have to deal with curvilinear coordinates.\footnote{There is an excellent discussion of non-coordinate bases and curvilinear coordinates in Bernard Schutz's \emph{A First Course in General Relativity}. See Chapter 5.6 (in the first edition), ``Noncoordinate bases.''} Suffice it to say that our study of function space will be nice and orthonormal. %We haven’t yet given an adequate definition of `orthonormality,’ so let's take \eqref{eq:basis:dual:vec:act:on:vec} as a working definition.

\begin{center}
\includegraphics[width=.7\textwidth]{figures/Lec_2021_tangentS2.pdf}
\end{center}


\section{Examples of Finite-Dimensional Vector Spaces}

\subsection{Space and spacetime vectors}
There are some obvious examples of vector spaces. The one you are most used to is $\mathbbm{R}^3$, the `ordinary' (Cartesian) three-dimensional space. The components of $\vec{v}\in \mathbbm{R}^3$ are simply three real numbers $v^i$ multiplying the `obvious' basis vectors:
\begin{align}
   v^i \hat{\vec{e}}_{(i)}
   =
   v^1
   \begin{pmatrix}
     1 \\ 0 \\ 0
   \end{pmatrix}
   +
   v^2
   \begin{pmatrix}
     0 \\ 1 \\ 0
   \end{pmatrix}
   +v^3
   \begin{pmatrix}
     0 \\ 0 \\ 1
   \end{pmatrix} \ .
 \end{align}
 We can generalize to $\mathbbm{R}^n$ with $n$ basis vectors. From special relativity you may be familiar with four-vectors with components $(p^0, p^1, p^2, p^3)$. We index starting with a zero by historical convention, but it serves to indicate that the \emph{timelike} component $p^0$ is special. At this level, it looks like spacetime is $\mathbbm{R}^4$. And indeed, at this level that would be accurate. We will see in the next section that when we extend from a vector space to a \emph{metric space}, there is an important distinction and spacetime is in fact $\mathbbm{R}^{1,3}$.

This is a good place to remind ourselves that there is no such thing as a `position vector.' What people really mean by a position vector is the vector between some arbitrarily chosen origin and a given point in space.


There are more abstract versions of vector spaces.

\subsection{Matrices}
  The space of $2\times 2$ Hermitian\footnote{A complex matrix $A$ is Hermitian if $A^\dag = (A^\text{T})^* = A$.} matrices is a vector space with basis elements
 \begin{align}
   \mathbbm{1} &=
   \begin{pmatrix}
     1 & 0 \\
     0 & 1
   \end{pmatrix}
   &
   \sigma^1 &=
   \begin{pmatrix}
     0 & 1 \\
     1 & 0
   \end{pmatrix}
   &
   \sigma^2 &=
   \begin{pmatrix}
     0 & -i \\
     i &  0
   \end{pmatrix}
   &
   \sigma^3 &=
   \begin{pmatrix}
     1 & 0 \\
     0 & -1
   \end{pmatrix} \ .
 \end{align}
 Convince yourself that you can form any Hermitian $2\times 2$ matrix out of these four basis matrices. You may recognize the familiar Pauli matrices $\sigma^i$ from quantum mechanics.

\subsection{Quantum states}

While we have mentioned quantum mechanics, the space of quantum mechanical states is a kind of vector space. This is why the bra-ket notation that we usually learn in quantum mechanics is equally applicable to linear algebra. In fact, the big \emph{aha!} moment of the first time you learned quantum mechanics should have been when you realized that quantum mechanics is simply physicists doing linear algebra. As a bonus, quantum mechanics has primed us to already be comfortable with \emph{complex} vector spaces.

There are a few caveats. In quantum mechanics two states that differ by a phase are equivalent. This means that a state is not just a given vector $\ket\psi$ in the complex vector space, but a \emph{ray} where $\ket\psi$ is identified\footnote{We use the symbol $\cong$ to mean `identified with.'} with all rephasings, $\ket\psi \cong e^{i\theta}\ket\psi$. This identification simply means that two vectors that only differ by a phase correspond to the same quantum state: for example, they correspond to the same observable (if the states are directly observable).

\subsection{Functions}

This is a powerful idea hidden behind a trivial idea. Functions are vectors in a vector space that we call \emph{function space}\index{function space}. I am pretty sure that mathematicians do not call it that, but we use this terminology anyway.  This should be \emph{obviously} true. Consider two functions $f$ and $g$. Maybe $f(x) = x^3$ and $g(x) = x + 2$, it does not matter. Clearly you can take a linear combination of the two and the result is a function:
\begin{align}
  \alpha f(x) + \beta g(x) = \alpha x^3 + \beta x + 2\beta \ .
\end{align}
This is a totally valid function that we could call $(\alpha f + \beta g)(x)$.

\begin{exercise}
What are some possible bases for function space?
\end{exercise}

A judicious choice of basis for function space can make our lives much easier. This is a central theme of this course and justifies the bestiary of special functions that you meet in graduate school. Let us ignore the subtleties of defining a function space for now---we get to this soon enough. The following exercise gives a taste of what it means to work with functions as vectors.


\begin{exercise}
%https://math.stackexchange.com/questions/942263/really-advanced-techniques-of-integration-definite-or-indefinite/943212

Here is a cute two-dimensional function space that gives us a shortcut to calculate a particular \emph{indefinite} integral.  Consider a two dimensional vector space spanned by the functions
\begin{align}
  \left|f_1\right\rangle
  &= f_1(x) =
  e^{ax} \cos bx
  &
  \left|f_2\right\rangle
  &=
  f_2(x) =
  e^{ax} \sin bx \ ,
\end{align}
where $a$ and $b$ are constants. Forget orthonormality or boundary conditions for this problem. The derivative $d/dx$ is a linear operator that acts on this space. Write down the derivative as a $2\times 2$ matrix in the above basis, $D$.

Invert $D$ in the usual way that you learned to invert $2\times 2$ matrices during your childhood\footnote{Stuck? Here's a life pro tip: \url{http://bfy.tw/KG2Z}}. Call this matrix $D^{-1}$.

Now stop and think: the inverse of a derivative is an indefinite integral\footnote{Ignore the constant term.}. Thus acting with $D^{-1}$ on the vector $|f_1\rangle$ should be understood as an integral of $f_1(x)$. Show that, indeed,
\begin{align}
  D^{-1} |f_1\rangle = \int dx\, e^{ax} \cos bx \ .
\end{align}
Feel free to use \emph{Mathematica} to do the indefinite integral on the right-hand side. Pat yourself on the back if you can do it without a computer.
\end{exercise}

\subsection{More exotic examples}

We saw in Example~\ref{ex:color:space} that color space is almost---but not quite---a vector space. There are, however, plenty of unusual vector spaces.

One fun example is the space of Fibonacci sequences. These are sequences of numbers $\{x_1, x_2, x_3, \cdots \}$ such that $x_n = x_{n-1} + x_{n-2}$. You generate a Fibonacci sequence by picking some $x_1$ and $x_2$ and determining all subsequent values using the defining rule.
\begin{exercise}
Show that the Fibonacci sequences form a vector space. What is the dimension of this vector space? Hint: the dimension is \emph{not infinite}.
\end{exercise}

Another example that shows up in mathematical physics is something called a \textbf{root space}. This is a space spanned by a finite number of lattice basis vectors. The space is defined so that vectors are \emph{integer} multiples of the basis vectors. Surprisingly, root space has an intimate connection to the study of continuous symmetries---the representation theory of Lie groups.

Along these lines, you could consider the vector space of $n$-bits, $(\mathbbm{Z}_2)^n$. The bits are either zero or one and you impose modular arithmetic.
%
You can find several other great examples by judicious Googling.\footnote{I grabbed some of these examples from \url{https://math.stackexchange.com/q/5233/1032899}.}
% fibbonacci sequences as an example
% https://math.stackexchange.com/questions/2738065/fibonacci-sequences-as-a-vector-space

%% Other great examples
% https://math.stackexchange.com/questions/5233/vivid-examples-of-vector-spaces
% simplices


\section{The metric, inner product, or dot product}

The \textbf{metric} (inner product or dot product) on a real vector space $V$ is a bilinear\footnote{This simply means linear in each argument.} map from $V\times V\to \mathbbm{R}$.\footnote{We also care about \emph{complex} vector spaces, in which case the metric is a map from $V\times V\to \mathbbm{C}$.} This means that it is a machine that takes two vectors and returns a number. We use the metric to measure one vector with respect to another. We further impose that the metric is symmetric: it should not matter whether we measure $\vec{v}$ with respect to $\vec{w}$ or vice versa: their `overlap' should be the same.
%
We use the angle bracket notation where $\langle \vec{v},\vec{w}\rangle$ is the inner product of two vectors $\vec{v}$ and $\vec{w}$.\footnote{This is deliberately suggestive of the ket notation for vectors, Sec.~\ref{sec:vector:notation}.}
%
Summarizing the above properties in equations:
\begin{align}
  \langle \alpha \vec{v}+\beta\vec{w}, \vec{u} \rangle &=
  \alpha \langle \vec{v},\vec{u}\rangle +
  \beta \langle \vec{w},\vec{u}\rangle
  \\
  \langle \vec{v}, \alpha \vec{w} + \beta \vec{u} \rangle &=
  \alpha \langle \vec{v},\vec{w}\rangle +
  \beta \langle \vec{v},\vec{u}\rangle
  \\
  \langle \vec{v},\vec{w}\rangle  &=
  \langle \vec{w},\vec{v}\rangle  \ .
\end{align}
The \textbf{norm}\index{norm} of a vector is simply its length with respect to the metric. We write the norm of a vector $\vec{v}$ as $|\vec{v}|$, or sometimes as $||\vec{v}||$ when we really want to be fancy. The norm is simply the square root of the inner product of the vector with itself:
\begin{align}
  |\vec{v}| = \sqrt{\langle \vec{v}, \vec{v}\rangle} \ .
\end{align}
Because we want lengths to make sense, we make a further requirement that any sensible metric is \emph{positive definite},\footnote{Mathematicians use the adjective \textbf{Riemannian}\index{Riemannian} to mean `positive definite' when applied to metrics. Physicists often assume that their metrics are Riemannian, with the exception of spacetime, which is semi-Riemannian... because time is funny.}
\begin{align}
  \langle \vec{v},\vec{v}\rangle > 0
\end{align}
for any nonzero vector $\vec{v}\in V$.
%
Both the angle bracket notation $\langle \vec v, \vec w \rangle$ and the dot product notation $\vec v \cdot \vec w$ make it clear that the metric is an operation between two vectors. There is another notation that highlights the idea of the metric as a map/function on $V\times V$:
\begin{align}
  g(\vec v, \vec w) = \langle \vec v, \vec w \rangle \ .