From c7f62cff53d646158fe888b97118bff039f592f4 Mon Sep 17 00:00:00 2001 From: eoropeza Date: Sat, 27 Mar 2021 21:58:36 -0500 Subject: [PATCH 1/9] tsne update --- classification/tsne_from_main_train.py | 726 +++++++++++++++++++++++++ 1 file changed, 726 insertions(+) create mode 100644 classification/tsne_from_main_train.py diff --git a/classification/tsne_from_main_train.py b/classification/tsne_from_main_train.py new file mode 100644 index 0000000..c52a9e2 --- /dev/null +++ b/classification/tsne_from_main_train.py @@ -0,0 +1,726 @@ +# Modified from Jian Kang, https://www.rsim.tu-berlin.de/menue/team/dring_jian_kang/ +# Modified by Yu-Lun Wu, TUM + +import os +import argparse +import numpy as np +from datetime import datetime +from tqdm import tqdm + + +import matplotlib.pyplot as plt +from matplotlib import colors +import matplotlib + + +# import sklearn +from sklearn.manifold import TSNE + +import torch +import torch.optim as optim +import torchvision.transforms as transforms +import torch.backends.cudnn as cudnn +from torch.utils.data import DataLoader +# from tensorboardX import SummaryWriter + +import shutil +import sys +sys.path.append('../') + + +from dataset import SEN12MS, ToTensor, Normalize +from models.VGG import VGG16, VGG19 +from models.ResNet import ResNet50, ResNet101, ResNet152, Moco, Moco_1x1, ResNet50_1x1 +from models.DenseNet import DenseNet121, DenseNet161, DenseNet169, DenseNet201 +from metrics import MetricTracker, Precision_score, Recall_score, F1_score, \ + F2_score, Hamming_loss, Subset_accuracy, Accuracy_score, One_error, \ + Coverage_error, Ranking_loss, LabelAvgPrec_score, calssification_report, \ + conf_mat_nor, get_AA, multi_conf_mat, OA_multi + +import wandb + +#sec.2 (done) + +model_choices = ['VGG16', 'VGG19', + 'ResNet50','ResNet101','ResNet152', + 'DenseNet121','DenseNet161','DenseNet169','DenseNet201', 'Moco'] +label_choices = ['multi_label', 'single_label'] + +# ----------------------- define and parse arguments -------------------------- +parser = argparse.ArgumentParser() + +# experiment name +parser.add_argument('--exp_name', type=str, default=None, + help="experiment name. will be used in the path names \ + for log- and savefiles. If no input experiment name, \ + path would be set to model name.") + +# data directory +parser.add_argument('--data_dir', type=str, default=None, + help='path to SEN12MS dataset') +parser.add_argument('--label_split_dir', type=str, default=None, + help="path to label data and split list") +parser.add_argument('--data_size', type=str, default="full", + help="64, 128, 256, 1000, 1024, full") +# input/output +parser.add_argument('--use_s2', action='store_true', default=False, + help='use sentinel-2 bands') +parser.add_argument('--use_s1', action='store_true', default=False, + help='use sentinel-1 data') +parser.add_argument('--use_RGB', action='store_true', default=False, + help='use sentinel-2 RGB bands') +parser.add_argument('--IGBP_simple', action='store_true', default=True, + help='use IGBP simplified scheme; otherwise: IGBP original scheme') +parser.add_argument('--label_type', type=str, choices = label_choices, + default='multi_label', + help="label-type (default: multi_label)") +parser.add_argument('--threshold', type=float, default=0.1, + help='threshold to convert probability-labels to multi-hot \ + labels, mean/std for normalizatin would not be accurate \ + if the threshold is larger than 0.22. 
\ + for single_label threshold would be ignored') +parser.add_argument('--eval', action='store_true', default=False, + help='evaluate against test set') + +# network +parser.add_argument('--model', type=str, choices = model_choices, + default='ResNet50', + help="network architecture (default: ResNet50)") + +# training hyperparameters +parser.add_argument('--lr', type=float, default=0.001, + help='initial learning rate') +parser.add_argument('--decay', type=float, default=1e-5, + help='decay rate') +parser.add_argument('--batch_size', type=int, default=64, + help='mini-batch size (default: 64)') +parser.add_argument('--num_workers',type=int, default=4, + help='num_workers for data loading in pytorch') +parser.add_argument('--epochs', type=int, default=100, + help='number of training epochs (default: 100)') +parser.add_argument('--resume', '-r', type=str, default=None, + help='path to the pretrained weights file', ) +parser.add_argument('--pt_dir', '-pd', type=str, default=None, + help='directory for pretrained model', ) +parser.add_argument('--pt_name', '-pn', type=str, default=None, + help='model name without extension', ) +parser.add_argument('--pt_type', '-pt', type=str, default=None, + help='model name without extension', ) + +args = parser.parse_args() + +wandb.init(config=args) + +# -------------------- set directory for saving files ------------------------- + +if wandb.run is not None: + # save to wandb run dir for tracking and saving the models + checkpoint_dir = wandb.run.dir + logs_dir = wandb.run.dir +elif args.exp_name: + checkpoint_dir = os.path.join('./', args.exp_name, 'checkpoints') + logs_dir = os.path.join('./', args.exp_name, 'logs') +else: + checkpoint_dir = os.path.join('./', args.model, 'checkpoints') + logs_dir = os.path.join('./', args.model, 'logs') + +if not os.path.isdir(checkpoint_dir): + os.makedirs(checkpoint_dir) +if not os.path.isdir(logs_dir): + os.makedirs(logs_dir) + +# ----------------------------- saving files --------------------------------- +def write_arguments_to_file(args, filename): + with open(filename, 'w') as f: + for key, value in vars(args).items(): + f.write('%s: %s\n' % (key, str(value))) + +def save_checkpoint(state, is_best, name): + + filename = os.path.join(checkpoint_dir, name + '_checkpoint.pth') + + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(checkpoint_dir, name + + '_model_best.pth')) + +# -------------------------------- Main Program ------------------------------- +def main(): + global args + + # save configuration to file + sv_name = datetime.strftime(datetime.now(), '%Y%m%d_%H%M%S') + print('saving file name is ', sv_name) + + write_arguments_to_file(args, os.path.join(logs_dir, sv_name+'_arguments.txt')) + +# ----------------------------------- data + # define mean/std of the training set (for data normalization) + label_type = args.label_type + + bands_mean = {'s1_mean': [-11.76858, -18.294598], + 's2_mean': [1226.4215, 1137.3799, 1139.6792, 1350.9973, 1932.9058, + 2211.1584, 2154.9846, 2409.1128, 2001.8622, 1356.0801]} + + bands_std = {'s1_std': [4.525339, 4.3586307], + 's2_std': [741.6254, 740.883, 960.1045, 946.76056, 985.52747, + 1082.4341, 1057.7628, 1136.1942, 1132.7898, 991.48016]} + + print(os.listdir(args.data_dir)) + + # load datasets + imgTransform = transforms.Compose([ToTensor(),Normalize(bands_mean, bands_std)]) + + # train_dataGen = SEN12MS(args.data_dir, args.label_split_dir, + # imgTransform=imgTransform, + # label_type=label_type, threshold=args.threshold, 
subset="train", + # use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, + # IGBP_s=args.IGBP_simple, data_size=args.data_size) + # + # val_dataGen = SEN12MS(args.data_dir, args.label_split_dir, + # imgTransform=imgTransform, + # label_type=label_type, threshold=args.threshold, subset="val", + # use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, + # IGBP_s=args.IGBP_simple, data_size=args.data_size) + + if args.eval: + test_dataGen = SEN12MS(args.data_dir, args.label_split_dir, + imgTransform=imgTransform, + label_type=label_type, threshold=args.threshold, subset="test", + use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, + IGBP_s=args.IGBP_simple) + + # number of input channels + # n_inputs = train_dataGen.n_inputs + n_inputs = test_dataGen.n_inputs + # print('input channels =', n_inputs) + + # set up dataloaders + # train_data_loader = DataLoader(train_dataGen, + # batch_size=args.batch_size, + # num_workers=args.num_workers, + # shuffle=True, + # pin_memory=True) + # val_data_loader = DataLoader(val_dataGen, + # batch_size=args.batch_size, + # num_workers=args.num_workers, + # shuffle=False, + # pin_memory=True) + + if args.eval: + test_data_loader = DataLoader(test_dataGen, + batch_size=args.batch_size, + num_workers=args.num_workers, + shuffle=False, + pin_memory=True) + +# -------------------------------- ML setup + # cuda + use_cuda = torch.cuda.is_available() + if use_cuda: + torch.backends.cudnn.enabled = True + cudnn.benchmark = True + + # define number of classes + if args.IGBP_simple: + numCls = 10 + ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] + else: + numCls = 17 + ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', + '11', '12', '13', '14', '15', '16', '17'] + + print('num_class: ', numCls) + + # define model + if args.model == 'VGG16': + model = VGG16(n_inputs, numCls) + elif args.model == 'VGG19': + model = VGG19(n_inputs, numCls) + elif args.model == 'ResNet50': + model = ResNet50(n_inputs, numCls) + elif args.model == 'ResNet101': + model = ResNet101(n_inputs, numCls) + elif args.model == 'ResNet152': + model = ResNet152(n_inputs, numCls) + elif args.model == 'DenseNet121': + model = DenseNet121(n_inputs, numCls) + elif args.model == 'DenseNet161': + model = DenseNet161(n_inputs, numCls) + elif args.model == 'DenseNet169': + model = DenseNet169(n_inputs, numCls) + elif args.model == 'DenseNet201': + model = DenseNet201(n_inputs, numCls) + # finetune moco pre-trained model + elif args.model == 'Moco': + pt_path = os.path.join(args.pt_dir, f"{args.pt_name}_{args.pt_type}_converted.pth") + assert os.path.exists(pt_path) + model = Moco(torch.load(pt_path), n_inputs, numCls) + else: + raise NameError("no model") + + # move model to GPU if is available + if use_cuda: + model = model.cuda() + + # # define loss function + # if label_type == 'multi_label': + # lossfunc = torch.nn.BCEWithLogitsLoss() + # else: + # lossfunc = torch.nn.CrossEntropyLoss() + # + print(model) + + # model.encoder = torch.nn.Sequential(*[model.encoder[i] for i in range(8)]) + print(model.encoder) + + # print(model) + # # set up optimizer + # optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay) + + # best_acc = 0 + # start_epoch = 0 + # if args.resume: + # if os.path.isfile(args.resume): + # print("=> loading checkpoint '{}'".format(args.resume)) + # checkpoint = torch.load(args.resume) + # checkpoint_nm = os.path.basename(args.resume) + # sv_name = checkpoint_nm.split('_')[0] + '_' + checkpoint_nm.split('_')[1] + 
# print('saving file name is ', sv_name) + # + # if checkpoint['epoch'] > start_epoch: + # start_epoch = checkpoint['epoch'] + # best_acc = checkpoint['best_prec'] + # model.load_state_dict(checkpoint['state_dict']) + # optimizer.load_state_dict(checkpoint['optimizer']) + # print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) + # else: + # print("=> no checkpoint found at '{}'".format(args.resume)) + + + # set up tensorboard logging + # train_writer = SummaryWriter(os.path.join(logs_dir, 'runs', sv_name, 'training')) + # val_writer = SummaryWriter(os.path.join(logs_dir, 'runs', sv_name, 'val')) + + +# ----------------------------- executing Train/Val. + # train network + + # wandb.watch(model, log="all") + # for epoch in range(start_epoch, args.epochs): + # + # print('Epoch {}/{}'.format(epoch, args.epochs - 1)) + # print('-' * 10) + # + # train(train_data_loader, model, optimizer, lossfunc, label_type, epoch, use_cuda) + # micro_f1 = val(val_data_loader, model, optimizer, label_type, epoch, use_cuda) + # + # is_best_acc = micro_f1 > best_acc + # best_acc = max(best_acc, micro_f1) + # + # save_checkpoint({ + # 'epoch': epoch, + # 'arch': args.model, + # 'model_state_dict': model.state_dict(), + # 'optimizer_state_dict': optimizer.state_dict(), + # 'best_prec': best_acc + # }, is_best_acc, sv_name) + # + # wandb.log({'epoch': epoch, 'micro_f1': micro_f1}) + # + # print("=============") + # print("done training") + # print("=============") + + # model.eval() + model.encoder.eval() + + y_true = [] + predicted_output = [] + with torch.no_grad(): + for batch_idx, data in enumerate(tqdm(test_data_loader, desc="test")): + + # unpack sample + bands = data["image"] + labels = data["label"] + + # move data to gpu if model is on gpu + if use_cuda: + bands = bands.to(torch.device("cuda")) + # labels = labels.to(torch.device("cuda")) + + # forward pass + logits = model.encoder(bands) + + outputs = logits.cpu().numpy() + predicted_output += list(outputs.reshape(data['image'].shape[0],2048)) + + labels = labels.cpu().numpy() # keep true & pred label at same loc. 
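+                # collect the ground-truth label vectors in the same order as the
+                # 2048-d encoder features gathered above (the flattened avgpool output
+                # of the ResNet-50 / MoCo encoder), so every t-SNE point can be
+                # coloured by its class further down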
+ y_true += list(labels) + + + X = np.array(predicted_output) + # print(f'Activation Vector Shape: {X.shape}') + # # images = np.concatenate(images) + # # print(f'Image Vector Shape: {images.shape}') + # + class_simp = ['Forest', 'Shrublands', 'Savana', 'Grassland', 'Wetlands', + 'Croplands', 'Urban and Built-Up Lands','Permanent Snow and Ice', + 'Barren','Water Bodies'] + + color10 = ['#009900','#c6b044','#fbff13', '#b6ff05', + '#27ff87','#c24f44','#a5a5a5','#69fff8', + '#f9ffa4','#1c0dff'] + + color = [] + for i in range(10): + color.append(np.array(y_true)[:,i] == 1) + color = np.array(color) + + tsne = TSNE(n_components=2, learning_rate=150, perplexity=30, angle=0.2, verbose=2).fit_transform(X) + # + + samples = np.random.choice([True, False], size=X.shape[0], p=[1.0,0.0]) + color_sam = color[:,samples] + tx, ty = tsne[samples,0], tsne[samples,1] + tx = (tx-np.min(tx)) / (np.max(tx) - np.min(tx)) + ty = (ty-np.min(ty)) / (np.max(ty) - np.min(ty)) + tsne[samples,0] = tx + tsne[samples,1] = ty + + + + fig = plt.figure(figsize=(20,10)) + + for j in range(10): + plt.scatter(tsne[samples,0][color_sam[j]], tsne[samples,1][color_sam[j]], + c=color10[j],label = class_simp[j]) + + plt.yticks([]) + plt.xticks([]) + plt.grid(False) + plt.title('TSNE Output MoCo', fontsize=14, color= 'black') + plt.legend() + + # cb = plt.colorbar(cax = fig.add_axes([0.92, 0.2, 0.025, 0.6])) + # cb.set_ticklabels(class_simp) + # loc = np.arange(0.5,10.5,1) + # cb.set_ticks(loc) + plt.show() + print('done...') + + # if args.eval: + # eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS) + +# def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): +# +# model.eval() +# # define metrics +# prec_score_ = Precision_score() +# recal_score_ = Recall_score() +# f1_score_ = F1_score() +# f2_score_ = F2_score() +# hamming_loss_ = Hamming_loss() +# subset_acc_ = Subset_accuracy() +# acc_score_ = Accuracy_score() # from original script, not recommeded, seems not correct +# one_err_ = One_error() +# coverage_err_ = Coverage_error() +# rank_loss_ = Ranking_loss() +# labelAvgPrec_score_ = LabelAvgPrec_score() +# +# calssification_report_ = calssification_report(ORG_LABELS) +# +# # -------------------------------- prediction +# y_true = [] +# predicted_probs = [] +# +# with torch.no_grad(): +# for batch_idx, data in enumerate(tqdm(test_data_loader, desc="test")): +# +# # unpack sample +# bands = data["image"] +# labels = data["label"] +# +# # move data to gpu if model is on gpu +# if use_cuda: +# bands = bands.to(torch.device("cuda")) +# # labels = labels.to(torch.device("cuda")) +# +# # forward pass +# logits = model(bands) +# +# # convert logits to probabilies +# if label_type == 'multi_label': +# probs = torch.sigmoid(logits).cpu().numpy() +# else: +# sm = torch.nn.Softmax(dim=1) +# probs = sm(logits).cpu().numpy() +# +# labels = labels.cpu().numpy() # keep true & pred label at same loc. 
+# predicted_probs += list(probs) +# y_true += list(labels) +# +# predicted_probs = np.asarray(predicted_probs) +# # convert predicted probabilities into one/multi-hot labels +# if label_type == 'multi_label': +# y_predicted = (predicted_probs >= 0.5).astype(np.float32) +# else: +# loc = np.argmax(predicted_probs, axis=-1) +# y_predicted = np.zeros_like(predicted_probs).astype(np.float32) +# for i in range(len(loc)): +# y_predicted[i, loc[i]] = 1 +# +# y_true = np.asarray(y_true) +# +# # --------------------------- evaluation with metrics +# # general +# macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) +# macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) +# macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) +# macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) +# hamming_loss = hamming_loss_(y_predicted, y_true) +# subset_acc = subset_acc_(y_predicted, y_true) +# macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) +# # ranking-based +# one_error = one_err_(predicted_probs, y_true) +# coverage_error = coverage_err_(predicted_probs, y_true) +# rank_loss = rank_loss_(predicted_probs, y_true) +# labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) +# +# cls_report = calssification_report_(y_predicted, y_true) +# +# if label_type == 'multi_label': +# [conf_mat, cls_acc, aa] = multi_conf_mat(y_predicted, y_true, n_classes=numCls) +# # the results derived from multilabel confusion matrix are not recommended to use +# oa = OA_multi(y_predicted, y_true) +# # this oa can be Jaccard index +# +# info = { +# "macroPrec": macro_prec, +# "microPrec": micro_prec, +# "samplePrec": sample_prec, +# "macroRec": macro_rec, +# "microRec": micro_rec, +# "sampleRec": sample_rec, +# "macroF1": macro_f1, +# "microF1": micro_f1, +# "sampleF1": sample_f1, +# "macroF2": macro_f2, +# "microF2": micro_f2, +# "sampleF2": sample_f2, +# "HammingLoss": hamming_loss, +# "subsetAcc": subset_acc, +# "macroAcc": macro_acc, +# "microAcc": micro_acc, +# "sampleAcc": sample_acc, +# "oneError": one_error, +# "coverageError": coverage_error, +# "rankLoss": rank_loss, +# "labelAvgPrec": labelAvgPrec, +# "clsReport": cls_report, +# "multilabel_conf_mat": conf_mat, +# "class-wise Acc": cls_acc, +# "AverageAcc": aa, +# "OverallAcc": oa} +# +# else: +# conf_mat = conf_mat_nor(y_predicted, y_true, n_classes=numCls) +# aa = get_AA(y_predicted, y_true, n_classes=numCls) # average accuracy, \ +# # zero-sample classes are not excluded +# +# info = { +# "macroPrec": macro_prec, +# "microPrec": micro_prec, +# "samplePrec": sample_prec, +# "macroRec": macro_rec, +# "microRec": micro_rec, +# "sampleRec": sample_rec, +# "macroF1": macro_f1, +# "microF1": micro_f1, +# "sampleF1": sample_f1, +# "macroF2": macro_f2, +# "microF2": micro_f2, +# "sampleF2": sample_f2, +# "HammingLoss": hamming_loss, +# "subsetAcc": subset_acc, +# "macroAcc": macro_acc, +# "microAcc": micro_acc, +# "sampleAcc": sample_acc, +# "oneError": one_error, +# "coverageError": coverage_error, +# "rankLoss": rank_loss, +# "labelAvgPrec": labelAvgPrec, +# "clsReport": cls_report, +# "conf_mat": conf_mat, +# "AverageAcc": aa} +# +# wandb.run.summary.update(info) +# print("saving metrics...") +# # pkl.dump(info, open("test_scores.pkl", "wb")) +# +# +# def train(trainloader, model, optimizer, lossfunc, label_type, epoch, use_cuda): +# +# lossTracker = MetricTracker() +# +# # set model to train mode +# model.train() +# +# +# # main training loop +# for idx, data in enumerate(tqdm(trainloader, 
desc="training")): +# +# numSample = data["image"].size(0) +# +# # unpack sample +# bands = data["image"] +# if label_type == 'multi_label': +# labels = data["label"] +# else: +# labels = (torch.max(data["label"], 1)[1]).type(torch.long) +# +# # move data to gpu if model is on gpu +# if use_cuda: +# bands = bands.to(torch.device("cuda")) +# labels = labels.to(torch.device("cuda")) +# +# # reset gradients +# optimizer.zero_grad() +# +# # forward pass +# logits = model(bands) +# loss = lossfunc(logits, labels) +# +# # backward pass +# loss.backward() +# optimizer.step() +# +# # +# lossTracker.update(loss.item(), numSample) +# +# # train_writer.add_scalar("loss", lossTracker.avg, epoch) +# wandb.log({'loss': lossTracker.avg, 'epoch': epoch}) +# +# print('Train loss: {:.6f}'.format(lossTracker.avg)) +# +# +# def val(valloader, model, optimizer, label_type, epoch, use_cuda): +# +# prec_score_ = Precision_score() +# recal_score_ = Recall_score() +# f1_score_ = F1_score() +# f2_score_ = F2_score() +# hamming_loss_ = Hamming_loss() +# subset_acc_ = Subset_accuracy() +# acc_score_ = Accuracy_score() +# one_err_ = One_error() +# coverage_err_ = Coverage_error() +# rank_loss_ = Ranking_loss() +# labelAvgPrec_score_ = LabelAvgPrec_score() +# +# # set model to evaluation mode +# model.eval() +# +# # main validation loop +# y_true = [] +# predicted_probs = [] +# +# with torch.no_grad(): +# for batch_idx, data in enumerate(tqdm(valloader, desc="validation")): +# +# # unpack sample +# bands = data["image"] +# labels = data["label"] +# +# # move data to gpu if model is on gpu +# if use_cuda: +# bands = bands.to(torch.device("cuda")) +# #labels = labels.to(torch.device("cuda")) +# +# # forward pass +# logits = model(bands) +# +# # convert logits to probabilies +# if label_type == 'multi_label': +# probs = torch.sigmoid(logits).cpu().numpy() +# else: +# sm = torch.nn.Softmax(dim=1) +# probs = sm(logits).cpu().numpy() +# +# labels = labels.cpu().numpy() # keep true & pred label at same loc. 
+# predicted_probs += list(probs) +# y_true += list(labels) +# +# +# predicted_probs = np.asarray(predicted_probs) +# # convert predicted probabilities into one/multi-hot labels +# if label_type == 'multi_label': +# y_predicted = (predicted_probs >= 0.5).astype(np.float32) +# else: +# loc = np.argmax(predicted_probs, axis=-1) +# y_predicted = np.zeros_like(predicted_probs).astype(np.float32) +# for i in range(len(loc)): +# y_predicted[i,loc[i]] = 1 +# +# y_true = np.asarray(y_true) +# +# +# macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) +# macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) +# macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) +# macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) +# hamming_loss = hamming_loss_(y_predicted, y_true) +# subset_acc = subset_acc_(y_predicted, y_true) +# macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) +# +# # Note that below 4 ranking-based metrics are not applicable to single-label +# # (multi-class) classification, but they will still show the scores during +# # validation on tensorboard +# one_error = one_err_(predicted_probs, y_true) +# coverage_error = coverage_err_(predicted_probs, y_true) +# rank_loss = rank_loss_(predicted_probs, y_true) +# labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) +# +# info = { +# "macroPrec" : macro_prec, +# "microPrec" : micro_prec, +# "samplePrec" : sample_prec, +# "macroRec" : macro_rec, +# "microRec" : micro_rec, +# "sampleRec" : sample_rec, +# "macroF1" : macro_f1, +# "microF1" : micro_f1, +# "sampleF1" : sample_f1, +# "macroF2" : macro_f2, +# "microF2" : micro_f2, +# "sampleF2" : sample_f2, +# "HammingLoss" : hamming_loss, +# "subsetAcc" : subset_acc, +# "macroAcc" : macro_acc, +# "microAcc" : micro_acc, +# "sampleAcc" : sample_acc, +# "oneError" : one_error, +# "coverageError" : coverage_error, +# "rankLoss" : rank_loss, +# "labelAvgPrec" : labelAvgPrec +# } +# +# wandb.run.summary.update(info) +# for tag, value in info.items(): +# wandb.log({tag: value, 'epoch': epoch}) +# # val_writer.add_scalar(tag, value, epoch) +# +# print('Validation microPrec: {:.6f} microF1: {:.6f} sampleF1: {:.6f} microF2: {:.6f} sampleF2: {:.6f}'.format( +# micro_prec, +# micro_f1, +# sample_f1, +# micro_f2, +# sample_f2 +# )) +# return micro_f1 + + +if __name__ == "__main__": + main() + + \ No newline at end of file From 505122a450bf23be65555284721c499bb9753085 Mon Sep 17 00:00:00 2001 From: eoropeza Date: Wed, 31 Mar 2021 10:26:30 -0500 Subject: [PATCH 2/9] Layer Freezing and Conv1x1 --- .idea/.gitignore | 8 + .idea/SEN12MS.iml | 8 + .idea/deployment.xml | 29 + .idea/encodings.xml | 4 + .../inspectionProfiles/profiles_settings.xml | 6 + .idea/misc.xml | 4 + .idea/modules.xml | 8 + .idea/remote-mappings.xml | 10 + .idea/vcs.xml | 6 + classification/main_train_bu.py | 689 ++++++++++++++++++ classification/models/ResNet_bu.py | 463 ++++++++++++ classification/models/ResNet_nnedition.py | 467 ++++++++++++ 12 files changed, 1702 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/SEN12MS.iml create mode 100644 .idea/deployment.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/remote-mappings.xml create mode 100644 .idea/vcs.xml create mode 100644 classification/main_train_bu.py create mode 100644 classification/models/ResNet_bu.py create mode 100644 
classification/models/ResNet_nnedition.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..66cee35 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/../../../../../../../../../:\Users\ernes\Documents\UCBerkley_MIDS\Courses\Capstone\SEN12MS\.idea/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/SEN12MS.iml b/.idea/SEN12MS.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/.idea/SEN12MS.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/deployment.xml b/.idea/deployment.xml new file mode 100644 index 0000000..c848bae --- /dev/null +++ b/.idea/deployment.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..15a15b2 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..127644b --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..c2ceaad --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/remote-mappings.xml b/.idea/remote-mappings.xml new file mode 100644 index 0000000..d528309 --- /dev/null +++ b/.idea/remote-mappings.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/classification/main_train_bu.py b/classification/main_train_bu.py new file mode 100644 index 0000000..696cb68 --- /dev/null +++ b/classification/main_train_bu.py @@ -0,0 +1,689 @@ +# Modified from Jian Kang, https://www.rsim.tu-berlin.de/menue/team/dring_jian_kang/ +# Modified by Yu-Lun Wu, TUM + +import os +import argparse +import numpy as np +from datetime import datetime +from tqdm import tqdm + +import torch +import torch.optim as optim +import torchvision.transforms as transforms +import torch.backends.cudnn as cudnn +from torch.utils.data import DataLoader +# from tensorboardX import SummaryWriter + +import shutil +import sys +sys.path.append('../') + +from dataset import SEN12MS, ToTensor, Normalize +from models.VGG import VGG16, VGG19 +from models.ResNet import ResNet50, ResNet50_1x1, ResNet101, ResNet152, Moco, Moco_1x1, Moco_1x1RND +from models.DenseNet import DenseNet121, DenseNet161, DenseNet169, DenseNet201 +from metrics import MetricTracker, Precision_score, Recall_score, F1_score, \ + F2_score, Hamming_loss, Subset_accuracy, Accuracy_score, One_error, \ + Coverage_error, Ranking_loss, LabelAvgPrec_score, calssification_report, \ + conf_mat_nor, get_AA, multi_conf_mat, OA_multi + +import wandb + +#sec.2 (done) + +model_choices = ['VGG16', 'VGG19', + 'ResNet50','ResNet101','ResNet152', 'ResNet50_1x1', + 'DenseNet121','DenseNet161','DenseNet169','DenseNet201', 'Moco', 
'Moco_1x1', 'Moco_1x1RND'] +label_choices = ['multi_label', 'single_label'] + +# ----------------------- define and parse arguments -------------------------- +parser = argparse.ArgumentParser() + +# experiment name +parser.add_argument('--exp_name', type=str, default=None, + help="experiment name. will be used in the path names \ + for log- and savefiles. If no input experiment name, \ + path would be set to model name.") + +# data directory +parser.add_argument('--data_dir', type=str, default=None, + help='path to SEN12MS dataset') +parser.add_argument('--label_split_dir', type=str, default=None, + help="path to label data and split list") +parser.add_argument('--data_size', type=str, default="full", + help="64, 128, 256, 1000, 1024, full") +# input/output +parser.add_argument('--use_s2', action='store_true', default=False, + help='use sentinel-2 bands') +parser.add_argument('--use_s1', action='store_true', default=False, + help='use sentinel-1 data') +parser.add_argument('--use_RGB', action='store_true', default=False, + help='use sentinel-2 RGB bands') +parser.add_argument('--IGBP_simple', action='store_true', default=True, + help='use IGBP simplified scheme; otherwise: IGBP original scheme') +parser.add_argument('--label_type', type=str, choices = label_choices, + default='multi_label', + help="label-type (default: multi_label)") +parser.add_argument('--threshold', type=float, default=0.1, + help='threshold to convert probability-labels to multi-hot \ + labels, mean/std for normalizatin would not be accurate \ + if the threshold is larger than 0.22. \ + for single_label threshold would be ignored') +parser.add_argument('--eval', action='store_true', default=False, + help='evaluate against test set') + +# network +parser.add_argument('--model', type=str, choices = model_choices, + default='ResNet50', + help="network architecture (default: ResNet50)") + +# training hyperparameters +parser.add_argument('--lr', type=float, default=0.001, + help='initial learning rate') +parser.add_argument('--use_lr_step', action='store_true', default=False, + help='use learning rate steps') +parser.add_argument('--lr_step_size', type=int, default=25, + help='Learning rate step size') +parser.add_argument('--lr_step_gamma', type=float, default=0.1, + help='Learning rate step gamma') +parser.add_argument('--decay', type=float, default=1e-5, + help='decay rate') +parser.add_argument('--batch_size', type=int, default=64, + help='mini-batch size (default: 64)') +parser.add_argument('--num_workers',type=int, default=4, + help='num_workers for data loading in pytorch') +parser.add_argument('--epochs', type=int, default=100, + help='number of training epochs (default: 100)') +parser.add_argument('--resume', '-r', type=str, default=None, + help='path to the pretrained weights file', ) +parser.add_argument('--pt_dir', '-pd', type=str, default=None, + help='directory for pretrained model', ) +parser.add_argument('--pt_name', '-pn', type=str, default=None, + help='model name without extension', ) +parser.add_argument('--pt_type', '-pt', type=str, default='bb', + help='bb (backbone) or qe (query encoder)', ) + +args = parser.parse_args() + +wandb.init(config=args) + +# -------------------- set directory for saving files ------------------------- + +if wandb.run is not None: + # save to wandb run dir for tracking and saving the models + checkpoint_dir = wandb.run.dir + logs_dir = wandb.run.dir +elif args.exp_name: + checkpoint_dir = os.path.join('./', args.exp_name, 'checkpoints') + logs_dir = os.path.join('./', 
args.exp_name, 'logs') +else: + checkpoint_dir = os.path.join('./', args.model, 'checkpoints') + logs_dir = os.path.join('./', args.model, 'logs') + +if not os.path.isdir(checkpoint_dir): + os.makedirs(checkpoint_dir) +if not os.path.isdir(logs_dir): + os.makedirs(logs_dir) + +# ----------------------------- saving files --------------------------------- +def write_arguments_to_file(args, filename): + with open(filename, 'w') as f: + for key, value in vars(args).items(): + f.write('%s: %s\n' % (key, str(value))) + +def save_checkpoint(state, is_best, name): + + filename = os.path.join(checkpoint_dir, name + '_checkpoint.pth') + + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(checkpoint_dir, name + + '_model_best.pth')) + +# -------------------------------- Main Program ------------------------------- +def main(): + global args + + # save configuration to file + sv_name = datetime.strftime(datetime.now(), '%Y%m%d_%H%M%S') + print('saving file name is ', sv_name) + + write_arguments_to_file(args, os.path.join(logs_dir, sv_name+'_arguments.txt')) + +# ----------------------------------- data + # define mean/std of the training set (for data normalization) + label_type = args.label_type + + bands_mean = {'s1_mean': [-11.76858, -18.294598], + 's2_mean': [1226.4215, 1137.3799, 1139.6792, 1350.9973, 1932.9058, + 2211.1584, 2154.9846, 2409.1128, 2001.8622, 1356.0801]} + + bands_std = {'s1_std': [4.525339, 4.3586307], + 's2_std': [741.6254, 740.883, 960.1045, 946.76056, 985.52747, + 1082.4341, 1057.7628, 1136.1942, 1132.7898, 991.48016]} + + + # load datasets + imgTransform = transforms.Compose([ToTensor(),Normalize(bands_mean, bands_std)]) + + train_dataGen = SEN12MS(args.data_dir, args.label_split_dir, + imgTransform=imgTransform, + label_type=label_type, threshold=args.threshold, subset="train", + use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, + IGBP_s=args.IGBP_simple, data_size=args.data_size) + + val_dataGen = SEN12MS(args.data_dir, args.label_split_dir, + imgTransform=imgTransform, + label_type=label_type, threshold=args.threshold, subset="val", + use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, + IGBP_s=args.IGBP_simple, data_size=args.data_size) + + if args.eval: + test_dataGen = SEN12MS(args.data_dir, args.label_split_dir, + imgTransform=imgTransform, + label_type=label_type, threshold=args.threshold, subset="test", + use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, + IGBP_s=args.IGBP_simple) + + + + # number of input channels + n_inputs = train_dataGen.n_inputs + print('input channels =', n_inputs) + + # set up dataloaders + train_data_loader = DataLoader(train_dataGen, + batch_size=args.batch_size, + num_workers=args.num_workers, + shuffle=True, + pin_memory=True) + val_data_loader = DataLoader(val_dataGen, + batch_size=args.batch_size, + num_workers=args.num_workers, + shuffle=False, + pin_memory=True) + + if args.eval: + test_data_loader = DataLoader(test_dataGen, + batch_size=args.batch_size, + num_workers=args.num_workers, + shuffle=False, + pin_memory=True) + +# -------------------------------- ML setup + # cuda + use_cuda = torch.cuda.is_available() + if use_cuda: + torch.backends.cudnn.enabled = True + cudnn.benchmark = True + + # define number of classes + if args.IGBP_simple: + numCls = 10 + ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] + else: + numCls = 17 + ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', + '11', '12', '13', '14', '15', '16', '17'] + + 
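+    # the simplified IGBP scheme merges the 17 original IGBP land-cover classes
+    # of SEN12MS into 10 broader classes, hence the two class counts above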
print('num_class: ', numCls) + + # define model + if args.model == 'VGG16': + model = VGG16(n_inputs, numCls) + elif args.model == 'VGG19': + model = VGG19(n_inputs, numCls) + elif args.model == 'ResNet50': + model = ResNet50(n_inputs, numCls) + elif args.model == 'ResNet50_1x1': + model = ResNet50_1x1(n_inputs, numCls) + elif args.model == 'ResNet101': + model = ResNet101(n_inputs, numCls) + elif args.model == 'ResNet152': + model = ResNet152(n_inputs, numCls) + elif args.model == 'DenseNet121': + model = DenseNet121(n_inputs, numCls) + elif args.model == 'DenseNet161': + model = DenseNet161(n_inputs, numCls) + elif args.model == 'DenseNet169': + model = DenseNet169(n_inputs, numCls) + elif args.model == 'DenseNet201': + model = DenseNet201(n_inputs, numCls) + # finetune moco pre-trained model + elif args.model.startswith("Moco"): + pt_path = os.path.join(args.pt_dir, f"{args.pt_name}_{args.pt_type}_converted.pth") + assert os.path.exists(pt_path) + if args.model == 'Moco': + print("transfer backbone weights but no conv 1x1 input module") + model = Moco(torch.load(pt_path), n_inputs, numCls) + elif args.model == 'Moco_1x1': + print("transfer backbone weights and input module weights") + model = Moco_1x1(torch.load(pt_path), n_inputs, numCls) + elif args.model == 'Moco_1x1RND': + print("transfer backbone weights but initialize input module random with random weights") + model = Moco_1x1(torch.load(pt_path), n_inputs, numCls) + else: # Assume Moco2 at present + raise NameError("no model") + + else: + raise NameError("no model") + + # move model to GPU if is available + if use_cuda: + model = model.cuda() + + # define loss function + if label_type == 'multi_label': + lossfunc = torch.nn.BCEWithLogitsLoss() + else: + lossfunc = torch.nn.CrossEntropyLoss() + + print(model) + + # Freezing Conv1x1 and Encoder + # list(model.children())[0].training + # child_seq = list(model.children())[0] + # child_seq.training = False + + # # Freezing Conv1x1 + # child_conv1x1 = list(list(model.children())[0])[0] + # child_conv1x1.training = False + + # Freezing first n layers + lyer = 0 + nl = 4 + for child in list(model.children())[0]: + child.training = False + lyer += 1 + if lyer > nl: + break + + + + + # set up optimizer + optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay) + + best_acc = 0 + start_epoch = 0 + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + checkpoint = torch.load(args.resume) + checkpoint_nm = os.path.basename(args.resume) + sv_name = checkpoint_nm.split('_')[0] + '_' + checkpoint_nm.split('_')[1] + print('saving file name is ', sv_name) + + if checkpoint['epoch'] > start_epoch: + start_epoch = checkpoint['epoch'] + best_acc = checkpoint['best_prec'] + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + + # set up tensorboard logging + # train_writer = SummaryWriter(os.path.join(logs_dir, 'runs', sv_name, 'training')) + # val_writer = SummaryWriter(os.path.join(logs_dir, 'runs', sv_name, 'val')) + + +# ----------------------------- executing Train/Val. 
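+    # A minimal sketch of per-parameter freezing (assumes the backbone is exposed
+    # as `model.encoder`, as in the ResNet variants of models/ResNet.py; `n_frozen`
+    # is a hypothetical count). Clearing the `training` flag, as the freezing loop
+    # further above does, only switches BatchNorm/Dropout to eval behaviour and is
+    # reset again by model.train(); gradient updates are disabled per parameter:
+    #
+    #   n_frozen = 4
+    #   for i, child in enumerate(model.encoder):
+    #       if i >= n_frozen:
+    #           break
+    #       for p in child.parameters():
+    #           p.requires_grad = False
+    #   # optionally build the optimizer from the trainable parameters only:
+    #   # optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
+    #   #                        lr=args.lr, weight_decay=args.decay)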
+ # train network + # wandb.watch(model, log="all") + + scheduler = None + if args.use_lr_step: + # Ex: If initial Lr is 0.0001, step size is 25, and gamma is 0.1, then lr will be changed for every 20 steps + # 0.0001 - first 25 epochs + # 0.00001 - 25 to 50 epochs + # 0.000001 - 50 to 75 epochs + # 0.0000001 - 75 to 100 epochs + # https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate + scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_step_gamma) + + for epoch in range(start_epoch, args.epochs): + if args.use_lr_step: + scheduler.step() + print('Epoch {}/{} lr: {}'.format(epoch, args.epochs - 1, optimizer.param_groups[0]['lr'])) + else: + print('Epoch {}/{}'.format(epoch, args.epochs - 1)) + print('-' * 25) + + train(train_data_loader, model, optimizer, lossfunc, label_type, epoch, use_cuda) + micro_f1 = val(val_data_loader, model, optimizer, label_type, epoch, use_cuda) + + is_best_acc = micro_f1 > best_acc + best_acc = max(best_acc, micro_f1) + + save_checkpoint({ + 'epoch': epoch, + 'arch': args.model, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'best_prec': best_acc + }, is_best_acc, sv_name) + + wandb.log({'epoch': epoch, 'micro_f1': micro_f1}) + + print("=============") + print("done training") + print("=============") + + if args.eval: + eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS) + +def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): + + model.eval() + # define metrics + prec_score_ = Precision_score() + recal_score_ = Recall_score() + f1_score_ = F1_score() + f2_score_ = F2_score() + hamming_loss_ = Hamming_loss() + subset_acc_ = Subset_accuracy() + acc_score_ = Accuracy_score() # from original script, not recommeded, seems not correct + one_err_ = One_error() + coverage_err_ = Coverage_error() + rank_loss_ = Ranking_loss() + labelAvgPrec_score_ = LabelAvgPrec_score() + + calssification_report_ = calssification_report(ORG_LABELS) + + # -------------------------------- prediction + y_true = [] + predicted_probs = [] + + with torch.no_grad(): + for batch_idx, data in enumerate(tqdm(test_data_loader, desc="test")): + + # unpack sample + bands = data["image"] + labels = data["label"] + + # move data to gpu if model is on gpu + if use_cuda: + bands = bands.to(torch.device("cuda")) + # labels = labels.to(torch.device("cuda")) + + # forward pass + logits = model(bands) + + # convert logits to probabilies + if label_type == 'multi_label': + probs = torch.sigmoid(logits).cpu().numpy() + else: + sm = torch.nn.Softmax(dim=1) + probs = sm(logits).cpu().numpy() + + labels = labels.cpu().numpy() # keep true & pred label at same loc. 
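+                # accumulate per-sample class probabilities; after the loop they are
+                # converted into hard predictions (per-class threshold of 0.5 for
+                # multi_label, argmax for single_label) before the metrics are computed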
+ predicted_probs += list(probs) + y_true += list(labels) + + predicted_probs = np.asarray(predicted_probs) + # convert predicted probabilities into one/multi-hot labels + if label_type == 'multi_label': + y_predicted = (predicted_probs >= 0.5).astype(np.float32) + else: + loc = np.argmax(predicted_probs, axis=-1) + y_predicted = np.zeros_like(predicted_probs).astype(np.float32) + for i in range(len(loc)): + y_predicted[i, loc[i]] = 1 + + y_true = np.asarray(y_true) + + # --------------------------- evaluation with metrics + # general + macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) + macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) + macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) + macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) + hamming_loss = hamming_loss_(y_predicted, y_true) + subset_acc = subset_acc_(y_predicted, y_true) + macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) + # ranking-based + one_error = one_err_(predicted_probs, y_true) + coverage_error = coverage_err_(predicted_probs, y_true) + rank_loss = rank_loss_(predicted_probs, y_true) + labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) + + cls_report = calssification_report_(y_predicted, y_true) + + if label_type == 'multi_label': + [conf_mat, cls_acc, aa] = multi_conf_mat(y_predicted, y_true, n_classes=numCls) + # the results derived from multilabel confusion matrix are not recommended to use + oa = OA_multi(y_predicted, y_true) + # this oa can be Jaccard index + + info = { + "macroPrec": macro_prec, + "microPrec": micro_prec, + "samplePrec": sample_prec, + "macroRec": macro_rec, + "microRec": micro_rec, + "sampleRec": sample_rec, + "macroF1": macro_f1, + "microF1": micro_f1, + "sampleF1": sample_f1, + "macroF2": macro_f2, + "microF2": micro_f2, + "sampleF2": sample_f2, + "HammingLoss": hamming_loss, + "subsetAcc": subset_acc, + "macroAcc": macro_acc, + "microAcc": micro_acc, + "sampleAcc": sample_acc, + "oneError": one_error, + "coverageError": coverage_error, + "rankLoss": rank_loss, + "labelAvgPrec": labelAvgPrec, + "clsReport": cls_report, + "multilabel_conf_mat": conf_mat, + "class-wise Acc": cls_acc, + "AverageAcc": aa, + "OverallAcc": oa} + + else: + conf_mat = conf_mat_nor(y_predicted, y_true, n_classes=numCls) + aa = get_AA(y_predicted, y_true, n_classes=numCls) # average accuracy, \ + # zero-sample classes are not excluded + + info = { + "macroPrec": macro_prec, + "microPrec": micro_prec, + "samplePrec": sample_prec, + "macroRec": macro_rec, + "microRec": micro_rec, + "sampleRec": sample_rec, + "macroF1": macro_f1, + "microF1": micro_f1, + "sampleF1": sample_f1, + "macroF2": macro_f2, + "microF2": micro_f2, + "sampleF2": sample_f2, + "HammingLoss": hamming_loss, + "subsetAcc": subset_acc, + "macroAcc": macro_acc, + "microAcc": micro_acc, + "sampleAcc": sample_acc, + "oneError": one_error, + "coverageError": coverage_error, + "rankLoss": rank_loss, + "labelAvgPrec": labelAvgPrec, + "clsReport": cls_report, + "conf_mat": conf_mat, + "AverageAcc": aa} + + wandb.run.summary.update(info) + print("saving metrics...") + # pkl.dump(info, open("test_scores.pkl", "wb")) + + +def train(trainloader, model, optimizer, lossfunc, label_type, epoch, use_cuda): + + lossTracker = MetricTracker() + + # set model to train mode + model.train() + + + # main training loop + for idx, data in enumerate(tqdm(trainloader, desc="training")): + + numSample = data["image"].size(0) + + # unpack sample + bands = data["image"] + if label_type == 
'multi_label': + labels = data["label"] + else: + labels = (torch.max(data["label"], 1)[1]).type(torch.long) + + # move data to gpu if model is on gpu + if use_cuda: + bands = bands.to(torch.device("cuda")) + labels = labels.to(torch.device("cuda")) + + # reset gradients + optimizer.zero_grad() + + # forward pass + logits = model(bands) + loss = lossfunc(logits, labels) + + # backward pass + loss.backward() + optimizer.step() + + # + lossTracker.update(loss.item(), numSample) + + # train_writer.add_scalar("loss", lossTracker.avg, epoch) + wandb.log({'loss': lossTracker.avg, 'epoch': epoch}) + + print('Train loss: {:.6f}'.format(lossTracker.avg)) + + +def val(valloader, model, optimizer, label_type, epoch, use_cuda): + + prec_score_ = Precision_score() + recal_score_ = Recall_score() + f1_score_ = F1_score() + f2_score_ = F2_score() + hamming_loss_ = Hamming_loss() + subset_acc_ = Subset_accuracy() + acc_score_ = Accuracy_score() + one_err_ = One_error() + coverage_err_ = Coverage_error() + rank_loss_ = Ranking_loss() + labelAvgPrec_score_ = LabelAvgPrec_score() + + # set model to evaluation mode + model.eval() + + # main validation loop + y_true = [] + predicted_probs = [] + + with torch.no_grad(): + for batch_idx, data in enumerate(tqdm(valloader, desc="validation")): + + # unpack sample + bands = data["image"] + labels = data["label"] + + # move data to gpu if model is on gpu + if use_cuda: + bands = bands.to(torch.device("cuda")) + #labels = labels.to(torch.device("cuda")) + + # forward pass + logits = model(bands) + + # convert logits to probabilies + if label_type == 'multi_label': + probs = torch.sigmoid(logits).cpu().numpy() + else: + sm = torch.nn.Softmax(dim=1) + probs = sm(logits).cpu().numpy() + + labels = labels.cpu().numpy() # keep true & pred label at same loc. 
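+                # accumulate probabilities over the whole validation set; the micro-F1
+                # derived from them below is returned to main(), where it decides
+                # whether the current epoch's checkpoint is saved as the best model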
+ predicted_probs += list(probs) + y_true += list(labels) + + + predicted_probs = np.asarray(predicted_probs) + # convert predicted probabilities into one/multi-hot labels + if label_type == 'multi_label': + y_predicted = (predicted_probs >= 0.5).astype(np.float32) + else: + loc = np.argmax(predicted_probs, axis=-1) + y_predicted = np.zeros_like(predicted_probs).astype(np.float32) + for i in range(len(loc)): + y_predicted[i,loc[i]] = 1 + + y_true = np.asarray(y_true) + + + macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) + macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) + macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) + macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) + hamming_loss = hamming_loss_(y_predicted, y_true) + subset_acc = subset_acc_(y_predicted, y_true) + macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) + + # Note that below 4 ranking-based metrics are not applicable to single-label + # (multi-class) classification, but they will still show the scores during + # validation on tensorboard + one_error = one_err_(predicted_probs, y_true) + coverage_error = coverage_err_(predicted_probs, y_true) + rank_loss = rank_loss_(predicted_probs, y_true) + labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) + + info = { + "macroPrec" : macro_prec, + "microPrec" : micro_prec, + "samplePrec" : sample_prec, + "macroRec" : macro_rec, + "microRec" : micro_rec, + "sampleRec" : sample_rec, + "macroF1" : macro_f1, + "microF1" : micro_f1, + "sampleF1" : sample_f1, + "macroF2" : macro_f2, + "microF2" : micro_f2, + "sampleF2" : sample_f2, + "HammingLoss" : hamming_loss, + "subsetAcc" : subset_acc, + "macroAcc" : macro_acc, + "microAcc" : micro_acc, + "sampleAcc" : sample_acc, + "oneError" : one_error, + "coverageError" : coverage_error, + "rankLoss" : rank_loss, + "labelAvgPrec" : labelAvgPrec + } + + wandb.run.summary.update(info) + for tag, value in info.items(): + wandb.log({tag: value, 'epoch': epoch}) + # val_writer.add_scalar(tag, value, epoch) + + print('Validation microPrec: {:.6f} microF1: {:.6f} sampleF1: {:.6f} microF2: {:.6f} sampleF2: {:.6f}'.format( + micro_prec, + micro_f1, + sample_f1, + micro_f2, + sample_f2 + )) + return micro_f1 + + + +if __name__ == "__main__": + main() + + \ No newline at end of file diff --git a/classification/models/ResNet_bu.py b/classification/models/ResNet_bu.py new file mode 100644 index 0000000..ee5af90 --- /dev/null +++ b/classification/models/ResNet_bu.py @@ -0,0 +1,463 @@ +# Modified from Jian Kang, https://www.rsim.tu-berlin.de/menue/team/dring_jian_kang/ +# Modified by Yu-Lun Wu, TUM +import os +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init +from torch.autograd import Function +from torchvision import models + + +def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + +def weights_init_kaiming(m): + classname = m.__class__.__name__ + if classname.find('Conv2d') != -1: + init.kaiming_normal_(m.weight.data) + +def fc_init_weights(m): + if type(m) == nn.Linear: + init.kaiming_normal_(m.weight.data) + + +class ResNet18(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet18(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + 
resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(512, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + + +class ResNet34(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet34(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(512, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + +class ResNet50(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +class ResNet50_1x1(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + self.Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + self.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + # self.conv1 = nn.Conv2d(3, 64, kernel_size=(256, 256), stride=(2, 2), padding=(3, 3), bias=False) + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.Conv1x1Block, + self.conv1, # self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + +class Moco_1x1(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + resnet.load_state_dict(mocoModel["state_dict"]) + + print("n_inputs :",n_inputs) + + Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + # Update input module + input_module_pre_trained = mocoModel["input_module"] + conv1x1_default_state_dict = Conv1x1Block.state_dict() + migrated_data_dict = {} + for k, v in input_module_pre_trained.items(): + if k == "input_module.net.0.weight": + if n_inputs == 10: + # Set the value only if the n_inputs are 10 (i.e only S2). If they are 12 (both S2 and S1), + # the below assignment will result in an error during execution. 
+ # Error: "size mismatch for 0.weight: copying a param with shape torch.Size([3, 10, 1, 1]) from checkpoint, + # the shape in current model is torch.Size([3, 12, 1, 1])" + # The reason is that during pre-training, we have the input set to 10 (for the query block) + migrated_data_dict["0.weight"] = input_module_pre_trained["input_module.net.0.weight"] + + elif k == "input_module.net.1.weight": + migrated_data_dict["1.weight"] = input_module_pre_trained["input_module.net.1.weight"] + elif k == "input_module.net.1.bias": + migrated_data_dict["1.bias"] = input_module_pre_trained["input_module.net.1.bias"] + elif k == "input_module.net.1.running_mean": + migrated_data_dict["1.running_mean"] = input_module_pre_trained["input_module.net.1.running_mean"] + elif k == "input_module.net.1.running_var": + migrated_data_dict["1.running_var"] = input_module_pre_trained["input_module.net.1.running_var"] + + conv1x1_default_state_dict.update(migrated_data_dict) + Conv1x1Block.load_state_dict(conv1x1_default_state_dict) + + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + Conv1x1Block, + # self.conv1, + resnet.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + # self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +class Moco_1x1RND(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + print("n_inputs :",n_inputs) + + self.Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.Conv1x1Block, + resnet.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + # We don't need to initialize here as we are transferring the weights + #self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +# This class uses Conv1x1Block block, but it doesn't get initialized from the pre-trained model. 
+# Only the backbone gets initialized from the pre-trained model +class Moco(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + resnet.load_state_dict(mocoModel["state_dict"]) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +#class ResNet50_em512(nn.Module): +# def __init__(self, n_inputs = 12, numCls = 17): +# super().__init__() +# +# resnet = models.resnet50(pretrained=False) +# +# self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) +# self.encoder = nn.Sequential( +# self.conv1, +# resnet.bn1, +# resnet.relu, +# resnet.maxpool, +# resnet.layer1, +# resnet.layer2, +# resnet.layer3, +# resnet.layer4, +# resnet.avgpool +# ) +# self.FC1 = nn.Linear(2048, 512) +# self.FC2 = nn.Linear(512, numCls) +# +# self.apply(weights_init_kaiming) +# self.apply(fc_init_weights) +# +# def forward(self, x): +# x = self.encoder(x) +# x = x.view(x.size(0), -1) +# +# x = self.FC1(x) +# logits = self.FC2(x) +# +# return logits +# +# +#class ResNet50_em(nn.Module): +# def __init__(self, n_inputs = 12, numCls = 17): +# super().__init__() +# +# resnet = models.resnet50(pretrained=False) +# +# self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) +# self.encoder = nn.Sequential( +# self.conv1, +# resnet.bn1, +# resnet.relu, +# resnet.maxpool, +# resnet.layer1, +# resnet.layer2, +# resnet.layer3, +# resnet.layer4, +# resnet.avgpool +# ) +# self.FC = nn.Linear(2048, numCls) +# +# self.apply(weights_init_kaiming) +# self.apply(fc_init_weights) +# +# def forward(self, x): +# x = self.encoder(x) +# x = x.view(x.size(0), -1) +# +# logits = self.FC(x) +# +# return logits, x + +class ResNet101(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet101(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + +class ResNet152(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet152(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + +if __name__ 
== "__main__": + + inputs = torch.randn((1, 12, 256, 256)) # (how many images, spectral channels, pxl, pxl) + + net = ResNet18() + #net = ResNet34() + #net = ResNet50() + #net = ResNet101() + #net = ResNet152() + + outputs = net(inputs) + + print(outputs) + print(outputs.shape) + + numParams = count_parameters(net) + + print(f"{numParams:.2E}") + + diff --git a/classification/models/ResNet_nnedition.py b/classification/models/ResNet_nnedition.py new file mode 100644 index 0000000..0b63dc5 --- /dev/null +++ b/classification/models/ResNet_nnedition.py @@ -0,0 +1,467 @@ +# Modified from Jian Kang, https://www.rsim.tu-berlin.de/menue/team/dring_jian_kang/ +# Modified by Yu-Lun Wu, TUM +import os +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init +from torch.autograd import Function +from torchvision import models + + +def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + +def weights_init_kaiming(m): + classname = m.__class__.__name__ + if classname.find('Conv2d') != -1: + init.kaiming_normal_(m.weight.data) + +def fc_init_weights(m): + if type(m) == nn.Linear: + init.kaiming_normal_(m.weight.data) + + +class ResNet18(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet18(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(512, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + + +class ResNet34(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet34(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(512, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + +class ResNet50(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +class ResNet50_1x1(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + self.Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + self.conv1 = nn.Conv2d(3, 64, 
kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + # self.conv1 = nn.Conv2d(3, 64, kernel_size=(256, 256), stride=(2, 2), padding=(3, 3), bias=False) + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.Conv1x1Block, + self.conv1, # self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + +class Moco_1x1(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + resnet.load_state_dict(mocoModel["state_dict"]) + + print("n_inputs :",n_inputs) + + Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + # Update input module + input_module_pre_trained = mocoModel["input_module"] + conv1x1_default_state_dict = Conv1x1Block.state_dict() + migrated_data_dict = {} + for k, v in input_module_pre_trained.items(): + if k == "input_module.net.0.weight": + if n_inputs == 10: + # Set the value only if the n_inputs are 10 (i.e only S2). If they are 12 (both S2 and S1), + # the below assignment will result in an error during execution. + # Error: "size mismatch for 0.weight: copying a param with shape torch.Size([3, 10, 1, 1]) from checkpoint, + # the shape in current model is torch.Size([3, 12, 1, 1])" + # The reason is that during pre-training, we have the input set to 10 (for the query block) + migrated_data_dict["0.weight"] = input_module_pre_trained["input_module.net.0.weight"] + + elif k == "input_module.net.1.weight": + migrated_data_dict["1.weight"] = input_module_pre_trained["input_module.net.1.weight"] + elif k == "input_module.net.1.bias": + migrated_data_dict["1.bias"] = input_module_pre_trained["input_module.net.1.bias"] + elif k == "input_module.net.1.running_mean": + migrated_data_dict["1.running_mean"] = input_module_pre_trained["input_module.net.1.running_mean"] + elif k == "input_module.net.1.running_var": + migrated_data_dict["1.running_var"] = input_module_pre_trained["input_module.net.1.running_var"] + + conv1x1_default_state_dict.update(migrated_data_dict) + Conv1x1Block.load_state_dict(conv1x1_default_state_dict) + + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + Conv1x1Block, + # self.conv1, + resnet.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + # self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +class Moco_1x1RND(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + print("n_inputs :",n_inputs) + + self.Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + 
self.encoder = nn.Sequential( + self.Conv1x1Block, + resnet.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + # We don't need to initialize here as we are transferring the weights + #self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +# This class uses Conv1x1Block block, but it doesn't get initialized from the pre-trained model. +# Only the backbone gets initialized from the pre-trained model +class Moco(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + resnet.load_state_dict(mocoModel["state_dict"]) + + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + conv1.apply(weights_init_kaiming) + self.encoder = nn.Sequential( + # self.conv1, + conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + # self.conv1.apply(weights_init_kaiming) + + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +#class ResNet50_em512(nn.Module): +# def __init__(self, n_inputs = 12, numCls = 17): +# super().__init__() +# +# resnet = models.resnet50(pretrained=False) +# +# self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) +# self.encoder = nn.Sequential( +# self.conv1, +# resnet.bn1, +# resnet.relu, +# resnet.maxpool, +# resnet.layer1, +# resnet.layer2, +# resnet.layer3, +# resnet.layer4, +# resnet.avgpool +# ) +# self.FC1 = nn.Linear(2048, 512) +# self.FC2 = nn.Linear(512, numCls) +# +# self.apply(weights_init_kaiming) +# self.apply(fc_init_weights) +# +# def forward(self, x): +# x = self.encoder(x) +# x = x.view(x.size(0), -1) +# +# x = self.FC1(x) +# logits = self.FC2(x) +# +# return logits +# +# +#class ResNet50_em(nn.Module): +# def __init__(self, n_inputs = 12, numCls = 17): +# super().__init__() +# +# resnet = models.resnet50(pretrained=False) +# +# self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) +# self.encoder = nn.Sequential( +# self.conv1, +# resnet.bn1, +# resnet.relu, +# resnet.maxpool, +# resnet.layer1, +# resnet.layer2, +# resnet.layer3, +# resnet.layer4, +# resnet.avgpool +# ) +# self.FC = nn.Linear(2048, numCls) +# +# self.apply(weights_init_kaiming) +# self.apply(fc_init_weights) +# +# def forward(self, x): +# x = self.encoder(x) +# x = x.view(x.size(0), -1) +# +# logits = self.FC(x) +# +# return logits, x + +class ResNet101(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet101(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = 
self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + +class ResNet152(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet152(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + +if __name__ == "__main__": + + inputs = torch.randn((1, 12, 256, 256)) # (how many images, spectral channels, pxl, pxl) + + net = ResNet18() + #net = ResNet34() + #net = ResNet50() + #net = ResNet101() + #net = ResNet152() + + outputs = net(inputs) + + print(outputs) + print(outputs.shape) + + numParams = count_parameters(net) + + print(f"{numParams:.2E}") + + From 736af59aadd9c34ec2dd10ccca150ea5218081c8 Mon Sep 17 00:00:00 2001 From: eoropeza Date: Sun, 4 Apr 2021 17:43:37 -0500 Subject: [PATCH 3/9] Output JSON File with predictions on evaluation --- classification/main_train.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/classification/main_train.py b/classification/main_train.py index 00d1839..f829bfb 100644 --- a/classification/main_train.py +++ b/classification/main_train.py @@ -6,6 +6,7 @@ import numpy as np from datetime import datetime from tqdm import tqdm +import json import torch import torch.optim as optim @@ -126,6 +127,8 @@ os.makedirs(logs_dir) # ----------------------------- saving files --------------------------------- +sv_name_eval = '' # Used to save a file during the test evaluation + def write_arguments_to_file(args, filename): with open(filename, 'w') as f: for key, value in vars(args).items(): @@ -143,9 +146,10 @@ def save_checkpoint(state, is_best, name): # -------------------------------- Main Program ------------------------------- def main(): global args - + global sv_name_eval # save configuration to file sv_name = datetime.strftime(datetime.now(), '%Y%m%d_%H%M%S') + sv_name_eval = sv_name print('saving file name is ', sv_name) write_arguments_to_file(args, os.path.join(logs_dir, sv_name+'_arguments.txt')) @@ -376,6 +380,8 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): y_true = [] predicted_probs = [] + pred_dic = {} + with torch.no_grad(): for batch_idx, data in enumerate(tqdm(test_data_loader, desc="test")): @@ -402,6 +408,15 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): predicted_probs += list(probs) y_true += list(labels) + for j in range(len(data['id'])): + pred_dic[data['id'][j]] = {'true': str(list(list(labels)[j])), + 'prediction': str(list(list(probs)[j])) + } + + fileout = f"{checkpoint_dir}/{sv_name_eval}_{args.model}_{label_type}.json" + with open(fileout,'w') as fp: + json.dump(pred_dic, fp) + predicted_probs = np.asarray(predicted_probs) # convert predicted probabilities into one/multi-hot labels if label_type == 'multi_label': @@ -593,8 +608,7 @@ def val(valloader, model, optimizer, label_type, epoch, use_cuda): labels = labels.cpu().numpy() # keep true & pred label at same loc. 
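# A minimal sketch of loading the prediction JSON written by the eval() change
# above for offline error analysis. The file name below is hypothetical (it only
# follows the f-string pattern used in eval()); the stored values are stringified
# Python lists, so ast.literal_eval is used to turn them back into arrays.
import ast
import json

import numpy as np

with open("checkpoints/20210404_174337_ResNet50_multi_label.json") as fp:
    records = json.load(fp)

for sample_id, rec in records.items():
    y_true = np.array(ast.literal_eval(rec["true"]))        # multi-hot ground truth
    y_prob = np.array(ast.literal_eval(rec["prediction"]))  # predicted probabilities
    y_pred = (y_prob >= 0.5).astype(np.float32)             # same cut-off eval() uses for multi_label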
predicted_probs += list(probs) y_true += list(labels) - - + predicted_probs = np.asarray(predicted_probs) # convert predicted probabilities into one/multi-hot labels if label_type == 'multi_label': From b1f653e5e5acc015bcedf9ddb57733d067212886 Mon Sep 17 00:00:00 2001 From: taeil Date: Mon, 5 Apr 2021 19:33:42 -0700 Subject: [PATCH 4/9] merge big earth net change --- README.md | 41 +++++++++++-------- classification/dataset.py | 5 ++- classification/main_train.py | 19 +++++---- .../{run_ft_s2.sh => run_finetune.sh} | 10 ++--- classification/run_ft_s1s2.sh | 24 ----------- 5 files changed, 41 insertions(+), 58 deletions(-) rename classification/{run_ft_s2.sh => run_finetune.sh} (63%) delete mode 100644 classification/run_ft_s1s2.sh diff --git a/README.md b/README.md index 8749d96..a8ab815 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ In this folder, other utilities that can help to load, process, or analyze the d ``` export WANDB_API_KEY= export WANDB_ENTITY=cal-capstone -export WANDB_PROJECT=SEN12MS +export WANDB_PROJECT=scene_classification #export WANDB_MODE=dryrun ``` @@ -95,38 +95,43 @@ export WANDB_PROJECT=SEN12MS CUDA_VISIBLE_DEVICES=0 python classification/main_train.py --exp_name sem12ms_baseline --data_dir /home/ubuntu/SEN12MS/data/sen12ms/data --label_split_dir /home/ubuntu/SEN12MS/splits --use_RGB --IGBP_simple --label_type multi_label --threshold 0.1 --model ResNet50 --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 1000 --epochs 1 ``` - - #### finetune (training from pre-trained model) :anguished: - - These arguments will be saved into a .txt file automatically. This .txt file can be used in the testing for reading the arguments. The `threshold` parameter is used to filter out the labels with lower probabilities. Note that this threshold has no influence on single-label classification. More explanation of the arguments is in the `main_train.py` file. Note that the probability label file and the split lists should be put under the same folder during training and testing. The script reads .pkl format instead of .txt files. -- `test.py`: This python script is used to test the model. It is a semi-automatic script and reads the argument file generated in the training process to decide the label type, model type etc. However, it still requires user to input some basic arguments, such as the path of data directory. Here is an example of the input arguments: -- `convert_moco_to_resnet50.py`: convert moco models to pytorch resnet50 format -download pretrained models from `s3://sen12ms/pretrained_sup` +#### convert Moco pretrained model for sen12ms eval + (optional) download pretrained models from `s3://sen12ms/pretrained` + +Some pretrained models: +- [vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p) +- [silvery-oath-7](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e) + ``` ## remove dryrun param -aws s3 sync s3://sen12ms/pretrained_sup . --dryrun +aws s3 sync s3://sen12ms/pretrained . 
--dryrun ``` -convert models +convert moco models to pytorch resnet50 format ``` -# convert backbone to resnet50 -python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth +# convert local file +python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth -o pretrained/moco/ + +# download the model from W&B and convert for 12 channels +python classification/models/convert_moco_to_resnet50.py -n 12 -i hpt4/367tz8vs -o pretrained/moco/ -# convert query-encoder to resnet50 -python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth -bb false +# rename file with more user-friendly name (TODO automate this) +mv pretrained/moco/367tz8vs_bb_converted.pth pretrained/moco/laced-water-61_bb_converted.pth ``` -finetune with pretrained models -- [vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p) -- [silvery-oath-7](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e) + #### finetune (training from pre-trained model) :anguished: + + These arguments will be saved into a .txt file automatically. This .txt file can be used in the testing for reading the arguments. The `threshold` parameter is used to filter out the labels with lower probabilities. Note that this threshold has no influence on single-label classification. More explanation of the arguments is in the `main_train.py` file. Note that the probability label file and the split lists should be put under the same folder during training and testing. The script reads .pkl format instead of .txt files. +- `test.py`: This python script is used to test the model. It is a semi-automatic script and reads the argument file generated in the training process to decide the label type, model type etc. However, it still requires user to input some basic arguments, such as the path of data directory. 
Here is an example of the input arguments: ``` -CUDA_VISIBLE_DEVICES=3 python classification/main_train.py --exp_name finetune --data_dir data/sen12ms/data --label_split_dir splits --use_RGB --IGBP_simple --label_type single_label --threshold 0.1 --model Moco --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 2048 --epochs 500 --pt_name silvery-oath7-2rr3864e --pt_dir pretrained/moco --eval +CUDA_VISIBLE_DEVICES=3 python classification/main_train.py --exp_name finetune --data_dir data/sen12ms/data --label_split_dir splits --sensor_type s1s2 --IGBP_simple --label_type single_label --threshold 0.1 --model Moco --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 2048 --epochs 500 --pt_name silvery-oath7-2rr3864e --pt_dir pretrained/moco --eval ``` - `pt_name`: the name of the model (wandb run name) - `--eval`: remove this param if you want to skip evaluating after finishing the training +- `sensor_type`: s1, s2, s1s2 Evaluate trained models for classification (this is only if you downloaded the trained model) ``` diff --git a/classification/dataset.py b/classification/dataset.py index 6e72e19..4c12526 100644 --- a/classification/dataset.py +++ b/classification/dataset.py @@ -58,13 +58,14 @@ def load_sample(sample, labels, label_type, threshold, imgTransform, use_s1, use img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD) else: img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD_BIGEARTHNET) - # load only RGB + + # load only RGB if use_RGB and use_s2==False: if not for_bigearthnet: img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB) else: img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB_BIGEARTHNET) - + # load s1 data if use_s1: if use_s2 or use_RGB: diff --git a/classification/main_train.py b/classification/main_train.py index 0363e7c..009d009 100644 --- a/classification/main_train.py +++ b/classification/main_train.py @@ -162,24 +162,24 @@ def main(): use_s1 = (args.sensor_type == 's1') | (args.sensor_type == 's1s2') use_s2 = (args.sensor_type == 's2') | (args.sensor_type == 's1s2') - data_dir = os.path.join("data", args.dataset, "data") + dataset = args.dataset + data_dir = os.path.join("data", dataset, "data") bands_mean = {} bands_std = {} train_dataGen = None val_dataGen = None test_dataGen = None - if not args.use_bigearthnet: - print("Using SEN12MS dataset") + + print(f"Using {dataset} dataset") + if dataset == 'sen12ms': bands_mean = {'s1_mean': [-11.76858, -18.294598], 's2_mean': [1226.4215, 1137.3799, 1139.6792, 1350.9973, 1932.9058, 2211.1584, 2154.9846, 2409.1128, 2001.8622, 1356.0801]} bands_std = {'s1_std': [4.525339, 4.3586307], 's2_std': [741.6254, 740.883, 960.1045, 946.76056, 985.52747, 1082.4341, 1057.7628, 1136.1942, 1132.7898, 991.48016]} - else: - # Assume bigearthnet - print("Using BigEarthNet dataset") + elif dataset == 'bigearthnet': # THE S2 BAND STATISTICS WERE PROVIDED BY THE BIGEARTHNET TEAM bands_mean = {'s1_mean': [-11.76858, -18.294598], 's2_mean': [340.76769064,429.9430203,614.21682446,590.23569706,950.68368468,1792.46290469, @@ -187,10 +187,12 @@ def main(): bands_std = {'s1_std': [4.525339, 4.3586307], 's2_std': [554.81258967,572.41639287,582.87945694,675.88746967,729.89827633,1096.01480586, 1273.45393088,1365.45589904,1356.13789355,1302.3292881,1079.19066363,818.86747235]} + else: + raise NameError(f"unknown dataset: {dataset}") # load datasets imgTransform = transforms.Compose([ToTensor(),Normalize(bands_mean, bands_std)]) - if not args.use_bigearthnet: + if dataset == 
'sen12ms': train_dataGen = SEN12MS(data_dir, args.label_split_dir, imgTransform=imgTransform, label_type=label_type, threshold=args.threshold, subset="train", @@ -261,7 +263,7 @@ def main(): cudnn.benchmark = True # define number of classes - if not args.use_bigearthnet: + if dataset == 'sen12ms': if args.simple_scheme: numCls = 10 ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] @@ -281,7 +283,6 @@ def main(): '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43'] - print('num_class: ', numCls) diff --git a/classification/run_ft_s2.sh b/classification/run_finetune.sh similarity index 63% rename from classification/run_ft_s2.sh rename to classification/run_finetune.sh index a973cdc..0970122 100644 --- a/classification/run_ft_s2.sh +++ b/classification/run_finetune.sh @@ -1,22 +1,22 @@ #!/bin/bash -for lr in 0.00001 0.00005 0.0001 0.001 0.001 +for lr in 0.001 0.0005 0.0001 0.00005 0.00001 do for epoch in 200 do for label_tp in single_label multi_label do - for model in Moco_1x1RND Moco_1x1 Moco + for model in Moco # Moco_1x1RND Moco_1x1 do python classification/main_train.py --exp_name finetune --IGBP_simple \ --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ - --pt_name dainty-dragon-14 --pt_dir pretrained/moco \ + --pt_name electric-mountain-33 --pt_dir pretrained/moco \ --batch_size 64 --num_workers 4 --data_size 1024 \ - --data_dir data/sen12ms/data --label_split_dir splits \ + --dataset sen12ms --label_split_dir splits \ --label_type ${label_tp} \ --model ${model} \ --epochs ${epoch} \ - --use_s2 --eval + --sensor_type s1s2 --eval done done done diff --git a/classification/run_ft_s1s2.sh b/classification/run_ft_s1s2.sh deleted file mode 100644 index 487d5cf..0000000 --- a/classification/run_ft_s1s2.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -for lr in 0.00001 0.00005 0.0001 0.001 0.001 -do - for epoch in 200 - do - for label_tp in single_label multi_label - do - for model in Moco_1x1RND Moco_1x1 Moco - do - python classification/main_train.py --exp_name finetune --IGBP_simple \ - --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ - --pt_name dainty-dragon-14 --pt_dir pretrained/moco \ - --batch_size 64 --num_workers 4 --data_size 1024 \ - --data_dir data/sen12ms/data --label_split_dir splits \ - --label_type ${label_tp} \ - --model ${model} \ - --epochs ${epoch} \ - --use_s2 --use_s1 --eval - done - done - done -done -# \ No newline at end of file From 1fae2028b80416e86c08d242bf592916bb5f5972 Mon Sep 17 00:00:00 2001 From: taeil Date: Mon, 5 Apr 2021 19:42:34 -0700 Subject: [PATCH 5/9] merge big earth net change --- classification/run_finetune.sh | 2 +- ...run_supervised_s2.sh => run_supervised.sh} | 6 ++--- classification/run_supervised_s1s2.sh | 23 ------------------- 3 files changed, 4 insertions(+), 27 deletions(-) rename classification/{run_supervised_s2.sh => run_supervised.sh} (79%) delete mode 100644 classification/run_supervised_s1s2.sh diff --git a/classification/run_finetune.sh b/classification/run_finetune.sh index 0970122..336f332 100644 --- a/classification/run_finetune.sh +++ b/classification/run_finetune.sh @@ -8,7 +8,7 @@ do do for model in Moco # Moco_1x1RND Moco_1x1 do - python classification/main_train.py --exp_name finetune --IGBP_simple \ + python classification/main_train.py --exp_name finetune --simple_scheme \ --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ --pt_name electric-mountain-33 --pt_dir pretrained/moco \ --batch_size 64 
--num_workers 4 --data_size 1024 \ diff --git a/classification/run_supervised_s2.sh b/classification/run_supervised.sh similarity index 79% rename from classification/run_supervised_s2.sh rename to classification/run_supervised.sh index 6efec37..41685dc 100644 --- a/classification/run_supervised_s2.sh +++ b/classification/run_supervised.sh @@ -8,14 +8,14 @@ do do for model in Supervised_1x1 Supervised do - python classification/main_train.py --exp_name sup_learning --IGBP_simple \ + python classification/main_train.py --exp_name sup_learning --simple_scheme \ --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ --batch_size 64 --num_workers 4 --data_size 1024 \ - --data_dir data/sen12ms/data --label_split_dir splits \ + --dataset sen12ms --label_split_dir splits \ --label_type ${label_tp} \ --model ${model} \ --epochs ${epoch} \ - --use_s2 --eval + --sensor_type s1s2 --eval done done done diff --git a/classification/run_supervised_s1s2.sh b/classification/run_supervised_s1s2.sh deleted file mode 100644 index 5711d61..0000000 --- a/classification/run_supervised_s1s2.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -for lr in 0.00001 0.00005 0.0001 0.001 0.001 -do - for epoch in 200 - do - for label_tp in single_label multi_label - do - for model in Supervised_1x1 Supervised - do - python classification/main_train.py --exp_name sup_learning --IGBP_simple \ - --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ - --batch_size 64 --num_workers 4 --data_size 1024 \ - --data_dir data/sen12ms/data --label_split_dir splits \ - --label_type ${label_tp} \ - --model ${model} \ - --epochs ${epoch} \ - --use_s1 --use_s2 --eval - done - done - done -done -# \ No newline at end of file From 3184f88781a8cc012c0f2b6ffc8661a27a275d74 Mon Sep 17 00:00:00 2001 From: taeil Date: Mon, 5 Apr 2021 21:13:13 -0700 Subject: [PATCH 6/9] updated parameters --- classification/main_train.py | 4 ++-- classification/run_finetune.sh | 32 +++++++++++++++++--------------- classification/run_supervised.sh | 30 ++++++++++++++++-------------- 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/classification/main_train.py b/classification/main_train.py index 009d009..afcd784 100644 --- a/classification/main_train.py +++ b/classification/main_train.py @@ -181,10 +181,10 @@ def main(): 1082.4341, 1057.7628, 1136.1942, 1132.7898, 991.48016]} elif dataset == 'bigearthnet': # THE S2 BAND STATISTICS WERE PROVIDED BY THE BIGEARTHNET TEAM - bands_mean = {'s1_mean': [-11.76858, -18.294598], + bands_mean = {'s1_mean': [0.4994, 0.2542], 's2_mean': [340.76769064,429.9430203,614.21682446,590.23569706,950.68368468,1792.46290469, 2075.46795189,2218.94553375,2266.46036911,2246.0605464,1594.42694882,1009.32729131]} - bands_std = {'s1_std': [4.525339, 4.3586307], + bands_std = {'s1_std': [0.1902, 0.1720], 's2_std': [554.81258967,572.41639287,582.87945694,675.88746967,729.89827633,1096.01480586, 1273.45393088,1365.45589904,1356.13789355,1302.3292881,1079.19066363,818.86747235]} else: diff --git a/classification/run_finetune.sh b/classification/run_finetune.sh index 336f332..b33e3cd 100644 --- a/classification/run_finetune.sh +++ b/classification/run_finetune.sh @@ -1,24 +1,26 @@ #!/bin/bash -for lr in 0.001 0.0005 0.0001 0.00005 0.00001 +for dataset in bigearthnet sen12ms do - for epoch in 200 + for lr in 0.001 0.0005 0.0001 0.00005 0.00001 do - for label_tp in single_label multi_label + for epoch in 200 do - for model in Moco # Moco_1x1RND Moco_1x1 + for label_tp in single_label multi_label do - python 
classification/main_train.py --exp_name finetune --simple_scheme \ - --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ - --pt_name electric-mountain-33 --pt_dir pretrained/moco \ - --batch_size 64 --num_workers 4 --data_size 1024 \ - --dataset sen12ms --label_split_dir splits \ - --label_type ${label_tp} \ - --model ${model} \ - --epochs ${epoch} \ - --sensor_type s1s2 --eval + for model in Moco # Moco_1x1RND Moco_1x1 + do + python classification/main_train.py --exp_name finetune --simple_scheme \ + --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ + --pt_name electric-mountain-33 --pt_dir pretrained/moco \ + --batch_size 64 --num_workers 4 --data_size 1024 \ + --dataset ${dataset} --label_split_dir splits \ + --label_type ${label_tp} \ + --model ${model} \ + --epochs ${epoch} \ + --sensor_type s1s2 --eval + done done done done -done -# \ No newline at end of file +done \ No newline at end of file diff --git a/classification/run_supervised.sh b/classification/run_supervised.sh index 41685dc..d0f4a36 100644 --- a/classification/run_supervised.sh +++ b/classification/run_supervised.sh @@ -1,23 +1,25 @@ #!/bin/bash -for lr in 0.00001 0.00005 0.0001 0.001 0.001 +for dataset in bigearthnet # sen12ms do - for epoch in 200 + for lr in 0.001 0.0005 0.0001 0.00005 0.00001 do - for label_tp in single_label multi_label + for epoch in 200 do - for model in Supervised_1x1 Supervised + for label_tp in single_label multi_label do - python classification/main_train.py --exp_name sup_learning --simple_scheme \ - --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ - --batch_size 64 --num_workers 4 --data_size 1024 \ - --dataset sen12ms --label_split_dir splits \ - --label_type ${label_tp} \ - --model ${model} \ - --epochs ${epoch} \ - --sensor_type s1s2 --eval + for model in Supervised # Supervised_1x1 + do + python classification/main_train2.py --exp_name sup_learning --simple_scheme \ + --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ + --batch_size 64 --num_workers 4 --data_size 1024 \ + --dataset ${dataset} --label_split_dir splits \ + --label_type ${label_tp} \ + --model ${model} \ + --epochs ${epoch} \ + --sensor_type s1s2 --eval + done done done done -done -# \ No newline at end of file +done \ No newline at end of file From 565d938b22a93bfdddb77023b6401f14658f6007 Mon Sep 17 00:00:00 2001 From: Ernesto Oropeza <54602115+oropezaev@users.noreply.github.com> Date: Tue, 6 Apr 2021 09:11:31 -0500 Subject: [PATCH 7/9] Delete main_train_bu.py --- classification/main_train_bu.py | 689 -------------------------------- 1 file changed, 689 deletions(-) delete mode 100644 classification/main_train_bu.py diff --git a/classification/main_train_bu.py b/classification/main_train_bu.py deleted file mode 100644 index 696cb68..0000000 --- a/classification/main_train_bu.py +++ /dev/null @@ -1,689 +0,0 @@ -# Modified from Jian Kang, https://www.rsim.tu-berlin.de/menue/team/dring_jian_kang/ -# Modified by Yu-Lun Wu, TUM - -import os -import argparse -import numpy as np -from datetime import datetime -from tqdm import tqdm - -import torch -import torch.optim as optim -import torchvision.transforms as transforms -import torch.backends.cudnn as cudnn -from torch.utils.data import DataLoader -# from tensorboardX import SummaryWriter - -import shutil -import sys -sys.path.append('../') - -from dataset import SEN12MS, ToTensor, Normalize -from models.VGG import VGG16, VGG19 -from models.ResNet import ResNet50, ResNet50_1x1, ResNet101, ResNet152, Moco, Moco_1x1, Moco_1x1RND -from models.DenseNet 
import DenseNet121, DenseNet161, DenseNet169, DenseNet201 -from metrics import MetricTracker, Precision_score, Recall_score, F1_score, \ - F2_score, Hamming_loss, Subset_accuracy, Accuracy_score, One_error, \ - Coverage_error, Ranking_loss, LabelAvgPrec_score, calssification_report, \ - conf_mat_nor, get_AA, multi_conf_mat, OA_multi - -import wandb - -#sec.2 (done) - -model_choices = ['VGG16', 'VGG19', - 'ResNet50','ResNet101','ResNet152', 'ResNet50_1x1', - 'DenseNet121','DenseNet161','DenseNet169','DenseNet201', 'Moco', 'Moco_1x1', 'Moco_1x1RND'] -label_choices = ['multi_label', 'single_label'] - -# ----------------------- define and parse arguments -------------------------- -parser = argparse.ArgumentParser() - -# experiment name -parser.add_argument('--exp_name', type=str, default=None, - help="experiment name. will be used in the path names \ - for log- and savefiles. If no input experiment name, \ - path would be set to model name.") - -# data directory -parser.add_argument('--data_dir', type=str, default=None, - help='path to SEN12MS dataset') -parser.add_argument('--label_split_dir', type=str, default=None, - help="path to label data and split list") -parser.add_argument('--data_size', type=str, default="full", - help="64, 128, 256, 1000, 1024, full") -# input/output -parser.add_argument('--use_s2', action='store_true', default=False, - help='use sentinel-2 bands') -parser.add_argument('--use_s1', action='store_true', default=False, - help='use sentinel-1 data') -parser.add_argument('--use_RGB', action='store_true', default=False, - help='use sentinel-2 RGB bands') -parser.add_argument('--IGBP_simple', action='store_true', default=True, - help='use IGBP simplified scheme; otherwise: IGBP original scheme') -parser.add_argument('--label_type', type=str, choices = label_choices, - default='multi_label', - help="label-type (default: multi_label)") -parser.add_argument('--threshold', type=float, default=0.1, - help='threshold to convert probability-labels to multi-hot \ - labels, mean/std for normalizatin would not be accurate \ - if the threshold is larger than 0.22. 
\ - for single_label threshold would be ignored') -parser.add_argument('--eval', action='store_true', default=False, - help='evaluate against test set') - -# network -parser.add_argument('--model', type=str, choices = model_choices, - default='ResNet50', - help="network architecture (default: ResNet50)") - -# training hyperparameters -parser.add_argument('--lr', type=float, default=0.001, - help='initial learning rate') -parser.add_argument('--use_lr_step', action='store_true', default=False, - help='use learning rate steps') -parser.add_argument('--lr_step_size', type=int, default=25, - help='Learning rate step size') -parser.add_argument('--lr_step_gamma', type=float, default=0.1, - help='Learning rate step gamma') -parser.add_argument('--decay', type=float, default=1e-5, - help='decay rate') -parser.add_argument('--batch_size', type=int, default=64, - help='mini-batch size (default: 64)') -parser.add_argument('--num_workers',type=int, default=4, - help='num_workers for data loading in pytorch') -parser.add_argument('--epochs', type=int, default=100, - help='number of training epochs (default: 100)') -parser.add_argument('--resume', '-r', type=str, default=None, - help='path to the pretrained weights file', ) -parser.add_argument('--pt_dir', '-pd', type=str, default=None, - help='directory for pretrained model', ) -parser.add_argument('--pt_name', '-pn', type=str, default=None, - help='model name without extension', ) -parser.add_argument('--pt_type', '-pt', type=str, default='bb', - help='bb (backbone) or qe (query encoder)', ) - -args = parser.parse_args() - -wandb.init(config=args) - -# -------------------- set directory for saving files ------------------------- - -if wandb.run is not None: - # save to wandb run dir for tracking and saving the models - checkpoint_dir = wandb.run.dir - logs_dir = wandb.run.dir -elif args.exp_name: - checkpoint_dir = os.path.join('./', args.exp_name, 'checkpoints') - logs_dir = os.path.join('./', args.exp_name, 'logs') -else: - checkpoint_dir = os.path.join('./', args.model, 'checkpoints') - logs_dir = os.path.join('./', args.model, 'logs') - -if not os.path.isdir(checkpoint_dir): - os.makedirs(checkpoint_dir) -if not os.path.isdir(logs_dir): - os.makedirs(logs_dir) - -# ----------------------------- saving files --------------------------------- -def write_arguments_to_file(args, filename): - with open(filename, 'w') as f: - for key, value in vars(args).items(): - f.write('%s: %s\n' % (key, str(value))) - -def save_checkpoint(state, is_best, name): - - filename = os.path.join(checkpoint_dir, name + '_checkpoint.pth') - - torch.save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(checkpoint_dir, name + - '_model_best.pth')) - -# -------------------------------- Main Program ------------------------------- -def main(): - global args - - # save configuration to file - sv_name = datetime.strftime(datetime.now(), '%Y%m%d_%H%M%S') - print('saving file name is ', sv_name) - - write_arguments_to_file(args, os.path.join(logs_dir, sv_name+'_arguments.txt')) - -# ----------------------------------- data - # define mean/std of the training set (for data normalization) - label_type = args.label_type - - bands_mean = {'s1_mean': [-11.76858, -18.294598], - 's2_mean': [1226.4215, 1137.3799, 1139.6792, 1350.9973, 1932.9058, - 2211.1584, 2154.9846, 2409.1128, 2001.8622, 1356.0801]} - - bands_std = {'s1_std': [4.525339, 4.3586307], - 's2_std': [741.6254, 740.883, 960.1045, 946.76056, 985.52747, - 1082.4341, 1057.7628, 1136.1942, 1132.7898, 
991.48016]} - - - # load datasets - imgTransform = transforms.Compose([ToTensor(),Normalize(bands_mean, bands_std)]) - - train_dataGen = SEN12MS(args.data_dir, args.label_split_dir, - imgTransform=imgTransform, - label_type=label_type, threshold=args.threshold, subset="train", - use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, - IGBP_s=args.IGBP_simple, data_size=args.data_size) - - val_dataGen = SEN12MS(args.data_dir, args.label_split_dir, - imgTransform=imgTransform, - label_type=label_type, threshold=args.threshold, subset="val", - use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, - IGBP_s=args.IGBP_simple, data_size=args.data_size) - - if args.eval: - test_dataGen = SEN12MS(args.data_dir, args.label_split_dir, - imgTransform=imgTransform, - label_type=label_type, threshold=args.threshold, subset="test", - use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, - IGBP_s=args.IGBP_simple) - - - - # number of input channels - n_inputs = train_dataGen.n_inputs - print('input channels =', n_inputs) - - # set up dataloaders - train_data_loader = DataLoader(train_dataGen, - batch_size=args.batch_size, - num_workers=args.num_workers, - shuffle=True, - pin_memory=True) - val_data_loader = DataLoader(val_dataGen, - batch_size=args.batch_size, - num_workers=args.num_workers, - shuffle=False, - pin_memory=True) - - if args.eval: - test_data_loader = DataLoader(test_dataGen, - batch_size=args.batch_size, - num_workers=args.num_workers, - shuffle=False, - pin_memory=True) - -# -------------------------------- ML setup - # cuda - use_cuda = torch.cuda.is_available() - if use_cuda: - torch.backends.cudnn.enabled = True - cudnn.benchmark = True - - # define number of classes - if args.IGBP_simple: - numCls = 10 - ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] - else: - numCls = 17 - ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', - '11', '12', '13', '14', '15', '16', '17'] - - print('num_class: ', numCls) - - # define model - if args.model == 'VGG16': - model = VGG16(n_inputs, numCls) - elif args.model == 'VGG19': - model = VGG19(n_inputs, numCls) - elif args.model == 'ResNet50': - model = ResNet50(n_inputs, numCls) - elif args.model == 'ResNet50_1x1': - model = ResNet50_1x1(n_inputs, numCls) - elif args.model == 'ResNet101': - model = ResNet101(n_inputs, numCls) - elif args.model == 'ResNet152': - model = ResNet152(n_inputs, numCls) - elif args.model == 'DenseNet121': - model = DenseNet121(n_inputs, numCls) - elif args.model == 'DenseNet161': - model = DenseNet161(n_inputs, numCls) - elif args.model == 'DenseNet169': - model = DenseNet169(n_inputs, numCls) - elif args.model == 'DenseNet201': - model = DenseNet201(n_inputs, numCls) - # finetune moco pre-trained model - elif args.model.startswith("Moco"): - pt_path = os.path.join(args.pt_dir, f"{args.pt_name}_{args.pt_type}_converted.pth") - assert os.path.exists(pt_path) - if args.model == 'Moco': - print("transfer backbone weights but no conv 1x1 input module") - model = Moco(torch.load(pt_path), n_inputs, numCls) - elif args.model == 'Moco_1x1': - print("transfer backbone weights and input module weights") - model = Moco_1x1(torch.load(pt_path), n_inputs, numCls) - elif args.model == 'Moco_1x1RND': - print("transfer backbone weights but initialize input module random with random weights") - model = Moco_1x1(torch.load(pt_path), n_inputs, numCls) - else: # Assume Moco2 at present - raise NameError("no model") - - else: - raise NameError("no model") - - # move model to GPU if is 
available - if use_cuda: - model = model.cuda() - - # define loss function - if label_type == 'multi_label': - lossfunc = torch.nn.BCEWithLogitsLoss() - else: - lossfunc = torch.nn.CrossEntropyLoss() - - print(model) - - # Freezing Conv1x1 and Encoder - # list(model.children())[0].training - # child_seq = list(model.children())[0] - # child_seq.training = False - - # # Freezing Conv1x1 - # child_conv1x1 = list(list(model.children())[0])[0] - # child_conv1x1.training = False - - # Freezing first n layers - lyer = 0 - nl = 4 - for child in list(model.children())[0]: - child.training = False - lyer += 1 - if lyer > nl: - break - - - - - # set up optimizer - optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay) - - best_acc = 0 - start_epoch = 0 - if args.resume: - if os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - checkpoint = torch.load(args.resume) - checkpoint_nm = os.path.basename(args.resume) - sv_name = checkpoint_nm.split('_')[0] + '_' + checkpoint_nm.split('_')[1] - print('saving file name is ', sv_name) - - if checkpoint['epoch'] > start_epoch: - start_epoch = checkpoint['epoch'] - best_acc = checkpoint['best_prec'] - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - - # set up tensorboard logging - # train_writer = SummaryWriter(os.path.join(logs_dir, 'runs', sv_name, 'training')) - # val_writer = SummaryWriter(os.path.join(logs_dir, 'runs', sv_name, 'val')) - - -# ----------------------------- executing Train/Val. - # train network - # wandb.watch(model, log="all") - - scheduler = None - if args.use_lr_step: - # Ex: If initial Lr is 0.0001, step size is 25, and gamma is 0.1, then lr will be changed for every 20 steps - # 0.0001 - first 25 epochs - # 0.00001 - 25 to 50 epochs - # 0.000001 - 50 to 75 epochs - # 0.0000001 - 75 to 100 epochs - # https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate - scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_step_gamma) - - for epoch in range(start_epoch, args.epochs): - if args.use_lr_step: - scheduler.step() - print('Epoch {}/{} lr: {}'.format(epoch, args.epochs - 1, optimizer.param_groups[0]['lr'])) - else: - print('Epoch {}/{}'.format(epoch, args.epochs - 1)) - print('-' * 25) - - train(train_data_loader, model, optimizer, lossfunc, label_type, epoch, use_cuda) - micro_f1 = val(val_data_loader, model, optimizer, label_type, epoch, use_cuda) - - is_best_acc = micro_f1 > best_acc - best_acc = max(best_acc, micro_f1) - - save_checkpoint({ - 'epoch': epoch, - 'arch': args.model, - 'model_state_dict': model.state_dict(), - 'optimizer_state_dict': optimizer.state_dict(), - 'best_prec': best_acc - }, is_best_acc, sv_name) - - wandb.log({'epoch': epoch, 'micro_f1': micro_f1}) - - print("=============") - print("done training") - print("=============") - - if args.eval: - eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS) - -def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): - - model.eval() - # define metrics - prec_score_ = Precision_score() - recal_score_ = Recall_score() - f1_score_ = F1_score() - f2_score_ = F2_score() - hamming_loss_ = Hamming_loss() - subset_acc_ = Subset_accuracy() - acc_score_ = Accuracy_score() # from original script, not recommeded, seems not 
correct - one_err_ = One_error() - coverage_err_ = Coverage_error() - rank_loss_ = Ranking_loss() - labelAvgPrec_score_ = LabelAvgPrec_score() - - calssification_report_ = calssification_report(ORG_LABELS) - - # -------------------------------- prediction - y_true = [] - predicted_probs = [] - - with torch.no_grad(): - for batch_idx, data in enumerate(tqdm(test_data_loader, desc="test")): - - # unpack sample - bands = data["image"] - labels = data["label"] - - # move data to gpu if model is on gpu - if use_cuda: - bands = bands.to(torch.device("cuda")) - # labels = labels.to(torch.device("cuda")) - - # forward pass - logits = model(bands) - - # convert logits to probabilies - if label_type == 'multi_label': - probs = torch.sigmoid(logits).cpu().numpy() - else: - sm = torch.nn.Softmax(dim=1) - probs = sm(logits).cpu().numpy() - - labels = labels.cpu().numpy() # keep true & pred label at same loc. - predicted_probs += list(probs) - y_true += list(labels) - - predicted_probs = np.asarray(predicted_probs) - # convert predicted probabilities into one/multi-hot labels - if label_type == 'multi_label': - y_predicted = (predicted_probs >= 0.5).astype(np.float32) - else: - loc = np.argmax(predicted_probs, axis=-1) - y_predicted = np.zeros_like(predicted_probs).astype(np.float32) - for i in range(len(loc)): - y_predicted[i, loc[i]] = 1 - - y_true = np.asarray(y_true) - - # --------------------------- evaluation with metrics - # general - macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) - macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) - macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) - macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) - hamming_loss = hamming_loss_(y_predicted, y_true) - subset_acc = subset_acc_(y_predicted, y_true) - macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) - # ranking-based - one_error = one_err_(predicted_probs, y_true) - coverage_error = coverage_err_(predicted_probs, y_true) - rank_loss = rank_loss_(predicted_probs, y_true) - labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) - - cls_report = calssification_report_(y_predicted, y_true) - - if label_type == 'multi_label': - [conf_mat, cls_acc, aa] = multi_conf_mat(y_predicted, y_true, n_classes=numCls) - # the results derived from multilabel confusion matrix are not recommended to use - oa = OA_multi(y_predicted, y_true) - # this oa can be Jaccard index - - info = { - "macroPrec": macro_prec, - "microPrec": micro_prec, - "samplePrec": sample_prec, - "macroRec": macro_rec, - "microRec": micro_rec, - "sampleRec": sample_rec, - "macroF1": macro_f1, - "microF1": micro_f1, - "sampleF1": sample_f1, - "macroF2": macro_f2, - "microF2": micro_f2, - "sampleF2": sample_f2, - "HammingLoss": hamming_loss, - "subsetAcc": subset_acc, - "macroAcc": macro_acc, - "microAcc": micro_acc, - "sampleAcc": sample_acc, - "oneError": one_error, - "coverageError": coverage_error, - "rankLoss": rank_loss, - "labelAvgPrec": labelAvgPrec, - "clsReport": cls_report, - "multilabel_conf_mat": conf_mat, - "class-wise Acc": cls_acc, - "AverageAcc": aa, - "OverallAcc": oa} - - else: - conf_mat = conf_mat_nor(y_predicted, y_true, n_classes=numCls) - aa = get_AA(y_predicted, y_true, n_classes=numCls) # average accuracy, \ - # zero-sample classes are not excluded - - info = { - "macroPrec": macro_prec, - "microPrec": micro_prec, - "samplePrec": sample_prec, - "macroRec": macro_rec, - "microRec": micro_rec, - "sampleRec": sample_rec, - "macroF1": macro_f1, - 
"microF1": micro_f1, - "sampleF1": sample_f1, - "macroF2": macro_f2, - "microF2": micro_f2, - "sampleF2": sample_f2, - "HammingLoss": hamming_loss, - "subsetAcc": subset_acc, - "macroAcc": macro_acc, - "microAcc": micro_acc, - "sampleAcc": sample_acc, - "oneError": one_error, - "coverageError": coverage_error, - "rankLoss": rank_loss, - "labelAvgPrec": labelAvgPrec, - "clsReport": cls_report, - "conf_mat": conf_mat, - "AverageAcc": aa} - - wandb.run.summary.update(info) - print("saving metrics...") - # pkl.dump(info, open("test_scores.pkl", "wb")) - - -def train(trainloader, model, optimizer, lossfunc, label_type, epoch, use_cuda): - - lossTracker = MetricTracker() - - # set model to train mode - model.train() - - - # main training loop - for idx, data in enumerate(tqdm(trainloader, desc="training")): - - numSample = data["image"].size(0) - - # unpack sample - bands = data["image"] - if label_type == 'multi_label': - labels = data["label"] - else: - labels = (torch.max(data["label"], 1)[1]).type(torch.long) - - # move data to gpu if model is on gpu - if use_cuda: - bands = bands.to(torch.device("cuda")) - labels = labels.to(torch.device("cuda")) - - # reset gradients - optimizer.zero_grad() - - # forward pass - logits = model(bands) - loss = lossfunc(logits, labels) - - # backward pass - loss.backward() - optimizer.step() - - # - lossTracker.update(loss.item(), numSample) - - # train_writer.add_scalar("loss", lossTracker.avg, epoch) - wandb.log({'loss': lossTracker.avg, 'epoch': epoch}) - - print('Train loss: {:.6f}'.format(lossTracker.avg)) - - -def val(valloader, model, optimizer, label_type, epoch, use_cuda): - - prec_score_ = Precision_score() - recal_score_ = Recall_score() - f1_score_ = F1_score() - f2_score_ = F2_score() - hamming_loss_ = Hamming_loss() - subset_acc_ = Subset_accuracy() - acc_score_ = Accuracy_score() - one_err_ = One_error() - coverage_err_ = Coverage_error() - rank_loss_ = Ranking_loss() - labelAvgPrec_score_ = LabelAvgPrec_score() - - # set model to evaluation mode - model.eval() - - # main validation loop - y_true = [] - predicted_probs = [] - - with torch.no_grad(): - for batch_idx, data in enumerate(tqdm(valloader, desc="validation")): - - # unpack sample - bands = data["image"] - labels = data["label"] - - # move data to gpu if model is on gpu - if use_cuda: - bands = bands.to(torch.device("cuda")) - #labels = labels.to(torch.device("cuda")) - - # forward pass - logits = model(bands) - - # convert logits to probabilies - if label_type == 'multi_label': - probs = torch.sigmoid(logits).cpu().numpy() - else: - sm = torch.nn.Softmax(dim=1) - probs = sm(logits).cpu().numpy() - - labels = labels.cpu().numpy() # keep true & pred label at same loc. 
- predicted_probs += list(probs) - y_true += list(labels) - - - predicted_probs = np.asarray(predicted_probs) - # convert predicted probabilities into one/multi-hot labels - if label_type == 'multi_label': - y_predicted = (predicted_probs >= 0.5).astype(np.float32) - else: - loc = np.argmax(predicted_probs, axis=-1) - y_predicted = np.zeros_like(predicted_probs).astype(np.float32) - for i in range(len(loc)): - y_predicted[i,loc[i]] = 1 - - y_true = np.asarray(y_true) - - - macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) - macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) - macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) - macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) - hamming_loss = hamming_loss_(y_predicted, y_true) - subset_acc = subset_acc_(y_predicted, y_true) - macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) - - # Note that below 4 ranking-based metrics are not applicable to single-label - # (multi-class) classification, but they will still show the scores during - # validation on tensorboard - one_error = one_err_(predicted_probs, y_true) - coverage_error = coverage_err_(predicted_probs, y_true) - rank_loss = rank_loss_(predicted_probs, y_true) - labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) - - info = { - "macroPrec" : macro_prec, - "microPrec" : micro_prec, - "samplePrec" : sample_prec, - "macroRec" : macro_rec, - "microRec" : micro_rec, - "sampleRec" : sample_rec, - "macroF1" : macro_f1, - "microF1" : micro_f1, - "sampleF1" : sample_f1, - "macroF2" : macro_f2, - "microF2" : micro_f2, - "sampleF2" : sample_f2, - "HammingLoss" : hamming_loss, - "subsetAcc" : subset_acc, - "macroAcc" : macro_acc, - "microAcc" : micro_acc, - "sampleAcc" : sample_acc, - "oneError" : one_error, - "coverageError" : coverage_error, - "rankLoss" : rank_loss, - "labelAvgPrec" : labelAvgPrec - } - - wandb.run.summary.update(info) - for tag, value in info.items(): - wandb.log({tag: value, 'epoch': epoch}) - # val_writer.add_scalar(tag, value, epoch) - - print('Validation microPrec: {:.6f} microF1: {:.6f} sampleF1: {:.6f} microF2: {:.6f} sampleF2: {:.6f}'.format( - micro_prec, - micro_f1, - sample_f1, - micro_f2, - sample_f2 - )) - return micro_f1 - - - -if __name__ == "__main__": - main() - - \ No newline at end of file From cf842556e15219424221c2b7833f13e7dfdd10f6 Mon Sep 17 00:00:00 2001 From: Ernesto Oropeza <54602115+oropezaev@users.noreply.github.com> Date: Tue, 6 Apr 2021 09:11:48 -0500 Subject: [PATCH 8/9] Delete tsne_from_main_train.py --- classification/tsne_from_main_train.py | 726 ------------------------- 1 file changed, 726 deletions(-) delete mode 100644 classification/tsne_from_main_train.py diff --git a/classification/tsne_from_main_train.py b/classification/tsne_from_main_train.py deleted file mode 100644 index c52a9e2..0000000 --- a/classification/tsne_from_main_train.py +++ /dev/null @@ -1,726 +0,0 @@ -# Modified from Jian Kang, https://www.rsim.tu-berlin.de/menue/team/dring_jian_kang/ -# Modified by Yu-Lun Wu, TUM - -import os -import argparse -import numpy as np -from datetime import datetime -from tqdm import tqdm - - -import matplotlib.pyplot as plt -from matplotlib import colors -import matplotlib - - -# import sklearn -from sklearn.manifold import TSNE - -import torch -import torch.optim as optim -import torchvision.transforms as transforms -import torch.backends.cudnn as cudnn -from torch.utils.data import DataLoader -# from tensorboardX import SummaryWriter - -import shutil 
-import sys -sys.path.append('../') - - -from dataset import SEN12MS, ToTensor, Normalize -from models.VGG import VGG16, VGG19 -from models.ResNet import ResNet50, ResNet101, ResNet152, Moco, Moco_1x1, ResNet50_1x1 -from models.DenseNet import DenseNet121, DenseNet161, DenseNet169, DenseNet201 -from metrics import MetricTracker, Precision_score, Recall_score, F1_score, \ - F2_score, Hamming_loss, Subset_accuracy, Accuracy_score, One_error, \ - Coverage_error, Ranking_loss, LabelAvgPrec_score, calssification_report, \ - conf_mat_nor, get_AA, multi_conf_mat, OA_multi - -import wandb - -#sec.2 (done) - -model_choices = ['VGG16', 'VGG19', - 'ResNet50','ResNet101','ResNet152', - 'DenseNet121','DenseNet161','DenseNet169','DenseNet201', 'Moco'] -label_choices = ['multi_label', 'single_label'] - -# ----------------------- define and parse arguments -------------------------- -parser = argparse.ArgumentParser() - -# experiment name -parser.add_argument('--exp_name', type=str, default=None, - help="experiment name. will be used in the path names \ - for log- and savefiles. If no input experiment name, \ - path would be set to model name.") - -# data directory -parser.add_argument('--data_dir', type=str, default=None, - help='path to SEN12MS dataset') -parser.add_argument('--label_split_dir', type=str, default=None, - help="path to label data and split list") -parser.add_argument('--data_size', type=str, default="full", - help="64, 128, 256, 1000, 1024, full") -# input/output -parser.add_argument('--use_s2', action='store_true', default=False, - help='use sentinel-2 bands') -parser.add_argument('--use_s1', action='store_true', default=False, - help='use sentinel-1 data') -parser.add_argument('--use_RGB', action='store_true', default=False, - help='use sentinel-2 RGB bands') -parser.add_argument('--IGBP_simple', action='store_true', default=True, - help='use IGBP simplified scheme; otherwise: IGBP original scheme') -parser.add_argument('--label_type', type=str, choices = label_choices, - default='multi_label', - help="label-type (default: multi_label)") -parser.add_argument('--threshold', type=float, default=0.1, - help='threshold to convert probability-labels to multi-hot \ - labels, mean/std for normalizatin would not be accurate \ - if the threshold is larger than 0.22. 
\ - for single_label threshold would be ignored') -parser.add_argument('--eval', action='store_true', default=False, - help='evaluate against test set') - -# network -parser.add_argument('--model', type=str, choices = model_choices, - default='ResNet50', - help="network architecture (default: ResNet50)") - -# training hyperparameters -parser.add_argument('--lr', type=float, default=0.001, - help='initial learning rate') -parser.add_argument('--decay', type=float, default=1e-5, - help='decay rate') -parser.add_argument('--batch_size', type=int, default=64, - help='mini-batch size (default: 64)') -parser.add_argument('--num_workers',type=int, default=4, - help='num_workers for data loading in pytorch') -parser.add_argument('--epochs', type=int, default=100, - help='number of training epochs (default: 100)') -parser.add_argument('--resume', '-r', type=str, default=None, - help='path to the pretrained weights file', ) -parser.add_argument('--pt_dir', '-pd', type=str, default=None, - help='directory for pretrained model', ) -parser.add_argument('--pt_name', '-pn', type=str, default=None, - help='model name without extension', ) -parser.add_argument('--pt_type', '-pt', type=str, default=None, - help='model name without extension', ) - -args = parser.parse_args() - -wandb.init(config=args) - -# -------------------- set directory for saving files ------------------------- - -if wandb.run is not None: - # save to wandb run dir for tracking and saving the models - checkpoint_dir = wandb.run.dir - logs_dir = wandb.run.dir -elif args.exp_name: - checkpoint_dir = os.path.join('./', args.exp_name, 'checkpoints') - logs_dir = os.path.join('./', args.exp_name, 'logs') -else: - checkpoint_dir = os.path.join('./', args.model, 'checkpoints') - logs_dir = os.path.join('./', args.model, 'logs') - -if not os.path.isdir(checkpoint_dir): - os.makedirs(checkpoint_dir) -if not os.path.isdir(logs_dir): - os.makedirs(logs_dir) - -# ----------------------------- saving files --------------------------------- -def write_arguments_to_file(args, filename): - with open(filename, 'w') as f: - for key, value in vars(args).items(): - f.write('%s: %s\n' % (key, str(value))) - -def save_checkpoint(state, is_best, name): - - filename = os.path.join(checkpoint_dir, name + '_checkpoint.pth') - - torch.save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(checkpoint_dir, name + - '_model_best.pth')) - -# -------------------------------- Main Program ------------------------------- -def main(): - global args - - # save configuration to file - sv_name = datetime.strftime(datetime.now(), '%Y%m%d_%H%M%S') - print('saving file name is ', sv_name) - - write_arguments_to_file(args, os.path.join(logs_dir, sv_name+'_arguments.txt')) - -# ----------------------------------- data - # define mean/std of the training set (for data normalization) - label_type = args.label_type - - bands_mean = {'s1_mean': [-11.76858, -18.294598], - 's2_mean': [1226.4215, 1137.3799, 1139.6792, 1350.9973, 1932.9058, - 2211.1584, 2154.9846, 2409.1128, 2001.8622, 1356.0801]} - - bands_std = {'s1_std': [4.525339, 4.3586307], - 's2_std': [741.6254, 740.883, 960.1045, 946.76056, 985.52747, - 1082.4341, 1057.7628, 1136.1942, 1132.7898, 991.48016]} - - print(os.listdir(args.data_dir)) - - # load datasets - imgTransform = transforms.Compose([ToTensor(),Normalize(bands_mean, bands_std)]) - - # train_dataGen = SEN12MS(args.data_dir, args.label_split_dir, - # imgTransform=imgTransform, - # label_type=label_type, threshold=args.threshold, 
subset="train", - # use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, - # IGBP_s=args.IGBP_simple, data_size=args.data_size) - # - # val_dataGen = SEN12MS(args.data_dir, args.label_split_dir, - # imgTransform=imgTransform, - # label_type=label_type, threshold=args.threshold, subset="val", - # use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, - # IGBP_s=args.IGBP_simple, data_size=args.data_size) - - if args.eval: - test_dataGen = SEN12MS(args.data_dir, args.label_split_dir, - imgTransform=imgTransform, - label_type=label_type, threshold=args.threshold, subset="test", - use_s1=args.use_s1, use_s2=args.use_s2, use_RGB=args.use_RGB, - IGBP_s=args.IGBP_simple) - - # number of input channels - # n_inputs = train_dataGen.n_inputs - n_inputs = test_dataGen.n_inputs - # print('input channels =', n_inputs) - - # set up dataloaders - # train_data_loader = DataLoader(train_dataGen, - # batch_size=args.batch_size, - # num_workers=args.num_workers, - # shuffle=True, - # pin_memory=True) - # val_data_loader = DataLoader(val_dataGen, - # batch_size=args.batch_size, - # num_workers=args.num_workers, - # shuffle=False, - # pin_memory=True) - - if args.eval: - test_data_loader = DataLoader(test_dataGen, - batch_size=args.batch_size, - num_workers=args.num_workers, - shuffle=False, - pin_memory=True) - -# -------------------------------- ML setup - # cuda - use_cuda = torch.cuda.is_available() - if use_cuda: - torch.backends.cudnn.enabled = True - cudnn.benchmark = True - - # define number of classes - if args.IGBP_simple: - numCls = 10 - ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] - else: - numCls = 17 - ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', - '11', '12', '13', '14', '15', '16', '17'] - - print('num_class: ', numCls) - - # define model - if args.model == 'VGG16': - model = VGG16(n_inputs, numCls) - elif args.model == 'VGG19': - model = VGG19(n_inputs, numCls) - elif args.model == 'ResNet50': - model = ResNet50(n_inputs, numCls) - elif args.model == 'ResNet101': - model = ResNet101(n_inputs, numCls) - elif args.model == 'ResNet152': - model = ResNet152(n_inputs, numCls) - elif args.model == 'DenseNet121': - model = DenseNet121(n_inputs, numCls) - elif args.model == 'DenseNet161': - model = DenseNet161(n_inputs, numCls) - elif args.model == 'DenseNet169': - model = DenseNet169(n_inputs, numCls) - elif args.model == 'DenseNet201': - model = DenseNet201(n_inputs, numCls) - # finetune moco pre-trained model - elif args.model == 'Moco': - pt_path = os.path.join(args.pt_dir, f"{args.pt_name}_{args.pt_type}_converted.pth") - assert os.path.exists(pt_path) - model = Moco(torch.load(pt_path), n_inputs, numCls) - else: - raise NameError("no model") - - # move model to GPU if is available - if use_cuda: - model = model.cuda() - - # # define loss function - # if label_type == 'multi_label': - # lossfunc = torch.nn.BCEWithLogitsLoss() - # else: - # lossfunc = torch.nn.CrossEntropyLoss() - # - print(model) - - # model.encoder = torch.nn.Sequential(*[model.encoder[i] for i in range(8)]) - print(model.encoder) - - # print(model) - # # set up optimizer - # optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay) - - # best_acc = 0 - # start_epoch = 0 - # if args.resume: - # if os.path.isfile(args.resume): - # print("=> loading checkpoint '{}'".format(args.resume)) - # checkpoint = torch.load(args.resume) - # checkpoint_nm = os.path.basename(args.resume) - # sv_name = checkpoint_nm.split('_')[0] + '_' + checkpoint_nm.split('_')[1] - 
# print('saving file name is ', sv_name) - # - # if checkpoint['epoch'] > start_epoch: - # start_epoch = checkpoint['epoch'] - # best_acc = checkpoint['best_prec'] - # model.load_state_dict(checkpoint['state_dict']) - # optimizer.load_state_dict(checkpoint['optimizer']) - # print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) - # else: - # print("=> no checkpoint found at '{}'".format(args.resume)) - - - # set up tensorboard logging - # train_writer = SummaryWriter(os.path.join(logs_dir, 'runs', sv_name, 'training')) - # val_writer = SummaryWriter(os.path.join(logs_dir, 'runs', sv_name, 'val')) - - -# ----------------------------- executing Train/Val. - # train network - - # wandb.watch(model, log="all") - # for epoch in range(start_epoch, args.epochs): - # - # print('Epoch {}/{}'.format(epoch, args.epochs - 1)) - # print('-' * 10) - # - # train(train_data_loader, model, optimizer, lossfunc, label_type, epoch, use_cuda) - # micro_f1 = val(val_data_loader, model, optimizer, label_type, epoch, use_cuda) - # - # is_best_acc = micro_f1 > best_acc - # best_acc = max(best_acc, micro_f1) - # - # save_checkpoint({ - # 'epoch': epoch, - # 'arch': args.model, - # 'model_state_dict': model.state_dict(), - # 'optimizer_state_dict': optimizer.state_dict(), - # 'best_prec': best_acc - # }, is_best_acc, sv_name) - # - # wandb.log({'epoch': epoch, 'micro_f1': micro_f1}) - # - # print("=============") - # print("done training") - # print("=============") - - # model.eval() - model.encoder.eval() - - y_true = [] - predicted_output = [] - with torch.no_grad(): - for batch_idx, data in enumerate(tqdm(test_data_loader, desc="test")): - - # unpack sample - bands = data["image"] - labels = data["label"] - - # move data to gpu if model is on gpu - if use_cuda: - bands = bands.to(torch.device("cuda")) - # labels = labels.to(torch.device("cuda")) - - # forward pass - logits = model.encoder(bands) - - outputs = logits.cpu().numpy() - predicted_output += list(outputs.reshape(data['image'].shape[0],2048)) - - labels = labels.cpu().numpy() # keep true & pred label at same loc. 
- y_true += list(labels) - - - X = np.array(predicted_output) - # print(f'Activation Vector Shape: {X.shape}') - # # images = np.concatenate(images) - # # print(f'Image Vector Shape: {images.shape}') - # - class_simp = ['Forest', 'Shrublands', 'Savana', 'Grassland', 'Wetlands', - 'Croplands', 'Urban and Built-Up Lands','Permanent Snow and Ice', - 'Barren','Water Bodies'] - - color10 = ['#009900','#c6b044','#fbff13', '#b6ff05', - '#27ff87','#c24f44','#a5a5a5','#69fff8', - '#f9ffa4','#1c0dff'] - - color = [] - for i in range(10): - color.append(np.array(y_true)[:,i] == 1) - color = np.array(color) - - tsne = TSNE(n_components=2, learning_rate=150, perplexity=30, angle=0.2, verbose=2).fit_transform(X) - # - - samples = np.random.choice([True, False], size=X.shape[0], p=[1.0,0.0]) - color_sam = color[:,samples] - tx, ty = tsne[samples,0], tsne[samples,1] - tx = (tx-np.min(tx)) / (np.max(tx) - np.min(tx)) - ty = (ty-np.min(ty)) / (np.max(ty) - np.min(ty)) - tsne[samples,0] = tx - tsne[samples,1] = ty - - - - fig = plt.figure(figsize=(20,10)) - - for j in range(10): - plt.scatter(tsne[samples,0][color_sam[j]], tsne[samples,1][color_sam[j]], - c=color10[j],label = class_simp[j]) - - plt.yticks([]) - plt.xticks([]) - plt.grid(False) - plt.title('TSNE Output MoCo', fontsize=14, color= 'black') - plt.legend() - - # cb = plt.colorbar(cax = fig.add_axes([0.92, 0.2, 0.025, 0.6])) - # cb.set_ticklabels(class_simp) - # loc = np.arange(0.5,10.5,1) - # cb.set_ticks(loc) - plt.show() - print('done...') - - # if args.eval: - # eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS) - -# def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): -# -# model.eval() -# # define metrics -# prec_score_ = Precision_score() -# recal_score_ = Recall_score() -# f1_score_ = F1_score() -# f2_score_ = F2_score() -# hamming_loss_ = Hamming_loss() -# subset_acc_ = Subset_accuracy() -# acc_score_ = Accuracy_score() # from original script, not recommeded, seems not correct -# one_err_ = One_error() -# coverage_err_ = Coverage_error() -# rank_loss_ = Ranking_loss() -# labelAvgPrec_score_ = LabelAvgPrec_score() -# -# calssification_report_ = calssification_report(ORG_LABELS) -# -# # -------------------------------- prediction -# y_true = [] -# predicted_probs = [] -# -# with torch.no_grad(): -# for batch_idx, data in enumerate(tqdm(test_data_loader, desc="test")): -# -# # unpack sample -# bands = data["image"] -# labels = data["label"] -# -# # move data to gpu if model is on gpu -# if use_cuda: -# bands = bands.to(torch.device("cuda")) -# # labels = labels.to(torch.device("cuda")) -# -# # forward pass -# logits = model(bands) -# -# # convert logits to probabilies -# if label_type == 'multi_label': -# probs = torch.sigmoid(logits).cpu().numpy() -# else: -# sm = torch.nn.Softmax(dim=1) -# probs = sm(logits).cpu().numpy() -# -# labels = labels.cpu().numpy() # keep true & pred label at same loc. 
-# predicted_probs += list(probs) -# y_true += list(labels) -# -# predicted_probs = np.asarray(predicted_probs) -# # convert predicted probabilities into one/multi-hot labels -# if label_type == 'multi_label': -# y_predicted = (predicted_probs >= 0.5).astype(np.float32) -# else: -# loc = np.argmax(predicted_probs, axis=-1) -# y_predicted = np.zeros_like(predicted_probs).astype(np.float32) -# for i in range(len(loc)): -# y_predicted[i, loc[i]] = 1 -# -# y_true = np.asarray(y_true) -# -# # --------------------------- evaluation with metrics -# # general -# macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) -# macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) -# macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) -# macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) -# hamming_loss = hamming_loss_(y_predicted, y_true) -# subset_acc = subset_acc_(y_predicted, y_true) -# macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) -# # ranking-based -# one_error = one_err_(predicted_probs, y_true) -# coverage_error = coverage_err_(predicted_probs, y_true) -# rank_loss = rank_loss_(predicted_probs, y_true) -# labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) -# -# cls_report = calssification_report_(y_predicted, y_true) -# -# if label_type == 'multi_label': -# [conf_mat, cls_acc, aa] = multi_conf_mat(y_predicted, y_true, n_classes=numCls) -# # the results derived from multilabel confusion matrix are not recommended to use -# oa = OA_multi(y_predicted, y_true) -# # this oa can be Jaccard index -# -# info = { -# "macroPrec": macro_prec, -# "microPrec": micro_prec, -# "samplePrec": sample_prec, -# "macroRec": macro_rec, -# "microRec": micro_rec, -# "sampleRec": sample_rec, -# "macroF1": macro_f1, -# "microF1": micro_f1, -# "sampleF1": sample_f1, -# "macroF2": macro_f2, -# "microF2": micro_f2, -# "sampleF2": sample_f2, -# "HammingLoss": hamming_loss, -# "subsetAcc": subset_acc, -# "macroAcc": macro_acc, -# "microAcc": micro_acc, -# "sampleAcc": sample_acc, -# "oneError": one_error, -# "coverageError": coverage_error, -# "rankLoss": rank_loss, -# "labelAvgPrec": labelAvgPrec, -# "clsReport": cls_report, -# "multilabel_conf_mat": conf_mat, -# "class-wise Acc": cls_acc, -# "AverageAcc": aa, -# "OverallAcc": oa} -# -# else: -# conf_mat = conf_mat_nor(y_predicted, y_true, n_classes=numCls) -# aa = get_AA(y_predicted, y_true, n_classes=numCls) # average accuracy, \ -# # zero-sample classes are not excluded -# -# info = { -# "macroPrec": macro_prec, -# "microPrec": micro_prec, -# "samplePrec": sample_prec, -# "macroRec": macro_rec, -# "microRec": micro_rec, -# "sampleRec": sample_rec, -# "macroF1": macro_f1, -# "microF1": micro_f1, -# "sampleF1": sample_f1, -# "macroF2": macro_f2, -# "microF2": micro_f2, -# "sampleF2": sample_f2, -# "HammingLoss": hamming_loss, -# "subsetAcc": subset_acc, -# "macroAcc": macro_acc, -# "microAcc": micro_acc, -# "sampleAcc": sample_acc, -# "oneError": one_error, -# "coverageError": coverage_error, -# "rankLoss": rank_loss, -# "labelAvgPrec": labelAvgPrec, -# "clsReport": cls_report, -# "conf_mat": conf_mat, -# "AverageAcc": aa} -# -# wandb.run.summary.update(info) -# print("saving metrics...") -# # pkl.dump(info, open("test_scores.pkl", "wb")) -# -# -# def train(trainloader, model, optimizer, lossfunc, label_type, epoch, use_cuda): -# -# lossTracker = MetricTracker() -# -# # set model to train mode -# model.train() -# -# -# # main training loop -# for idx, data in enumerate(tqdm(trainloader, 
desc="training")): -# -# numSample = data["image"].size(0) -# -# # unpack sample -# bands = data["image"] -# if label_type == 'multi_label': -# labels = data["label"] -# else: -# labels = (torch.max(data["label"], 1)[1]).type(torch.long) -# -# # move data to gpu if model is on gpu -# if use_cuda: -# bands = bands.to(torch.device("cuda")) -# labels = labels.to(torch.device("cuda")) -# -# # reset gradients -# optimizer.zero_grad() -# -# # forward pass -# logits = model(bands) -# loss = lossfunc(logits, labels) -# -# # backward pass -# loss.backward() -# optimizer.step() -# -# # -# lossTracker.update(loss.item(), numSample) -# -# # train_writer.add_scalar("loss", lossTracker.avg, epoch) -# wandb.log({'loss': lossTracker.avg, 'epoch': epoch}) -# -# print('Train loss: {:.6f}'.format(lossTracker.avg)) -# -# -# def val(valloader, model, optimizer, label_type, epoch, use_cuda): -# -# prec_score_ = Precision_score() -# recal_score_ = Recall_score() -# f1_score_ = F1_score() -# f2_score_ = F2_score() -# hamming_loss_ = Hamming_loss() -# subset_acc_ = Subset_accuracy() -# acc_score_ = Accuracy_score() -# one_err_ = One_error() -# coverage_err_ = Coverage_error() -# rank_loss_ = Ranking_loss() -# labelAvgPrec_score_ = LabelAvgPrec_score() -# -# # set model to evaluation mode -# model.eval() -# -# # main validation loop -# y_true = [] -# predicted_probs = [] -# -# with torch.no_grad(): -# for batch_idx, data in enumerate(tqdm(valloader, desc="validation")): -# -# # unpack sample -# bands = data["image"] -# labels = data["label"] -# -# # move data to gpu if model is on gpu -# if use_cuda: -# bands = bands.to(torch.device("cuda")) -# #labels = labels.to(torch.device("cuda")) -# -# # forward pass -# logits = model(bands) -# -# # convert logits to probabilies -# if label_type == 'multi_label': -# probs = torch.sigmoid(logits).cpu().numpy() -# else: -# sm = torch.nn.Softmax(dim=1) -# probs = sm(logits).cpu().numpy() -# -# labels = labels.cpu().numpy() # keep true & pred label at same loc. 
-# predicted_probs += list(probs) -# y_true += list(labels) -# -# -# predicted_probs = np.asarray(predicted_probs) -# # convert predicted probabilities into one/multi-hot labels -# if label_type == 'multi_label': -# y_predicted = (predicted_probs >= 0.5).astype(np.float32) -# else: -# loc = np.argmax(predicted_probs, axis=-1) -# y_predicted = np.zeros_like(predicted_probs).astype(np.float32) -# for i in range(len(loc)): -# y_predicted[i,loc[i]] = 1 -# -# y_true = np.asarray(y_true) -# -# -# macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) -# macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) -# macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) -# macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) -# hamming_loss = hamming_loss_(y_predicted, y_true) -# subset_acc = subset_acc_(y_predicted, y_true) -# macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) -# -# # Note that below 4 ranking-based metrics are not applicable to single-label -# # (multi-class) classification, but they will still show the scores during -# # validation on tensorboard -# one_error = one_err_(predicted_probs, y_true) -# coverage_error = coverage_err_(predicted_probs, y_true) -# rank_loss = rank_loss_(predicted_probs, y_true) -# labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) -# -# info = { -# "macroPrec" : macro_prec, -# "microPrec" : micro_prec, -# "samplePrec" : sample_prec, -# "macroRec" : macro_rec, -# "microRec" : micro_rec, -# "sampleRec" : sample_rec, -# "macroF1" : macro_f1, -# "microF1" : micro_f1, -# "sampleF1" : sample_f1, -# "macroF2" : macro_f2, -# "microF2" : micro_f2, -# "sampleF2" : sample_f2, -# "HammingLoss" : hamming_loss, -# "subsetAcc" : subset_acc, -# "macroAcc" : macro_acc, -# "microAcc" : micro_acc, -# "sampleAcc" : sample_acc, -# "oneError" : one_error, -# "coverageError" : coverage_error, -# "rankLoss" : rank_loss, -# "labelAvgPrec" : labelAvgPrec -# } -# -# wandb.run.summary.update(info) -# for tag, value in info.items(): -# wandb.log({tag: value, 'epoch': epoch}) -# # val_writer.add_scalar(tag, value, epoch) -# -# print('Validation microPrec: {:.6f} microF1: {:.6f} sampleF1: {:.6f} microF2: {:.6f} sampleF2: {:.6f}'.format( -# micro_prec, -# micro_f1, -# sample_f1, -# micro_f2, -# sample_f2 -# )) -# return micro_f1 - - -if __name__ == "__main__": - main() - - \ No newline at end of file From ff89f2ebd4c15a3d44a84713b079a58239d5a38d Mon Sep 17 00:00:00 2001 From: eoropeza Date: Fri, 9 Apr 2021 07:12:49 -0500 Subject: [PATCH 9/9] Binary using Multilabel Framework --- classification/dataset.py | 12 ++- classification/main_train.py | 154 ++++++++++++++++++++--------------- classification/metrics.py | 22 ++--- 3 files changed, 106 insertions(+), 82 deletions(-) diff --git a/classification/dataset.py b/classification/dataset.py index 4c12526..e299c33 100644 --- a/classification/dataset.py +++ b/classification/dataset.py @@ -90,7 +90,7 @@ def load_sample(sample, labels, label_type, threshold, imgTransform, use_s1, use else: loc = np.argmax(lc, axis=-1) lc_hot = np.zeros_like(lc).astype(np.float32) - lc_hot[loc] = 1 + lc_hot[loc] = lc[0] rt_sample = {'image': img, 'label': lc_hot, 'id': sample["id"]} @@ -353,7 +353,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None, if CLC_s == True: self.n_classes = 19 else: - self.n_classes = 43 + self.n_classes = 1 + # self.n_classes = 43 # make sure parent dir exists assert os.path.exists(path) @@ -365,7 +366,8 @@ def __init__(self, path, ls_dir=None, 
imgTransform=None, sample_list = None total_sample_size = 0 if subset == "train" or subset == "val": - file = os.path.join(ls_dir, f'bigearthnet_train_{data_size}.pkl') + # file = os.path.join(ls_dir, f'bigearthnet_train_{data_size}.pkl') + file = os.path.join(ls_dir, f'BigEarthNet_train_balanced_Permanently_irrigated_land_{data_size}.pkl') print("BigEarthNet: Loading file ",file) sample_list = pkl.load(open(file, "rb")) total_sample_size = len(sample_list) @@ -385,6 +387,7 @@ def __init__(self, path, ls_dir=None, imgTransform=None, else: pbar = tqdm(total=125866) # 125866 samples in test set file = os.path.join(ls_dir, 'bigearthnet_test.pkl') + sample_list = pkl.load(open(file, "rb")) print("bigearthnet_test should be 125866:", len(sample_list)) @@ -408,7 +411,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None, "samples from the bigearthnet subset", subset) # import lables as a dictionary - label_file = os.path.join(ls_dir, 'BigEarthNet_labels.pkl') + # label_file = os.path.join(ls_dir, 'BigEarthNet_labels.pkl') + label_file = os.path.join(ls_dir, 'BigEarthNet_binary_labels_Permanently_irrigated_land.pkl') a_file = open(label_file, "rb") self.labels = pkl.load(a_file) diff --git a/classification/main_train.py b/classification/main_train.py index 35c696e..9e00c5b 100644 --- a/classification/main_train.py +++ b/classification/main_train.py @@ -66,7 +66,7 @@ # help='use sentinel-1 data') parser.add_argument('--use_RGB', action='store_true', default=False, help='use sentinel-2 RGB bands') -parser.add_argument('--simple_scheme', action='store_true', default=True, +parser.add_argument('--simple_scheme', action='store_true', default=False, help='use IGBP simplified scheme; otherwise: IGBP original scheme') parser.add_argument('--label_type', type=str, choices = label_choices, default='multi_label', @@ -281,12 +281,14 @@ def main(): ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19'] else: - numCls = 43 - ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', - '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', - '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', - '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', - '41', '42', '43'] + numCls = 1 + ORG_LABELS = ['0','1'] + # numCls = 43 + # ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', + # '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', + # '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', + # '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', + # '41', '42', '43'] print('num_class: ', numCls) @@ -426,11 +428,11 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): f2_score_ = F2_score() hamming_loss_ = Hamming_loss() subset_acc_ = Subset_accuracy() - acc_score_ = Accuracy_score() # from original script, not recommeded, seems not correct + # acc_score_ = Accuracy_score() # from original script, not recommeded, seems not correct one_err_ = One_error() - coverage_err_ = Coverage_error() - rank_loss_ = Ranking_loss() - labelAvgPrec_score_ = LabelAvgPrec_score() + # coverage_err_ = Coverage_error() + # rank_loss_ = Ranking_loss() + # labelAvgPrec_score_ = LabelAvgPrec_score() calssification_report_ = calssification_report(ORG_LABELS) @@ -489,18 +491,27 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): # --------------------------- evaluation with metrics # general - macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) - 
macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) - macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) - macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) + # macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) + # macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) + # macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) + # macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) + # hamming_loss = hamming_loss_(y_predicted, y_true) + # subset_acc = subset_acc_(y_predicted, y_true) + # macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) + + macro_f1, micro_f1 = f1_score_(y_predicted, y_true) + macro_f2, micro_f2 = f2_score_(y_predicted, y_true) + macro_prec, micro_prec = prec_score_(y_predicted, y_true) + macro_rec, micro_rec = recal_score_(y_predicted, y_true) hamming_loss = hamming_loss_(y_predicted, y_true) subset_acc = subset_acc_(y_predicted, y_true) - macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) + # macro_acc, micro_acc = acc_score_(y_predicted, y_true) + # ranking-based one_error = one_err_(predicted_probs, y_true) - coverage_error = coverage_err_(predicted_probs, y_true) - rank_loss = rank_loss_(predicted_probs, y_true) - labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) + # coverage_error = coverage_err_(predicted_probs, y_true) + # rank_loss = rank_loss_(predicted_probs, y_true) + # labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) cls_report = calssification_report_(y_predicted, y_true) @@ -513,25 +524,25 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): info = { "macroPrec": macro_prec, "microPrec": micro_prec, - "samplePrec": sample_prec, + # "samplePrec": sample_prec, "macroRec": macro_rec, "microRec": micro_rec, - "sampleRec": sample_rec, + # "sampleRec": sample_rec, "macroF1": macro_f1, "microF1": micro_f1, - "sampleF1": sample_f1, + # "sampleF1": sample_f1, "macroF2": macro_f2, "microF2": micro_f2, - "sampleF2": sample_f2, + # "sampleF2": sample_f2, "HammingLoss": hamming_loss, - "subsetAcc": subset_acc, - "macroAcc": macro_acc, - "microAcc": micro_acc, - "sampleAcc": sample_acc, + # "subsetAcc": subset_acc, + # "macroAcc": macro_acc, + # "microAcc": micro_acc, + # "sampleAcc": sample_acc, "oneError": one_error, - "coverageError": coverage_error, - "rankLoss": rank_loss, - "labelAvgPrec": labelAvgPrec, + # "coverageError": coverage_error, + # "rankLoss": rank_loss, + # "labelAvgPrec": labelAvgPrec, "clsReport": cls_report, "multilabel_conf_mat": conf_mat, "class-wise Acc": cls_acc, @@ -546,25 +557,25 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): info = { "macroPrec": macro_prec, "microPrec": micro_prec, - "samplePrec": sample_prec, + # "samplePrec": sample_prec, "macroRec": macro_rec, "microRec": micro_rec, - "sampleRec": sample_rec, + # "sampleRec": sample_rec, "macroF1": macro_f1, "microF1": micro_f1, - "sampleF1": sample_f1, + # "sampleF1": sample_f1, "macroF2": macro_f2, "microF2": micro_f2, - "sampleF2": sample_f2, + # "sampleF2": sample_f2, "HammingLoss": hamming_loss, "subsetAcc": subset_acc, "macroAcc": macro_acc, "microAcc": micro_acc, - "sampleAcc": sample_acc, + # "sampleAcc": sample_acc, "oneError": one_error, - "coverageError": coverage_error, - "rankLoss": rank_loss, - "labelAvgPrec": labelAvgPrec, + # "coverageError": coverage_error, + # "rankLoss": rank_loss, + # "labelAvgPrec": labelAvgPrec, "clsReport": cls_report, "conf_mat": 
conf_mat, "AverageAcc": aa} @@ -593,9 +604,10 @@ def train(trainloader, model, optimizer, lossfunc, label_type, epoch, use_cuda): if label_type == 'multi_label': labels = data["label"] else: - labels = (torch.max(data["label"], 1)[1]).type(torch.long) - - # move data to gpu if model is on gpu + labels = (torch.max(data["label"], 1)[1]).type(torch.long) + # labels = data["label"] + + # move data to gpu if model is on gpu if use_cuda: bands = bands.to(torch.device("cuda")) labels = labels.to(torch.device("cuda")) @@ -628,11 +640,11 @@ def val(valloader, model, optimizer, label_type, epoch, use_cuda): f2_score_ = F2_score() hamming_loss_ = Hamming_loss() subset_acc_ = Subset_accuracy() - acc_score_ = Accuracy_score() + # acc_score_ = Accuracy_score() one_err_ = One_error() - coverage_err_ = Coverage_error() - rank_loss_ = Ranking_loss() - labelAvgPrec_score_ = LabelAvgPrec_score() + # coverage_err_ = Coverage_error() + # rank_loss_ = Ranking_loss() + # labelAvgPrec_score_ = LabelAvgPrec_score() # set model to evaluation mode model.eval() @@ -680,44 +692,51 @@ def val(valloader, model, optimizer, label_type, epoch, use_cuda): y_true = np.asarray(y_true) - macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) - macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) - macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) - macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) + # macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) + # macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) + # macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) + # macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) + # hamming_loss = hamming_loss_(y_predicted, y_true) + # subset_acc = subset_acc_(y_predicted, y_true) + # macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) + + macro_f1, micro_f1 = f1_score_(y_predicted, y_true) + macro_f2, micro_f2 = f2_score_(y_predicted, y_true) + macro_prec, micro_prec = prec_score_(y_predicted, y_true) + macro_rec, micro_rec = recal_score_(y_predicted, y_true) hamming_loss = hamming_loss_(y_predicted, y_true) subset_acc = subset_acc_(y_predicted, y_true) - macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) - + # macro_acc, micro_acc = acc_score_(y_predicted, y_true) # Note that below 4 ranking-based metrics are not applicable to single-label # (multi-class) classification, but they will still show the scores during # validation on tensorboard one_error = one_err_(predicted_probs, y_true) - coverage_error = coverage_err_(predicted_probs, y_true) - rank_loss = rank_loss_(predicted_probs, y_true) - labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) + # coverage_error = coverage_err_(predicted_probs, y_true) + # rank_loss = rank_loss_(predicted_probs, y_true) + # labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) info = { "macroPrec" : macro_prec, "microPrec" : micro_prec, - "samplePrec" : sample_prec, + # "samplePrec" : sample_prec, "macroRec" : macro_rec, "microRec" : micro_rec, - "sampleRec" : sample_rec, + # "sampleRec" : sample_rec, "macroF1" : macro_f1, "microF1" : micro_f1, - "sampleF1" : sample_f1, + # "sampleF1" : sample_f1, "macroF2" : macro_f2, "microF2" : micro_f2, - "sampleF2" : sample_f2, + # "sampleF2" : sample_f2, "HammingLoss" : hamming_loss, "subsetAcc" : subset_acc, - "macroAcc" : macro_acc, - "microAcc" : micro_acc, - "sampleAcc" : sample_acc, + # "macroAcc" : macro_acc, + # "microAcc" : micro_acc, + # 
"sampleAcc" : sample_acc, "oneError" : one_error, - "coverageError" : coverage_error, - "rankLoss" : rank_loss, - "labelAvgPrec" : labelAvgPrec + # "coverageError" : coverage_error, + # "rankLoss" : rank_loss, + # "labelAvgPrec" : labelAvgPrec } wandb.run.summary.update(info) @@ -725,12 +744,13 @@ def val(valloader, model, optimizer, label_type, epoch, use_cuda): wandb.log({tag: value, 'epoch': epoch}) # val_writer.add_scalar(tag, value, epoch) - print('Validation microPrec: {:.6f} microF1: {:.6f} sampleF1: {:.6f} microF2: {:.6f} sampleF2: {:.6f}'.format( + # print('Validation microPrec: {:.6f} microF1: {:.6f} sampleF1: {:.6f} microF2: {:.6f} sampleF2: {:.6f}'.format( + print('Validation microPrec: {:.6f} microF1: {:.6f} microF2: {:.6f}'.format( micro_prec, micro_f1, - sample_f1, + # sample_f1, micro_f2, - sample_f2 + # sample_f2 )) return micro_f1 diff --git a/classification/metrics.py b/classification/metrics.py index 3db79fa..7adfdad 100644 --- a/classification/metrics.py +++ b/classification/metrics.py @@ -103,11 +103,11 @@ def __init__(self): def forward(self, predict_labels, true_labels): - sample_prec = precision_score(true_labels, predict_labels, average='samples') + # sample_prec = precision_score(true_labels, predict_labels, average='samples') micro_prec = precision_score(true_labels, predict_labels, average='micro') macro_prec = precision_score(true_labels, predict_labels, average='macro') - return macro_prec, micro_prec, sample_prec + return macro_prec, micro_prec#, sample_prec class Recall_score(nn.Module): @@ -117,11 +117,11 @@ def __init__(self): def forward(self, predict_labels, true_labels): - sample_rec = recall_score(true_labels, predict_labels, average='samples') + # sample_rec = recall_score(true_labels, predict_labels, average='samples') micro_rec = recall_score(true_labels, predict_labels, average='micro') macro_rec = recall_score(true_labels, predict_labels, average='macro') - return macro_rec, micro_rec, sample_rec + return macro_rec, micro_rec#, sample_rec class F1_score(nn.Module): @@ -133,9 +133,9 @@ def forward(self, predict_labels, true_labels): macro_f1 = f1_score(true_labels, predict_labels, average="macro") micro_f1 = f1_score(true_labels, predict_labels, average="micro") - sample_f1 = f1_score(true_labels, predict_labels, average="samples") + # sample_f1 = f1_score(true_labels, predict_labels, average="samples") - return macro_f1, micro_f1, sample_f1 + return macro_f1, micro_f1#, sample_f1 class F2_score(nn.Module): @@ -147,9 +147,9 @@ def forward(self, predict_labels, true_labels): macro_f2 = fbeta_score(true_labels, predict_labels, beta=2, average="macro") micro_f2 = fbeta_score(true_labels, predict_labels, beta=2, average="micro") - sample_f2 = fbeta_score(true_labels, predict_labels, beta=2, average="samples") + # sample_f2 = fbeta_score(true_labels, predict_labels, beta=2, average="samples") - return macro_f2, micro_f2, sample_f2 + return macro_f2, micro_f2#, sample_f2 class Hamming_loss(nn.Module): @@ -188,9 +188,9 @@ def forward(self, predict_labels, true_labels): TP_sample = TP.sum(axis=1) union_sample = union.sum(axis=1) - sample_Acc = TP_sample/union_sample + # sample_Acc = TP_sample/union_sample - assert np.isfinite(sample_Acc).all(), 'Nan found in sample accuracy' + # assert np.isfinite(sample_Acc).all(), 'Nan found in sample accuracy' FP = (np.logical_and((predict_labels == 1), (true_labels == 0))).astype(int) TN = (np.logical_and((predict_labels == 0), (true_labels == 0))).astype(int) @@ -207,7 +207,7 @@ def forward(self, predict_labels, 
true_labels): micro_Acc = (TP_cls.mean() + TN_cls.mean()) / (TP_cls.mean() + FP_cls.mean() + TN_cls.mean() + FN_cls.mean()) - return macro_Acc, micro_Acc, sample_Acc.mean() + return macro_Acc, micro_Acc#, sample_Acc.mean() class One_error(nn.Module):
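
The hunks above (PATCH 9/9) repurpose the multi-label pipeline for a single binary class: numCls becomes 1, the multi_label path then amounts to one sigmoid output thresholded at 0.5, and the sample-averaged sklearn scores are commented out because 'samples' averaging is only defined for multilabel targets with more than one label column. A minimal, self-contained sketch of that evaluation path, assuming PyTorch and scikit-learn (the helper name evaluate_binary is illustrative and not part of the patch), could look like this:

import numpy as np
import torch
from sklearn.metrics import precision_score, recall_score, f1_score

def evaluate_binary(logits, y_true):
    """logits: (N, 1) raw model outputs; y_true: (N, 1) array of 0/1 labels."""
    probs = torch.sigmoid(logits).cpu().numpy()   # probabilities for the single class
    y_pred = (probs >= 0.5).astype(np.float32)    # one-column multi-hot prediction

    scores = {}
    for avg in ("macro", "micro"):                # only the averages the patch keeps
        scores[avg + "Prec"] = precision_score(y_true, y_pred, average=avg, zero_division=0)
        scores[avg + "Rec"] = recall_score(y_true, y_pred, average=avg, zero_division=0)
        scores[avg + "F1"] = f1_score(y_true, y_pred, average=avg, zero_division=0)
    return scores

if __name__ == "__main__":
    # tiny smoke test with random data
    rng = np.random.default_rng(0)
    y_true = rng.integers(0, 2, size=(8, 1)).astype(np.float32)
    print(evaluate_binary(torch.randn(8, 1), y_true))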