Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions a.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "import cv2\n",
    "import numpy as np\n",
    "\n",
    "path = './train/self_supervised/'\n",
    "file_path = os.listdir(path)\n",
    "\n",
    "# Load as a 3-channel BGR image. The previous call used flag 0\n",
    "# (IMREAD_GRAYSCALE), so cv2.split returned a single channel and the\n",
    "# 3-way unpack raised: ValueError: not enough values to unpack\n",
    "# (expected 3, got 1).\n",
    "image = cv2.imread(path + file_path[0], cv2.IMREAD_COLOR)\n",
    "if image is not None:\n",
    "    # Split the image into its channels (OpenCV stores them as B, G, R).\n",
    "    blue_channel, green_channel, red_channel = cv2.split(image)\n",
    "\n",
    "    # Inspect the raw values of each channel.\n",
    "    print(\"Blue Channel:\")\n",
    "    print(blue_channel)\n",
    "\n",
    "    print(\"\\nGreen Channel:\")\n",
    "    print(green_channel)\n",
    "\n",
    "    print(\"\\nRed Channel:\")\n",
    "    print(red_channel)\n",
    "\n",
    "    # For example, the per-channel mean can also be printed.\n",
    "    print(\"\\nMean Value - Blue Channel:\", np.mean(blue_channel))\n",
    "    print(\"Mean Value - Green Channel:\", np.mean(green_channel))\n",
    "    print(\"Mean Value - Red Channel:\", np.mean(red_channel))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# With IMREAD_COLOR the shape is (H, W, 3); the old grayscale load\n",
    "# produced (512, 512).\n",
    "print(image.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(np.min(image))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torchvision.transforms as transforms\n",
    "from torchvision import datasets\n",
    "import os\n",
    "data_path = './train/'\n",
    "transform_train = transforms.Compose([\n",
    "    transforms.RandomHorizontalFlip(),\n",
    "    transforms.ToTensor(),\n",
    "    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])\n",
    "dataset_train = datasets.ImageFolder(os.path.join(data_path), transform=transform_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import torch\n",
    "print(dataset_train[0][0])\n",
    "print(torch.min(dataset_train[0][0]))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
104 changes: 75 additions & 29 deletions mae_visualize copy.ipynb

Large diffs are not rendered by default.

36 changes: 30 additions & 6 deletions main_pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
import wandb
import argparse
import timm
from PIL import Image

from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

assert timm.__version__ == "0.4.5" # version check
import timm.optim.optim_factory as optim_factory
Expand Down Expand Up @@ -115,6 +120,25 @@ def get_args_parser():

return parser

# class CustomDataset(Dataset):
# def __init__(self, folder_path, transform=None):
# self.folder_path = folder_path
# self.image_paths = [os.path.join(folder_path, img) for img in os.listdir(folder_path) if img.endswith(('.png', '.jpg', '.jpeg', '.gif'))]
# self.transform = transform
# def __len__(self):
# return len(self.image_paths)

# def __getitem__(self, idx):
# image_path = self.image_paths[idx]
# image = Image.open(image_path)
# image = np.stack([image] * 3, axis=-1)
# # 이미지 변환
# if self.transform:
# # numpy.ndarray를 PIL Image로 변환
# image = transforms.ToPILImage()(image)
# image = self.transform(image)

# return image, 0

def main(args):
misc.init_distributed_mode(args)
Expand All @@ -132,15 +156,15 @@ def main(args):

cudnn.benchmark = True


# simple augmentation
transform_train = transforms.Compose([
transforms.Grayscale(num_output_channels=3),
transforms.RandomResizedCrop(args.input_size, scale=(0.2, 1.0), interpolation=3), # 3 is bicubic
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
transforms.RandomVerticalFlip(),
transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0)),
transforms.ToTensor()])

dataset_train = datasets.ImageFolder(os.path.join(args.data_path), transform=transform_train)
print(dataset_train)

if True: # args.distributed:
num_tasks = misc.get_world_size()
Expand Down Expand Up @@ -208,7 +232,7 @@ def main(args):
log_writer=log_writer,
args=args
)
if args.output_dir and (epoch % 20 == 0 or epoch + 1 == args.epochs):
if args.output_dir and (epoch % 1 == 0 or epoch + 1 == args.epochs):
misc.save_model(
args=args, model=model, model_without_ddp=model_without_ddp, optimizer=optimizer,
loss_scaler=loss_scaler, epoch=epoch)
Expand Down
6 changes: 3 additions & 3 deletions util/pos_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
return:
pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
"""
grid_h = np.arange(grid_size, dtype=np.float32)
grid_w = np.arange(grid_size, dtype=np.float32)
grid_h = np.arange(grid_size, dtype=np.float64)
grid_w = np.arange(grid_size, dtype=np.float64)
grid = np.meshgrid(grid_w, grid_h) # here w goes first
grid = np.stack(grid, axis=0)

Expand Down Expand Up @@ -53,7 +53,7 @@ def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
out: (M, D)
"""
assert embed_dim % 2 == 0
omega = np.arange(embed_dim // 2, dtype=np.float)
omega = np.arange(embed_dim // 2, dtype=np.float64)
omega /= embed_dim / 2.
omega = 1. / 10000**omega # (D/2,)

Expand Down