Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ Moreover, this paper relies heavily on previous work from the Lab, notably [Lear
**Evaluate your own model (pytorch and tensorflow)**

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Mp0vxUcIsX1QY-_Byo1LU2IRVcqu7gUl)

[![Psychophysics benchmark](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Ec6WEtDP2BOueEBmlHAkHjjZgIvm0RN_)


<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/2/2d/Tensorflow_logo.svg/230px-Tensorflow_logo.svg.png" width=35>
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1bttp-hVnV_agJGhwdRRW6yUBbf-eImRN)
<img src="https://pytorch.org/assets/images/pytorch-logo.png" width=35>
Expand Down
128 changes: 128 additions & 0 deletions harmonization/common/clickme_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,17 @@
"""

import tensorflow as tf
import numpy as np
import pandas as pd
import os
import glob
from .utils import get_synset

from .blur import gaussian_kernel, gaussian_blur

CLICKME_BASE_URL = 'https://storage.googleapis.com/serrelab/prj_harmonization/dataset/click-me'
PSYCH_BASE_URL = 'https://storage.googleapis.com/serrelab/prj_harmonization/dataset/psychophysics/clicktionary'

NB_VAL_SHARDS = 17
NB_TRAIN_SHARDS = 318

Expand Down Expand Up @@ -139,3 +146,124 @@ def load_clickme_val(batch_size = 64):
]

return load_clickme(shards_paths, batch_size)

def get_human_data(stimuli_folder):
    """
    Loads the human psychophysics performance curve from the click-me data.

    Parameters
    ----------
    stimuli_folder : str
        Path to the stimuli folder containing `data_for_zahra.npz`.

    Returns
    -------
    mpx : list
        Revelation levels (x-axis), excluding the full-image control
        condition (Revelation == 200).
    mpy : list
        Mean human accuracy per revelation level, min-max normalized
        to [0, 1].
    mpz : pandas.Series
        Standard deviation of human accuracy per revelation level.
    """
    data = np.load(os.path.join(stimuli_folder, "data_for_zahra.npz"),
                   allow_pickle=True, encoding="latin1")

    human_data = data["data_human"]
    column_names = data["columns"]

    df = pd.DataFrame(human_data, columns=column_names.astype(str))

    # Mean / spread of performance for each revelation level.
    mean_perfs = df.groupby("Revelation").mean().reset_index()
    std_perfs = df.groupby("Revelation").std().reset_index()

    # Drop the last group: Revelation == 200 is the full-image condition,
    # used as a control rather than as part of the curve.
    mpx = mean_perfs.iloc[:-1]["Revelation"]
    mpy = mean_perfs.iloc[:-1]["correct"]
    mpz = std_perfs.iloc[:-1]["correct"]

    # Min-max normalize the accuracy curve to [0, 1].
    mpy = (mpy - np.min(mpy)) / (mpy.max() - np.min(mpy))

    return mpx.tolist(), mpy.tolist(), mpz

def get_stimuli_paths(stimuli_folder = None):
    """
    Returns the paths to the clicktionary stimuli images.

    Parameters
    ----------
    stimuli_folder : str, optional
        Path to the stimuli folder. Effectively required: the `None`
        default is kept only for signature compatibility and now raises
        a clear error instead of a cryptic `TypeError` from
        `os.path.join(None, ...)`.

    Returns
    -------
    list
        Paths to the stimuli (files ending in `png`) of experiments
        1 and 2.

    Raises
    ------
    ValueError
        If `stimuli_folder` is None.
    """
    if stimuli_folder is None:
        raise ValueError("stimuli_folder must be provided "
                         "(path to the downloaded clicktionary stimuli).")

    p1 = os.path.join(stimuli_folder, 'exp_1_clicktionary_probabilistic_region_growth_centered')
    p2 = os.path.join(stimuli_folder, 'exp_2_clicktionary_probabilistic_region_growth_centered')
    images1 = glob.glob(os.path.join(p1, '*png'))
    images2 = glob.glob(os.path.join(p2, '*png'))

    return images1 + images2



def load_psychophysics(stimuli_folder = None):
    """
    Builds a DataFrame describing the psychophysics stimuli.

    Parameters
    ----------
    stimuli_folder : str, optional
        Path to the stimuli folder, forwarded to `get_stimuli_paths`.
        Added so callers can point at the downloaded stimuli; the
        previous implementation always called `get_stimuli_paths()`
        with no folder, which raised.

    Returns
    -------
    pandas.DataFrame
        One row per stimulus image with columns:
        path, name, label, difficulty, sample number,
        imagenet_index_label, task_label.
    """
    _, _, revmap = get_synset()

    rows = []
    for im_path in get_stimuli_paths(stimuli_folder):
        file_name = os.path.basename(im_path)
        # File pattern assumed: "<difficulty>_<label parts...><sample>.png".
        # file_name[:-5] strips the trailing sample digit plus ".png".
        label = ''.join(file_name[:-5].split('_')[1:])
        indx_label = revmap[label]['index']
        # NOTE(review): indices < 398 are mapped to task label 1 — assumed
        # to be the animate/inanimate split of ImageNet classes; confirm.
        task_label = 1 if indx_label < 398 else 0
        difficulty = file_name.split('_')[0]
        sample = file_name[-5]
        rows.append([im_path, file_name, label, difficulty, sample,
                     indx_label, task_label])

    return pd.DataFrame(rows, columns=['path', 'name', 'label', 'difficulty',
                                       'sample number',
                                       'imagenet_index_label', 'task_label'])

def get_psychophysics():
    """
    Fetches the psychophysics dataset and assembles all of its pieces.

    Downloads (or reuses the Keras cache of) the clicktionary stimuli,
    then loads the human performance curve and the stimuli metadata.

    Returns
    -------
    mpx : list
        Revelation levels of the human performance curve
        (from `get_human_data`).
    mpy : list
        Normalized mean human accuracy per revelation level.
    mpz : pandas.Series
        Standard deviation of human accuracy per revelation level.
    stimuli_paths : list
        Paths to the stimuli images under the downloaded folder.
    exp_df : pandas.DataFrame
        Per-stimulus metadata from `load_psychophysics`.
    """

    # NOTE(review): `get_file` downloads a single file; PSYCH_BASE_URL looks
    # like a directory prefix — confirm the endpoint serves an actual
    # file/archive, otherwise the "folder" below may not contain the stimuli.
    folder = tf.keras.utils.get_file("psychophysics",PSYCH_BASE_URL,cache_subdir="datasets/psychophysics")
    mpx, mpy , mpz = get_human_data(folder)
    # NOTE(review): `load_psychophysics()` is called without a folder, so it
    # calls `get_stimuli_paths(None)` internally, which fails on
    # `os.path.join(None, ...)` — it likely should receive `folder`; confirm.
    exp_df = load_psychophysics()
    stimuli_paths = get_stimuli_paths(folder)
    return mpx, mpy , mpz, stimuli_paths,exp_df

Loading