GLRec2021
Train
Config
from google.colab import drive
drive.mount('/content/drive')
import os
import re
import sys
import math
import random
import logging
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow_probability as tfp
from tqdm.notebook import *
from sklearn import metrics
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.layers import *
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import *
from tensorflow.keras import mixed_precision
try:
    import albumentations as A
    import tensorflow_addons as tfa
    from vit_keras import vit, layers
    import efficientnet.tfkeras as efn
    from classification_models.tfkeras import Classifiers
except:
    !pip install -qq gcsfs
    !pip install -qq vit-keras
    !pip install -qq efficientnet
    !pip install -qq albumentations
    !pip install -qq image-classifiers
    !pip install -qq tensorflow-addons
    !gdown --id 16MJuZ3wovKcc1B7V6eaUZKwkH1Dipykg
  Â
    import albumentations as A
    import tensorflow_addons as tfa
    from vit_keras import vit, layers
    import efficientnet.tfkeras as efn
    from classification_models.tfkeras import Classifiers
# Suppress Python logging records at WARNING level and below.
logging.disable(logging.WARNING)
# NOTE(review): TensorFlow is already imported above; confirm this variable
# still takes effect when it is set after the import.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
# Resolve the TPU cluster, connect to it and initialise the TPU system, in that
# order, before the distribution strategy is created.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
print('Running on TPU ', tpu.master())
#tf.config.set_soft_device_placement(True)
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
# `strategy` is used below to size the global batch and to scope model creation.
strategy = tf.distribute.experimental.TPUStrategy(tpu)
print("REPLICAS: ", strategy.num_replicas_in_sync)
# Shorthand passed as `num_parallel_calls` / prefetch size in the tf.data pipeline.
AUTO = tf.data.experimental.AUTOTUNE
# Training configuration shared by the data pipeline, the model factory, the
# callbacks and the run script below.
config = {
    # Seed handed to seed_everything().
    "seed": 1312,

    # Base learning rate; get_lr_callback() rescales it by batch_size / 256.
    "lr": 1e-4,
    # Fraction of the TFRecord files held out for validation.
    "valid_size": 0.05,
    "epochs": 50,
    # Global batch size: 8 samples per replica.
    "batch_size": 8 * strategy.num_replicas_in_sync,
    # Height, width, channels of the model input.
    "image_size": [800, 800, 3],
    "embedding_dimensions": 1024,
    "n_classes": 81313,
    # Buckets that are globbed for 'train*.tfrec' files.
    "image_paths": [
                    'gs://kds-02a5b8675d55c9a79251760390f626ffd3a0807438e67d2c7edea3cb',
                    'gs://kds-7aa79b5bc6f9af00ef7fd0c00f645a0abe32ebc8426ce4dd299077e6',
                    'gs://kds-971e58e5965ae894e73daa42bc53c93538d7afd16f8cc31a7e0ec68b',
                    'gs://kds-9c55456bf87ba673337d07f52d56d36e16e1ead2da1bb15e16610dd7',
                    'gs://kds-5949695dad43c3d30cd209773b1365e354a3613d9b63a25051eb4305',
                    'gs://kds-c4a1c215158b3e3002adac53b7de364a742bae7c6557212557d0378e',
                    'gs://kds-969690135ac88129bd11436ab06669e2aa1a23d66083087e9a692255',
                    'gs://kds-1793eae3b59c9d40461a1b04d82452040a42b8b0f063554dcd024ccf',
                    'gs://kds-73589684dcf1e8ebd2d37605cdb19ebfa650cccb17f36221e0ee48ac',
                    'gs://kds-d0e89541a75c0bf3642ef1f91a8f5ed5ff25630b417d504fba0d77ab',
                    'gs://kds-ebc1e3faa8dbe5cc846799207a330dd245c63befa4c93241ce526d1a',
                    'gs://kds-3b830115dd341cae8ac39c8d611fe4bc6fcad798e1e84ec336de6b36',
                    'gs://kds-8cb07756a69b20d2e4f294f26a928e7d285cf6d6db6640d9873c7ee1',
                    'gs://kds-e9f50c957467dcf68c27f8b865d87720e2cd4f1ff058733e474920d0',
                    'gs://kds-20d0c45f1756ef8664edcf2aa2db83ca4a0efbfb8ea533cd0bf85633',
                    'gs://kds-6ea4f0da52f0c996a7fbe5835459d1301a824236e793dadea8776b7d',
                    'gs://kds-05fdfc56d42ee43c29806c0dc9a06edb5d6f9a1cd82871b67538986a',
                    'gs://kds-6fb2e328eab4580255aa16b7f9bc7074babb1f68488144750b2e1c9f',
                    'gs://kds-02f60c478a0a861b6b80dba0ebb07e4d8547ad036ea01065a560d520',
                    'gs://kds-75583a573fde7550697fd8f591a2db35399acd2e772ca31eaecce602'
    ],
    "encoded_csv_path": 'train_encoded.csv',
    "save_path": "/content/drive/My Drive/2021_GLR/",
    # Name understood by get_margin().
    "margin": "AdaCos",
    # Name understood by get_backbone().
    "backbone": "efficientnetv2-b3",
    # Number of epochs already trained; 0 means start from scratch.
    "last_epoch": 34,
    # Number of rotating checkpoint slots (epoch modulo total_save).
    "total_save": 5
}

# Checkpoints live in a per-backbone folder under the save path.
config["save_path"] += config["backbone"]

# When resuming, point at the checkpoint slot that the last finished epoch
# was written to (same naming scheme as SaveModelCheckpoint).
if config["last_epoch"] != 0:
    config["weight_path"] = os.path.join(
        config["save_path"],
        "model_{}_{}_{}.h5".format(config["backbone"],
                                   config["margin"],
                                   config["last_epoch"] % config["total_save"]))
else:
    config["weight_path"] = None
# Training-time augmentation pipeline, applied by transform().
# NOTE(review): JpegCompression and Cutout may be deprecated in newer
# albumentations releases — confirm against the installed version.
augmentation = A.Compose([
                          # Mirror left/right half of the time.
                          A.HorizontalFlip(p = 0.5),
                          # Mild brightness/contrast jitter.
                          A.RandomBrightnessContrast(brightness_limit=0.1, p=0.2),
                          # Light JPEG re-compression (quality 95-100).
                          A.JpegCompression(quality_lower=95, quality_upper=100, p=0.25),
                          # Small shift / scale / rotation (up to 15 degrees).
                          A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.25),
                          # Two holes of at most 4x4 pixels.
                          A.Cutout(num_holes=2, max_h_size=4, max_w_size=4, p=0.1),
])
def seed_everything(seed):
    """Seed Python's ``random``, NumPy and TensorFlow and set PYTHONHASHSEED.

    Args:
        seed: integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)


seed_everything(config["seed"])
Data
def transform(image, label):
    """Run the module-level ``augmentation`` pipeline on ``image``.

    Args:
        image: image handed to the albumentations pipeline.
        label: passed through untouched.

    Returns:
        Tuple ``(augmented_image, label)``.
    """
    # NOTE(review): albumentations works on NumPy arrays, while this function
    # is used inside ``Dataset.map`` where ``image`` is a tensor — confirm the
    # augmentation is actually executed per sample.
    augmented = augmentation(image=image)
    return augmented["image"], label
def decode_image(image_data, image_size = config['image_size']):
    """Decode a JPEG byte string into a float image of shape ``image_size``.

    The image is decoded to 3 channels, scaled to [0, 1], resized with padding
    to the requested height/width and reshaped to a static shape.

    Args:
        image_data: scalar string tensor holding JPEG bytes.
        image_size: ``[height, width, channels]``; defaults to
            ``config['image_size']``.

    Returns:
        float32 tensor of shape ``image_size``.
    """
    target_height, target_width = image_size[0], image_size[1]
    image = tf.image.decode_jpeg(image_data, channels = 3)
    image = tf.cast(image, tf.float32) / 255.0
    # Use the function argument (not the global config) for the target size so
    # that the resize and the final reshape always agree.
    image = tf.image.resize_with_pad(image, target_height = target_height, target_width = target_width)
    image = tf.reshape(image, image_size)
    return image
def count_data_items(filenames):
    """Count the CSV rows that belong to the given TFRecord files.

    The group number is parsed from each file name (digits between ``_`` and
    ``.``) and matched against the ``group`` column of the encoded CSV.

    Args:
        filenames: iterable of TFRecord file names.

    Returns:
        Number of rows whose ``group`` is one of the parsed numbers.
    """
    group_pattern = re.compile(r"_([0-9]*)\.")
    groups = []
    for name in filenames:
        groups.append(int(group_pattern.search(name).group(1)))
    table = pd.read_csv(config["encoded_csv_path"])
    selected = table[table['group'].isin(groups)]
    return selected.shape[0]
def read_tfrecord(example):
    """Parse one serialized example into an ``(image, target)`` pair.

    Args:
        example: serialized example with an ``image`` (bytes) and a ``target``
            (int64) feature.

    Returns:
        Tuple of the decoded image and the target. The target stays an int32
        class index when ``config['margin']`` is "ArcMargin"; for every other
        margin it is one-hot encoded over ``config["n_classes"]`` classes.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "target": tf.io.FixedLenFeature([], tf.int64)
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    image = decode_image(parsed['image'], config['image_size'])
    target = tf.cast(parsed["target"], tf.int32)
    if config['margin'] == "ArcMargin":
        # ArcMargin builds its own one-hot mask from integer labels.
        return image, target
    depth = tf.constant(config["n_classes"], name = "C")
    target = tf.one_hot(target, depth, on_value = 1.0, off_value = 0.0, axis =-1)
    return image, target
def load_dataset(filenames, ordered = False):
    """Build a dataset of parsed ``(image, target)`` pairs from TFRecord files.

    Args:
        filenames: TFRecord files to read.
        ordered: when False, deterministic ordering is switched off in the
            dataset options; when True the default options are used.

    Returns:
        ``tf.data.Dataset`` whose elements are produced by ``read_tfrecord``.
    """
    options = tf.data.Options()
    if not ordered:
        options.experimental_deterministic = False

    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads = AUTO)
    dataset = dataset.with_options(options)
    dataset = dataset.map(read_tfrecord, num_parallel_calls = AUTO)
    return dataset
def arcface_format(image, target):
    """Pack a sample into the ``(inputs, target)`` structure the model expects.

    Args:
        image: value fed to the model input named ``input``.
        target: value fed to the model input named ``label`` and also returned
            as the training target.

    Returns:
        Tuple ``({'input': image, 'label': target}, target)``.
    """
    model_inputs = dict(input=image, label=target)
    return model_inputs, target
def get_training_dataset(filenames, ordered = False, do_aug = False, shuffle_buffer_size = None):
    """Build the infinite, shuffled, batched training dataset.

    Args:
        filenames: TFRecord files to read.
        ordered: forwarded to ``load_dataset``.
        do_aug: when True, ``transform`` is mapped over the samples.
        shuffle_buffer_size: size of the shuffle buffer. When None the legacy
            value ``config["seed"]`` is used, because the original code passed
            the seed as the first positional argument of ``Dataset.shuffle``,
            which is the buffer size. Pass an explicit value to choose a
            buffer that fits the available host memory.

    Returns:
        ``tf.data.Dataset`` yielding ``({'input', 'label'}, target)`` batches.
    """
    if shuffle_buffer_size is None:
        # Backward-compatible default; see the docstring.
        shuffle_buffer_size = config["seed"]

    dataset = load_dataset(filenames, ordered = ordered)
    if do_aug:
        dataset = dataset.map(transform, num_parallel_calls = AUTO)
    dataset = dataset.map(arcface_format, num_parallel_calls = AUTO)
    dataset = dataset.repeat()
    dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.batch(config["batch_size"])
    dataset = dataset.prefetch(AUTO)
    return dataset
def get_validation_dataset(filenames, ordered = True, prediction = False):
    """Build the batched (non-repeating, non-shuffled) validation dataset.

    Args:
        filenames: TFRecord files to read.
        ordered: forwarded to ``load_dataset``.
        prediction: accepted for interface compatibility; both branches of the
            original conditional batched identically, so it has no effect.

    Returns:
        ``tf.data.Dataset`` yielding ``({'input', 'label'}, target)`` batches.
    """
    dataset = load_dataset(filenames, ordered = ordered)
    dataset = dataset.map(arcface_format, num_parallel_calls = AUTO)
    dataset = dataset.batch(config["batch_size"])
    dataset = dataset.prefetch(AUTO)
    return dataset
Model
class CosineSimilarity(tf.keras.layers.Layer):
    """Cosine similarity between inputs and one trainable weight column per class.

    NOTE: a class with the same name and body is defined again further down in
    this file; the later definition is the one bound to the name afterwards.

    Args:
        num_classes: number of weight columns (output width).
    """

    def __init__(self, num_classes, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes

    def build(self, input_shape):
        # One column per class, sized from the last input dimension.
        input_dim = input_shape[-1]
        self.W = self.add_weight(shape=(input_dim, self.num_classes),
                                 initializer='random_normal',
                                 trainable=True)

    def call(self, inputs):
        """Return the cosine between each input row and each class column."""
        x = tf.nn.l2_normalize(inputs, axis=-1) # (batch_size, ndim)
        w = tf.nn.l2_normalize(self.W, axis=0)  # (ndim, nclass)
        cos = tf.matmul(x, w) # (batch_size, nclass)
        return cos

    def get_config(self):
        """Return the base layer config extended with ``num_classes``."""
        layer_config = super().get_config().copy()
        layer_config.update({
            'num_classes': self.num_classes
        })
        return layer_config
class ArcFace(tf.keras.layers.Layer):
    """Angular-margin head operating on one-hot labels.

    NOTE: a class with the same name is defined again further down in this
    file; the later definition is the one bound to the name afterwards.

    Args:
        num_classes: number of classes of the inner ``CosineSimilarity``.
        margin: value added to the angle of the true class during training.
        scale: multiplier applied to the training logits.
    """

    def __init__(self, num_classes, margin=0.5, scale=64, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes
        self.margin = margin
        self.scale = scale
        self.cos_similarity = CosineSimilarity(num_classes)

    def call(self, inputs, training=None):
        """Return scaled margin logits when training, plain cosines otherwise.

        Args:
            inputs: pair ``(feature, labels)``; ``labels`` is used as a 0/1
                mask over classes and is ignored when not training.
            training: Keras training flag.
        """
        feature, labels = inputs
        cos = self.cos_similarity(feature)
        if not training:
            return cos

        # Clip before acos so rounding outside [-1, 1] cannot produce NaN.
        theta = tf.acos(tf.clip_by_value(cos, -1, 1))
        cos_add = tf.cos(theta + self.margin)

        # Margin-adjusted cosine on the true class, raw cosine elsewhere.
        mask = tf.cast(labels, dtype=cos_add.dtype)
        logits = mask*cos_add + (1-mask)*cos
        logits *= self.scale
        return logits

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        layer_config = super().get_config().copy()
        layer_config.update({
            'num_classes': self.num_classes,
            'margin': self.margin,
            'scale': self.scale
        })
        return layer_config
class GeMPooling(tf.keras.layers.Layer):
    """Generalised-mean pooling over axes 1 and 2 of the input.

    Computes ``mean(clip(x, eps, max(x)) ** p) ** (1 / p)``.

    Args:
        p: exponent of the generalised mean.
        eps: lower clip bound applied before the power.
        **kwargs: standard Keras layer arguments (``name``, ``dtype``, ...),
            accepted so that the config from ``get_config`` can be fed back
            into the constructor.
    """

    def __init__(self, p=1.0, eps=1e-7, **kwargs):
        super().__init__(**kwargs)
        self.p = p
        # Honour the argument (it used to be overwritten with a constant).
        self.eps = eps

    def get_config(self):
        """Return the base layer config extended with ``p`` and ``eps``."""
        layer_config = super().get_config().copy()
        layer_config.update({
            'p': self.p,
            'eps': self.eps
        })
        return layer_config

    def call(self, inputs: tf.Tensor, **kwargs):
        """Pool ``inputs`` over axes 1 and 2."""
        # Clip from below so that the power is taken of values >= eps.
        inputs = tf.clip_by_value(inputs, clip_value_min=self.eps, clip_value_max=tf.reduce_max(inputs))
        inputs = tf.pow(inputs, self.p)
        inputs = tf.reduce_mean(inputs, axis=[1, 2])
        inputs = tf.pow(inputs, 1./self.p)
        return inputs
class CosineSimilarity(tf.keras.layers.Layer):
    """
    Cosine similarity with classwise weights

    Args:
        num_classes: number of weight columns (output width).
    """

    def __init__(self, num_classes, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes

    def build(self, input_shape):
        # One column per class, sized from the last input dimension.
        input_dim = input_shape[-1]
        self.W = self.add_weight(shape=(input_dim, self.num_classes),
                                 initializer='random_normal',
                                 trainable=True)

    def call(self, inputs):
        """Return the cosine between each input row and each class column."""
        x = tf.nn.l2_normalize(inputs, axis=-1) # (batch_size, ndim)
        w = tf.nn.l2_normalize(self.W, axis=0)  # (ndim, nclass)
        cos = tf.matmul(x, w) # (batch_size, nclass)
        return cos

    def get_config(self):
        """Return the base layer config extended with ``num_classes``."""
        layer_config = super().get_config().copy()
        layer_config.update({
            'num_classes': self.num_classes
        })
        return layer_config
class ArcMargin(tf.keras.layers.Layer):
    """Additive angular margin head operating on integer class labels.

    The layer owns one weight column per class, computes the cosine between
    the L2-normalised input and each L2-normalised column, replaces the
    true-class cosine with ``cos(theta + m)`` and multiplies by ``s``. There is
    no ``training`` switch: the margin is applied on every call.

    Args:
        n_classes: number of classes (weight columns / one-hot depth).
        s: multiplier applied to the output.
        m: angular margin in radians.
        easy_margin: if True the margin is applied only where ``cosine > 0``;
            otherwise where ``cosine > cos(pi - m)``, with
            ``cosine - sin(pi - m) * m`` used elsewhere.
        ls_eps: label-smoothing factor for the one-hot mask; 0 disables it.
    """

    def __init__(self, n_classes, s=30, m=0.50, easy_margin=False, ls_eps=0.0, **kwargs):
        super(ArcMargin, self).__init__(**kwargs)
        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        # Constants of the angle-addition formula and of the fallback branch.
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        layer_config = super().get_config().copy()
        layer_config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return layer_config

    def build(self, input_shape):
        # input_shape is a pair (features shape, labels shape).
        super(ArcMargin, self).build(input_shape[0])
        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None
        )

    def call(self, inputs):
        """Return ``s``-scaled margin logits for ``(features, integer labels)``."""
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        cosine = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        # Rounding can push cosine**2 marginally above 1; clamp so that sqrt
        # never receives a negative value (which would yield NaN).
        sine = tf.math.sqrt(tf.clip_by_value(1.0 - tf.math.pow(cosine, 2), 0.0, 1.0))
        # cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes
        # Margin-adjusted value on the true class, raw cosine elsewhere.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output
class ArcFace(tf.keras.layers.Layer):
    """
    Implementation of https://arxiv.org/pdf/1801.07698.pdf

    Args:
        num_classes: number of classes of the inner ``CosineSimilarity``.
        margin: value added to the angle of the true class during training.
        scale: multiplier applied to the training logits.
    """

    def __init__(self, num_classes, margin=0.5, scale=64, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes
        self.margin = margin
        self.scale = scale
        self.cos_similarity = CosineSimilarity(num_classes)

    def call(self, inputs, training=None):
        """Return scaled margin logits when training, plain cosines otherwise.

        Args:
            inputs: pair ``(feature, labels)``; ``labels`` is used as a 0/1
                mask over classes and is ignored when not training.
            training: Keras training flag.
        """
        feature, labels = inputs
        cos = self.cos_similarity(feature)
        if not training:
            return cos

        # Clip before acos so rounding outside [-1, 1] cannot produce NaN.
        theta = tf.acos(tf.clip_by_value(cos, -1, 1))
        cos_add = tf.cos(theta + self.margin)

        # Margin-adjusted cosine on the true class, raw cosine elsewhere.
        mask = tf.cast(labels, dtype=cos_add.dtype)
        logits = mask*cos_add + (1-mask)*cos
        logits *= self.scale
        return logits

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        layer_config = super().get_config().copy()
        layer_config.update({
            'num_classes': self.num_classes,
            'margin': self.margin,
            'scale': self.scale
        })
        return layer_config
class AdaCos(tf.keras.layers.Layer):
    """Cosine head whose logit scale is re-estimated on every training call.

    The scale starts at ``sqrt(2) * log(num_classes - 1)`` and is stored in a
    non-trainable variable that each training call overwrites.

    Args:
        num_classes: number of classes of the inner ``CosineSimilarity``.
    """

    def __init__(self, num_classes, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes
        self.cos_similarity = CosineSimilarity(num_classes)
        self.scale = tf.Variable(tf.sqrt(2.0)*tf.math.log(num_classes - 1.0),
                                 trainable=False)

    def call(self, inputs, training=None):
        """Return scaled cosines when training, plain cosines otherwise.

        Args:
            inputs: pair ``(feature, labels)``; ``labels`` is used as a 0/1
                mask over classes and is ignored when not training.
            training: Keras training flag.
        """
        feature, labels = inputs
        cos = self.cos_similarity(feature)
        if not training:
            return cos

        mask = tf.cast(labels, dtype=cos.dtype)

        # Sum of exp(previous_scale * cos) over the false classes, averaged
        # over the batch.
        B = (1 - mask)*tf.exp(self.scale*cos)
        B_avg = tf.reduce_mean(tf.reduce_sum(B, axis=-1), axis=0)

        theta = tf.acos(tf.clip_by_value(cos, -1, 1))
        # Angle to the true class of every sample, then its median
        # (= 50th percentile) over the batch.
        theta_true = tf.reduce_sum(mask*theta, axis=-1)
        theta_med = tfp.stats.percentile(theta_true, q=50)

        # New scale; the median angle is capped at pi/4. No gradient flows
        # through the scale itself.
        scale = tf.math.log(B_avg) / tf.cos(tf.minimum(np.pi/4, theta_med))
        scale = tf.stop_gradient(scale)
        logits = scale*cos

        # Remember the scale for the next call.
        self.scale.assign(scale)
        return logits

    def get_config(self):
        """Return the base layer config extended with ``num_classes``."""
        layer_config = super().get_config().copy()
        layer_config.update({
            'num_classes': self.num_classes
        })
        return layer_config
class CircleLoss(tf.keras.layers.Layer):
    """
    Implementation of https://arxiv.org/abs/2002.10857 (pair-level label version)
    """
    def __init__(self, margin=0.25, scale=256, **kwargs):
        """
        Args
          margin: a float value, margin for the true label (default 0.25)
          scale: a float value, final scale value,
            stated as gamma in the original paper (default 256)
        Returns:
          a tf.keras.layers.Layer object; when training it outputs one loss
          value per sample, otherwise the pairwise cosine matrix of the batch
        In the original paper, margin and scale (=gamma) are set depends on tasks
        - Face recognition: m=0.25, scale=256 (default)
        - Person re-identification: m=0.25, scale=256
        - Fine-grained image retrieval: m=0.4, scale=64
        """
        super().__init__(**kwargs)
        self.margin = margin
        self.scale = scale
        self._Op = 1 + margin # O_positive
        self._On = -margin  # O_negative
        self._Dp = 1 - margin # Delta_positive
        self._Dn = margin   # Delta_negative

    def call(self, inputs, training=None):
        """Return per-sample losses when training, pairwise cosines otherwise.

        Args:
            inputs: pair ``(feature, labels)``; ``labels`` is used as a 0/1
                class mask from which same-class pairs are derived.
            training: Keras training flag.
        """
        feature, labels = inputs
        x = tf.nn.l2_normalize(feature, axis=-1)
        cos = tf.matmul(x, x, transpose_b=True) # (batch_size, batch_size)
        if not training:
            return cos

        # pairwise version
        mask = tf.cast(labels, dtype=cos.dtype)
        mask_p = tf.matmul(mask, mask, transpose_b=True)
        mask_n = 1 - mask_p
        # Remove self-pairs. The batch size is read dynamically because the
        # static batch dimension may be unknown when the graph is built.
        mask_p = mask_p - tf.eye(tf.shape(mask_p)[0], dtype=mask_p.dtype)

        logits_p = - self.scale * tf.nn.relu(self._Op - cos) * (cos - self._Dp)
        logits_n = self.scale * tf.nn.relu(cos - self._On) * (cos - self._Dn)

        # Entries that are not a valid pair must not contribute to logsumexp.
        logits_p = tf.where(mask_p == 1, logits_p, -np.inf)
        logits_n = tf.where(mask_n == 1, logits_n, -np.inf)
        logsumexp_p = tf.reduce_logsumexp(logits_p, axis=-1)
        logsumexp_n = tf.reduce_logsumexp(logits_n, axis=-1)

        # Rows without any positive (or negative) pair get 0 instead of -inf.
        mask_p_row = tf.reduce_max(mask_p, axis=-1)
        mask_n_row = tf.reduce_max(mask_n, axis=-1)
        logsumexp_p = tf.where(mask_p_row == 1, logsumexp_p, 0)
        logsumexp_n = tf.where(mask_n_row == 1, logsumexp_n, 0)

        losses = tf.nn.softplus(logsumexp_p + logsumexp_n)
        # Only rows that have both a positive and a negative pair count.
        mask_paired = mask_p_row*mask_n_row
        losses = mask_paired * losses
        return losses

    def get_config(self):
        """Return the base layer config extended with ``margin`` and ``scale``."""
        layer_config = super().get_config().copy()
        layer_config.update({
            'margin': self.margin,
            'scale': self.scale
        })
        return layer_config
class CircleLossCL(tf.keras.layers.Layer):
    """
    Implementation of https://arxiv.org/abs/2002.10857 (class-level label version)
    """
    def __init__(self, num_classes, margin=0.25, scale=256, **kwargs):
        """
        Args
          num_classes: an int value, number of target classes
          margin: a float value, margin for the true label (default 0.25)
          scale: a float value, final scale value,
            stated as gamma in the original paper (default 256)
        Returns:
          a tf.keras.layers.Layer object, outputs logit values of each class
        In the original paper, margin and scale (=gamma) are set depends on tasks
        - Face recognition: m=0.25, scale=256 (default)
        - Person re-identification: m=0.25, scale=256
        - Fine-grained image retrieval: m=0.4, scale=64
        """
        super().__init__(**kwargs)
        self.num_classes = num_classes
        self.margin = margin
        self.scale = scale
        self._Op = 1 + margin # O_positive
        self._On = -margin  # O_negative
        self._Dp = 1 - margin # Delta_positive
        self._Dn = margin   # Delta_negative
        self.cos_similarity = CosineSimilarity(num_classes)

    def call(self, inputs, training=None):
        """Return re-weighted logits when training, plain cosines otherwise.

        Args:
            inputs: pair ``(feature, labels)``; ``labels`` is used as a 0/1
                mask over classes and is ignored when not training.
            training: Keras training flag.
        """
        feature, labels = inputs
        cos = self.cos_similarity(feature)
        if not training:
            return cos

        # class-level version
        mask = tf.cast(labels, dtype=cos.dtype)
        # Non-negative weights for the true class and for the other classes.
        alpha_p = tf.nn.relu(self._Op - cos)
        alpha_n = tf.nn.relu(cos - self._On)
        logits_p = self.scale*alpha_p*(cos - self._Dp)
        logits_n = self.scale*alpha_n*(cos - self._Dn)
        # Positive formula on the true class, negative formula elsewhere.
        logits = mask*logits_p + (1-mask)*logits_n
        return logits

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        layer_config = super().get_config().copy()
        layer_config.update({
            'num_classes': self.num_classes,
            'margin': self.margin,
            'scale': self.scale
        })
        return layer_config
def get_margin(margin):
    """Create the margin/head layer selected by name.

    Args:
        margin: one of "ArcMargin", "ArcFace", "AdaCos", "CircleLossCL" or
            "CosFace".

    Returns:
        A freshly constructed layer sized for ``config["n_classes"]`` classes.

    Raises:
        ValueError: if ``margin`` is not one of the supported names (the
            function used to return None silently in that case).
    """
    if (margin == "ArcMargin"):
        return ArcMargin(n_classes = config["n_classes"], m = 0.1, s = 32)
    elif (margin == "ArcFace"):
        return ArcFace(num_classes = config["n_classes"], margin = 0.1, scale = 32)
    elif (margin == "AdaCos"):
        return AdaCos(num_classes = config["n_classes"])
    elif (margin == "CircleLossCL"):
        return CircleLossCL(num_classes = config["n_classes"], margin = 0.6, scale = 32)
    elif (margin == "CosFace"):
        # NOTE(review): this reuses ArcFace with a negative angular margin —
        # confirm that this is the intended "CosFace" behaviour.
        return ArcFace(num_classes = config["n_classes"], margin = -0.1, scale = 32)
    raise ValueError("Unknown margin: {!r}".format(margin))
def get_backbone(backbone, x):
    """Apply the feature extractor selected by name to the tensor ``x``.

    Lookup order: the ``efficientnet`` package, ``tf.keras.applications``,
    ``vit_keras`` and finally, for names containing "eff", a TF-Hub
    feature-vector model. The first two are followed by ``GeMPooling(p=3)``.

    Args:
        backbone: name of the architecture.
        x: image input tensor.

    Returns:
        Output tensor of the backbone.

    Raises:
        ValueError: if ``backbone`` matches none of the sources (the function
            used to return None silently in that case).
    """
    if (hasattr(efn, backbone)):
        return GeMPooling(p = 3.0)(getattr(efn, backbone)(weights = "noisy-student", include_top = False)(x))
    elif hasattr(tf.keras.applications, backbone):
        return GeMPooling(p = 3.0)(getattr(tf.keras.applications, backbone)(weights = "imagenet", include_top = False)(x))
    elif hasattr(vit, backbone):
        return getattr(vit, backbone)(image_size = (config["image_size"][0], config["image_size"][1]),
                                      pretrained=True,
                                      include_top=False,
                                      pretrained_top=False)(x)
    elif "eff" in backbone:
        return hub.KerasLayer("gs://cloud-tpu-checkpoints/efficientnet/v2/hub/"+backbone+"-21k-ft1k/feature-vector", trainable=True)(x)
    raise ValueError("Unknown backbone: {!r}".format(backbone))
def model_factory(backbone, image_size, embedding_dimensions, margin):
    """Assemble the two-input classification model.

    Pipeline: backbone -> Dense(embedding) -> BatchNormalization -> PReLU ->
    margin head (also fed with the label input) -> float32 Softmax.

    Args:
        backbone: name passed to ``get_backbone``.
        image_size: shape of the image input.
        embedding_dimensions: width of the Dense embedding layer.
        margin: name passed to ``get_margin``.

    Returns:
        ``tf.keras.Model`` with inputs ``[input, label]`` and one output.
    """
    image_input = Input(shape = tuple(image_size), name = 'input')
    label_input = Input(shape = (), name = 'label')

    features = get_backbone(backbone, image_input)
    embedding = Dense(embedding_dimensions, activation = "linear")(features)
    embedding = BatchNormalization()(embedding)
    embedding = PReLU()(embedding)

    logits = get_margin(margin = margin)([embedding, label_input])
    probabilities = Softmax(dtype='float32')(logits)
    return tf.keras.models.Model(inputs = [image_input, label_input], outputs = [probabilities])
Callbacks
def get_lr_callback(plot=False):
    """Create the learning-rate scheduler callback.

    The schedule ramps linearly from the start rate to five times that rate
    over 5 epochs, then decays exponentially (factor 0.8 per epoch) towards a
    tenth of the start rate. The start rate is ``config["lr"]`` scaled by
    ``config["batch_size"] / 256``.

    Args:
        plot: when True, plot the schedule over ``config["epochs"]`` epochs
            and print its first, maximum and last value.

    Returns:
        ``tf.keras.callbacks.LearningRateScheduler`` wrapping the schedule.
    """
    start_lr = config["lr"] * (config["batch_size"] / 256)
    peak_lr = 5 * start_lr
    floor_lr = start_lr/10
    rampup_epochs = 5
    sustain_epochs = 0
    decay = 0.8

    def lrfn(epoch):
        # Linear warm-up.
        if epoch < rampup_epochs:
            return (peak_lr - start_lr) / rampup_epochs * epoch + start_lr
        # Optional plateau (empty while sustain_epochs is 0).
        if epoch < rampup_epochs + sustain_epochs:
            return peak_lr
        # Exponential decay towards the floor.
        return (peak_lr - floor_lr) * decay**(epoch - rampup_epochs - sustain_epochs) + floor_lr

    if plot:
        epochs = list(range(config["epochs"]))
        learning_rates = [lrfn(x) for x in epochs]
        plt.plot(epochs, learning_rates)
        print("Learning rate schedule: {:.3g} to {:.3g} to {:.3g}".format(learning_rates[0], max(learning_rates), learning_rates[-1]))
        plt.show()

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)
class SaveModelCheckpoint(tf.keras.callbacks.Callback):
    """Save the model after every epoch into a rotating set of files.

    The file name is ``model_<backbone>_<margin>_<slot>.h5`` where ``slot`` is
    ``(epoch + 1) % config["total_save"]``, so only the last ``total_save``
    checkpoints are kept.

    Args:
        path: directory the checkpoint files are written to.
    """

    def __init__(self, path):
        # Let the Keras base class initialise its own attributes.
        super().__init__()
        self.path = path

    def on_epoch_end(self, epoch, logs=None):
        """Write the checkpoint for the epoch that just finished."""
        slot = (epoch + 1) % config["total_save"]
        filename = "model_{}_{}_{}.h5".format(config["backbone"], config["margin"], slot)
        # Make sure the target directory exists before saving.
        tf.io.gfile.makedirs(self.path)
        self.model.save(os.path.join(self.path, filename))
Run
# Encoded label table; its `group` column is matched against the number parsed
# from each TFRecord file name.
df = pd.read_csv(config["encoded_csv_path"])

# Collect the training shards from every configured bucket.
FILENAMES = []
for GCS_PATH in config["image_paths"]:
    FILENAMES += tf.io.gfile.glob(GCS_PATH + '/train*.tfrec')

# File-level split into training and validation shards.
TRAINING_FILENAMES, VALIDATION_FILENAMES = train_test_split(FILENAMES,
                                                            test_size=config["valid_size"],
                                                            random_state=42)

# Compile the group-number pattern once instead of once per file name.
_GROUP_PATTERN = re.compile(r"_([0-9]*)\.")
training_groups = [int(_GROUP_PATTERN.search(filename).group(1)) for filename in TRAINING_FILENAMES]
validation_groups = [int(_GROUP_PATTERN.search(filename).group(1)) for filename in VALIDATION_FILENAMES]

n_trn_classes = df[df['group'].isin(training_groups)]['landmark_id_encode'].nunique()
n_val_classes = df[df['group'].isin(validation_groups)]['landmark_id_encode'].nunique()
print(f'The number of unique training classes is {n_trn_classes} of {config["n_classes"]} total classes')
print(f'The number of unique validation classes is {n_val_classes} of {config["n_classes"]} total classes')

# The training dataset repeats forever, so the epoch length is set explicitly.
STEPS_PER_EPOCH = count_data_items(TRAINING_FILENAMES) // config["batch_size"]
train_dataset = get_training_dataset(TRAINING_FILENAMES, ordered = False, do_aug = True)
valid_dataset = get_validation_dataset(VALIDATION_FILENAMES, ordered = True, prediction = False)

# Model, optimizer and compilation happen inside the distribution strategy scope.
with strategy.scope():
    optimizer = Adam(learning_rate = config["lr"])
    model = model_factory(margin = config["margin"],
                          backbone = config["backbone"],
                          image_size = config["image_size"],
                          embedding_dimensions = config["embedding_dimensions"])

    # Resume from the checkpoint selected in the configuration, if any.
    if (config["weight_path"]):
        model.load_weights(config["weight_path"])

    # ArcMargin is fed integer labels; every other margin gets one-hot labels.
    model.compile(optimizer = optimizer,
                loss = [tf.keras.losses.CategoricalCrossentropy() if (config['margin'] != "ArcMargin") else tf.keras.losses.SparseCategoricalCrossentropy()],
                metrics = [tf.keras.metrics.CategoricalAccuracy() if (config['margin'] != "ArcMargin") else tf.keras.metrics.SparseCategoricalAccuracy()])

checkpoint = SaveModelCheckpoint(path = config["save_path"])
lr_callback = get_lr_callback(plot = True)

# `initial_epoch` continues the epoch numbering from a previous run.
H = model.fit(train_dataset,
              steps_per_epoch = STEPS_PER_EPOCH,
              epochs = config["epochs"],
              callbacks = [checkpoint, lr_callback],
              validation_data = valid_dataset,
              initial_epoch = config["last_epoch"],
              verbose = 1)
Infer
import gc
import os
import csv
import cv2
import sys
import math
import random
import shutil
import logging
import numpy as np
import albumentations as A
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.layers import *
from tqdm.notebook import *
from sklearn.metrics import *
from sklearn.preprocessing import QuantileTransformer
try:
    from vit_keras import vit
    import efficientnet.tfkeras as efn
   Â
    from ffyytt_tools.metric_learning.metric_learning_layers import *
except:
    sys.path.append("../input/ffyytt-tools")
    !pip install -qq vit-keras --no-index --find-links=file:///kaggle/input/2021-glr-lib
    !pip install -qq efficientnet --no-index --find-links=file:///kaggle/input/2021-glr-lib
       Â
    from vit_keras import vit
    import efficientnet.tfkeras as efn
   Â
    from ffyytt_tools.metric_learning.metric_learning_layers import *
# Ensemble definition. The six lists below are index-aligned: entry i of each
# list describes ensemble member i. Commented-out entries are kept at the same
# position in every list so that members can be toggled consistently.

# Weight file of each member.
model_paths = [
    "../input/2021-glr-all-best-model/model_efficientnetv2-b3_1024_ArcMargin_0.h5",
    "../input/2021-glr-all-best-model/model_efficientnetv2-m_1024_ArcMargin_0.h5",
    "../input/2021-glr-all-best-model/model_EfficientNetB4_512_ArcMargin_0.h5",


    #"../input/2021-glr-efficientnetv2b3-adacos-padding-1024/model_efficientnetv2-b3_AdaCos_1.h5",
    "../input/2021-glr-efficientnetv2b3-adacos-padding-1024/model_efficientnetv2-b3_AdaCos_2.h5",
    #"../input/2021-glr-efficientnetv2s-adacos-padding-1024/model_efficientnetv2-s_AdaCos_1.h5",
    "../input/2021-glr-efficientnetv2s-adacos-padding-1024/model_efficientnetv2-s_AdaCos_2.h5",
    "../input/2021-glr-efficientnetb6-adacos-padding-1024/model_EfficientNetB6_AdaCos_2.h5",
    "../input/2021-glr-efficientnetb5-adacos-padding-1024/model_EfficientNetB5_AdaCos_0.h5",
    "../input/2021-glr-craw-weights-13092021/model_InceptionResNetV2_AdaCos_3.h5",
    "../input/2021-glr-craw-weights-13092021/model_EfficientNetB5_ArcMargin_3.h5",
    "../input/2021-glr-craw-weights-13092021/model_EfficientNetB5_ArcFace_3.h5",
    "../input/2021-glr-vit-b32-adacos-padding-1024/model_vit_b32_AdaCos_1.h5",


    "../input/2021-glr-model-temp/model_vit_b32_AdaCos_1.h5",
]

# Backbone name of each member, as understood by get_backbone().
backbones = [
    "efficientnetv2-b3",
    "efficientnetv2-m",
    "EfficientNetB4",


    #"efficientnetv2-b3",
    "efficientnetv2-b3",
    #"efficientnetv2-s",
    "efficientnetv2-s",
    "EfficientNetB6",
    "EfficientNetB5",
    "InceptionResNetV2",
    "EfficientNetB5",
    "EfficientNetB5",
    "vit_b32",


    #"vit_b32",
    "vit_b32"
]

# Width of the Dense embedding layer of each member.
embedding_sizes = [
    1024,
    1024,
    512,

    #1024,
    1024,
    #1024,
    1024,
    1024,
    1024,
    1024,
    1024,
    1024,
    1024,

    #1024,
    1024
]

# Whether a member expects the padded preprocessing (True) or the plain
# resize (False); see process_image().
paddings = [
    False,
    False,
    False,

    #True,
    True,
    #True,
    True,
    True,
    True,
    True,
    True,
    True,
    True,

    #False,
    False,
]

# Margin/head name of each member, as understood by get_margin().
margins = [
    "ArcMargin",
    "ArcMargin",
    "ArcMargin",

    #"AdaCos",
    "AdaCos",
    #"AdaCos",
    "AdaCos",
    "AdaCos",
    "AdaCos",
    "AdaCos",
    "ArcMargin",
    "ArcFace",
    "AdaCos",

    #"AdaCos",
    "AdaCos",
]

# Input shape (height, width, channels) of each member.
image_sizes = [
    (512, 512, 3),
    (512, 512, 3),
    (512, 512, 3),

    #(800, 800, 3),
    (800, 800, 3),
    #(800, 800, 3),
    (800, 800, 3),
    (800, 800, 3),
    (800, 800, 3),
    (800, 800, 3),
    (800, 800, 3),
    (800, 800, 3),
    (800, 800, 3),

    #(512, 512, 3),
    (512, 512, 3),
]

# Fail early if the index-aligned lists have drifted apart, e.g. after
# commenting an entry out in only some of them.
_aligned_lengths = {
    "model_paths": len(model_paths),
    "backbones": len(backbones),
    "embedding_sizes": len(embedding_sizes),
    "paddings": len(paddings),
    "margins": len(margins),
    "image_sizes": len(image_sizes),
}
if len(set(_aligned_lengths.values())) != 1:
    raise ValueError("Ensemble lists differ in length: {}".format(_aligned_lengths))

# Per-member weight applied to the normalised embeddings (uniform).
ensemble_weight = np.array([1]*len(backbones))
# No test-time augmentation: an empty pipeline.
augmentation = A.Compose([])
config = {
    "n_workers": 4,
    "batch_size": 16,
    "n_classes": 81313,
    "distance_batch": 512,
    "NUM_PUBLIC_TEST_IMAGES": 1129,
}
TOP_K = 100
DEBUG = True
# The shared embedding buffer is as wide as the widest member.
NUM_EMBEDDING_DIMENSIONS = max(embedding_sizes)
def get_margin(margin):
    """Create the margin/head layer selected by name.

    Args:
        margin: one of "ArcMargin", "ArcFace", "AdaCos", "CircleLossCL" or
            "CosFace".

    Returns:
        A freshly constructed layer sized for ``config["n_classes"]`` classes.

    Raises:
        ValueError: if ``margin`` is not one of the supported names (the
            function used to return None silently in that case).
    """
    if (margin == "ArcMargin"):
        return ArcMargin(n_classes = config["n_classes"], m = 0.1, s = 32)
    elif (margin == "ArcFace"):
        return ArcFace(num_classes = config["n_classes"], margin = 0.1, scale = 32)
    elif (margin == "AdaCos"):
        return AdaCos(num_classes = config["n_classes"])
    elif (margin == "CircleLossCL"):
        return CircleLossCL(num_classes = config["n_classes"], margin = 0.25, scale = 32)
    elif (margin == "CosFace"):
        # NOTE(review): this reuses ArcFace with a negative angular margin —
        # confirm that this is the intended "CosFace" behaviour.
        return ArcFace(num_classes = config["n_classes"], margin = -0.1, scale = 32)
    raise ValueError("Unknown margin: {!r}".format(margin))
def get_backbone(backbone, x, image_size):
    """Apply the (un-pretrained) feature extractor selected by name to ``x``.

    Lookup order: the ``efficientnet`` package, ``tf.keras.applications``,
    ``vit_keras``; any other name is loaded as a TF-Hub feature-vector model
    from the local input folder. The first two are followed by
    ``GeMPooling(p=3)``.

    Args:
        backbone: name of the architecture.
        x: image input tensor.
        image_size: input shape; its first two entries are passed to the ViT
            constructor.

    Returns:
        Output tensor of the backbone.
    """
    if hasattr(efn, backbone):
        network = getattr(efn, backbone)(weights = None, include_top = False)
        return GeMPooling(p = 3.0)(network(x))

    if hasattr(tf.keras.applications, backbone):
        network = getattr(tf.keras.applications, backbone)(weights = None, include_top = False)
        return GeMPooling(p = 3.0)(network(x))

    if hasattr(vit, backbone):
        network = getattr(vit, backbone)(image_size = (image_size[0], image_size[1]),
                                         pretrained=False,
                                         include_top=False,
                                         pretrained_top=False)
        return network(x)

    hub_path = "../input/efficientnetv2-tfhub-weight-files/tfhub_models/"+backbone+"/feature_vector"
    return hub.KerasLayer(hub_path, trainable=True)(x)
def model_factory(backbone, image_size, embedding_dimensions, margin):
    """Assemble the two-input classification model used to load the weights.

    Pipeline: backbone -> Dense(embedding) -> BatchNormalization -> PReLU ->
    margin head (also fed with the label input) -> float32 Softmax.

    Args:
        backbone: name passed to ``get_backbone``.
        image_size: shape of the image input.
        embedding_dimensions: width of the Dense embedding layer.
        margin: name passed to ``get_margin``.

    Returns:
        ``tf.keras.Model`` with inputs ``[input, label]`` and one output.
    """
    image_input = Input(shape = tuple(image_size), name = 'input')
    label_input = Input(shape = (), name = 'label')

    features = get_backbone(backbone, image_input, image_size)
    embedding = Dense(embedding_dimensions, activation = "linear")(features)
    embedding = BatchNormalization()(embedding)
    embedding = PReLU()(embedding)

    logits = get_margin(margin = margin)([embedding, label_input])
    probabilities = Softmax(dtype='float32')(logits)
    return tf.keras.models.Model(inputs = [image_input, label_input], outputs = [probabilities])
# Build every ensemble member, load its weights and keep only an
# embedding-extraction sub-model (image input -> intermediate layer output).
global_models = [None]*len(model_paths)
for model_index in trange(len(model_paths)):
    model = model_factory(image_size = image_sizes[model_index],
                          margin = margins[model_index],
                          backbone = backbones[model_index],
                          embedding_dimensions = embedding_sizes[model_index])

    model.load_weights(model_paths[model_index])
    # model.input[0] is the image input; the label input is not needed here.
    # NOTE(review): layers[-4] is presumably the embedding just before the
    # margin head — confirm against model.summary().
    global_models[model_index] = tf.keras.models.Model(inputs = model.input[0],
                                                       outputs = model.layers[-4].output)
def do_image_normal(image):
    """Augment ``image``, resize it to 512x512 and return a float32 tensor.

    The resized image is encoded to JPEG (quality 100) and decoded again with
    TensorFlow before being cast to float32 and divided by 255.
    """
    augmented = augmentation(image=image)["image"]
    resized = cv2.resize(augmented, (512, 512))
    _, jpeg_buffer = cv2.imencode('.jpg', resized, (cv2.IMWRITE_JPEG_QUALITY, 100))
    decoded = tf.image.decode_jpeg(jpeg_buffer.tobytes(), channels = 3)
    return tf.cast(decoded, tf.float32) / 255.0
def do_image_padding(image):
    """Augment ``image`` and letterbox it to 800x800 as a float32 tensor.

    The augmented image goes through a JPEG (quality 100) encode/decode round
    trip, is resized with padding to 800x800, cast to float32 and divided by
    255.
    """
    augmented = augmentation(image=image)["image"]
    _, jpeg_buffer = cv2.imencode('.jpg', augmented, (cv2.IMWRITE_JPEG_QUALITY, 100))
    decoded = tf.image.decode_jpeg(jpeg_buffer.tobytes(), channels = 3)
    padded = tf.image.resize_with_pad(decoded, target_height = 800, target_width = 800)
    return tf.cast(padded, tf.float32) / 255.0
def process_image(image, padding):
    """Preprocess ``image`` with the padded pipeline if ``padding`` is truthy,
    otherwise with the plain-resize pipeline."""
    preprocess = do_image_padding if padding else do_image_normal
    return preprocess(image)
def read_image(image_path):
    """Load the image at ``image_path`` with OpenCV and return it in RGB order."""
    bgr_image = cv2.imread(image_path)
    return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
def read_image_batch(image_paths):
    """Read every path and return ``(normal_batch, padded_batch)`` tensors.

    Each image is read once and preprocessed twice: first with the plain
    pipeline, then with the padded one.
    """
    normal_images = []
    padded_images = []
    for image_path in image_paths:
        image = read_image(image_path)
        # Keep the normal-then-padded call order per image.
        normal_images.append(process_image(image, False))
        padded_images.append(process_image(image, True))
    return tf.convert_to_tensor(normal_images), tf.convert_to_tensor(padded_images)
def make_global_model_predict(X, index, embeddings, model_index):
    """Add one model's weighted, L2-normalised features into ``embeddings``.

    Rows ``index`` of ``embeddings`` are updated in place; only the first
    ``features.shape[1]`` columns are touched.  Always returns ``None``.
    """
    raw_features = global_models[model_index].predict(X)
    feature_width = raw_features.shape[1]
    unit_features = tf.nn.l2_normalize(raw_features, axis=1).numpy()
    embeddings[index, :feature_width] += ensemble_weight[model_index]*unit_features
    return None
def extract_global_features(image_root_dir):
    """Compute ensemble embeddings for every ``.jpg`` under ``image_root_dir``.

    Images are processed in batches of ``config["batch_size"]``.  For each
    batch every model in ``global_models`` adds its weighted features to the
    corresponding rows of the result; ``paddings[model_index]`` selects the
    padded or the plain-resized input for that model.

    Returns:
        ``(ids, embeddings)`` where ``ids`` comes from ``my_glob`` and
        ``embeddings`` is a ``(len(ids), NUM_EMBEDDING_DIMENSIONS)`` array.
    """
    ids, image_paths = my_glob(image_root_dir, ".jpg")
    num_embeddings = len(image_paths)
    batch_size = config["batch_size"]
    steps = math.ceil(num_embeddings/batch_size)
    embeddings = np.zeros((num_embeddings, NUM_EMBEDDING_DIMENSIONS))

    for step in trange(steps):
        index = range(step*batch_size, min(num_embeddings, (step+1)*batch_size))
        X_normal, X_padding = read_image_batch([image_paths[i] for i in index])
        for model_index in range(len(global_models)):
            batch_input = X_padding if paddings[model_index] else X_normal
            make_global_model_predict(batch_input, index, embeddings, model_index)

        # Drop the per-batch tensors before loading the next batch.
        gc.collect()
    tf.keras.backend.clear_session()
    return ids, embeddings
def compute_train_ids_and_scores(train_ids, test_ids, train_embeddings, test_embeddings):
    """For every test embedding, rank its nearest train embeddings.

    Cosine distances are computed in chunks of ``config["distance_batch"]``
    test rows.  For each test row at most ``TOP_K`` train ids are kept,
    ordered from most to least similar.

    Returns:
        A list aligned with ``test_ids``; each entry is a list of
        ``(train_id, 1 - cosine_distance)`` tuples.
    """
    batch = config["distance_batch"]
    num_test = len(test_ids)
    train_ids_and_scores = [None] * num_test
    # argpartition needs kth < number of train rows.
    kth = min(TOP_K, len(train_ids)-1)

    for step in trange(math.ceil(num_test/batch)):
        index = range(step*batch, min(num_test, (step+1)*batch))
        distances = pairwise_distances(test_embeddings[index, :], train_embeddings,
                                       metric = 'cosine', n_jobs = -1)

        # Row r of `distances` belongs to the r-th test index of this chunk.
        for row, test_index in zip(distances, index):
            partition = np.argpartition(row, kth)[:TOP_K]
            nearest = sorted(((train_ids[p], row[p]) for p in partition), key=lambda pair: pair[1])
            train_ids_and_scores[test_index] = [(train_id, 1 - distance) for train_id, distance in nearest]
        gc.collect()

    return train_ids_and_scores
def compute_remove_top_global(train_ids, test_ids, train_embeddings, test_embeddings):
    """Select train ids that are globally far from the test set.

    Each train embedding is scored by the sum of ``1 - cosine_distance`` to
    its (up to) three closest test embeddings.  Ids whose score is strictly
    below the ``REMOVE_TOP_GLOBAL``-th percentile of all scores are returned.

    ``test_ids`` is accepted for signature parity with the sibling functions
    and is not read.

    Returns:
        A ``set`` of train ids to drop (empty when ``train_ids`` is empty).
    """
    num_train = len(train_ids)
    if num_train == 0:
        # Nothing to score; np.percentile would fail on an empty list.
        return set()

    batch = config["distance_batch"]
    train_scores = [None]*num_train

    for step in trange(math.ceil(num_train/batch)):
        index = range(step*batch, min(num_train, (step+1)*batch))
        distances = pairwise_distances(train_embeddings[index, :], test_embeddings,
                                       metric = 'cosine', n_jobs = -1)
        # Clamp kth so argpartition also works with three or fewer test rows,
        # matching the clamping used in compute_train_ids_and_scores.
        kth = min(3, distances.shape[1]-1)

        for row, train_index in zip(distances, index):
            partition = np.argpartition(row, kth)[:3]
            train_scores[train_index] = sum([1-row[p] for p in partition])
        gc.collect()

    remove_thresh = np.percentile(train_scores, REMOVE_TOP_GLOBAL, interpolation='nearest')
    return {train_id
            for train_id, train_score in zip(train_ids, train_scores)
            if train_score < remove_thresh}
def compute_remove_times_app(train_ids, test_ids, train_embeddings, test_embeddings):
    """Select the train ids that show up most often as test neighbours.

    For every test embedding the (up to) 100 closest train embeddings by
    cosine distance are found, and each hit increments that train id's
    counter.  The ``MAX_TIME_REMOVE`` ids with the highest counts are returned.

    Returns:
        A ``set`` of train ids to drop (empty when ``train_ids`` is empty).
    """
    if len(train_ids) == 0:
        return set()

    batch = config["distance_batch"]
    num_test = len(test_ids)
    train_time_dict = {train_id:0 for train_id in train_ids}

    for step in trange(math.ceil(num_test/batch)):
        index = range(step*batch, min(num_test, (step+1)*batch))
        distances = pairwise_distances(test_embeddings[index, :], train_embeddings,
                                       metric = 'cosine', n_jobs = -1)
        # Clamp kth so argpartition also works with 100 or fewer train rows.
        kth = min(100, distances.shape[1]-1)

        for row in distances:
            for p in np.argpartition(row, kth)[:100]:
                train_time_dict[train_ids[p]] += 1
        gc.collect()

    most_frequent = sorted(train_ids, key = train_time_dict.get, reverse = True)
    return set(most_frequent[:MAX_TIME_REMOVE])
def remove_train_by_remove_set(train_ids, train_embeddings, train_remove):
    """Drop every train id found in ``train_remove`` and its embedding row.

    Returns:
        ``(kept_ids, kept_embeddings)`` in the original order.
    """
    keep = [i for i, train_id in enumerate(train_ids) if train_id not in train_remove]
    return [train_ids[i] for i in keep], train_embeddings[keep]
def global_predictions():
    """Embed the train and test images and rank train images per test image.

    Returns:
        ``(test_ids, train_ids_and_scores)`` as produced by
        ``extract_global_features`` and ``compute_train_ids_and_scores``.
    """
    # `"..." * DEBUG` appends the sub-folder suffix only when DEBUG is truthy,
    # restricting the run to a sub-folder of each image directory.
    train_ids, train_embeddings = extract_global_features(TRAIN_IMAGE_DIR + "/0/0"*DEBUG)
    gc.collect()

    test_ids, test_embeddings = extract_global_features(TEST_IMAGE_DIR + "/0"*DEBUG)
    gc.collect()

    train_ids_and_scores = compute_train_ids_and_scores(train_ids, test_ids, train_embeddings, test_embeddings)
    return test_ids, train_ids_and_scores
def get_prediction_map(test_ids, train_ids_and_scores):
    """Map each test id to a space-separated string of its ranked train ids.

    ``train_ids_and_scores[i]`` holds the ``(train_id, score)`` pairs for
    ``test_ids[i]``; the scores are discarded.
    """
    return {
        test_id: " ".join(pair[0] for pair in train_ids_and_scores[position])
        for position, test_id in enumerate(test_ids)
    }
def get_predictions():
    """Run the global retrieval pipeline and return ``{test_id: "id id ..."}``."""
    test_ids, train_ids_and_scores = global_predictions()
    gc.collect()

    verification_predictions = get_prediction_map(test_ids, train_ids_and_scores)
    return verification_predictions
def do_image_normal(image):
    """Augment ``image``, resize it to 512x512 and return a float32 tensor.

    The resized image is encoded to JPEG (quality 100) and decoded again with
    TensorFlow before being cast to float32 and divided by 255.
    """
    augmented = augmentation(image=image)["image"]
    resized = cv2.resize(augmented, (512, 512))
    _, jpeg_buffer = cv2.imencode('.jpg', resized, (cv2.IMWRITE_JPEG_QUALITY, 100))
    decoded = tf.image.decode_jpeg(jpeg_buffer.tobytes(), channels = 3)
    return tf.cast(decoded, tf.float32) / 255.0
def do_image_padding(image):
    """Augment ``image`` and letterbox it to 800x800 as a float32 tensor.

    The augmented image goes through a JPEG (quality 100) encode/decode round
    trip, is resized with padding to 800x800, cast to float32 and divided by
    255.
    """
    augmented = augmentation(image=image)["image"]
    _, jpeg_buffer = cv2.imencode('.jpg', augmented, (cv2.IMWRITE_JPEG_QUALITY, 100))
    decoded = tf.image.decode_jpeg(jpeg_buffer.tobytes(), channels = 3)
    padded = tf.image.resize_with_pad(decoded, target_height = 800, target_width = 800)
    return tf.cast(padded, tf.float32) / 255.0
def process_image(image, padding):
    """Preprocess ``image`` with the padded pipeline if ``padding`` is truthy,
    otherwise with the plain-resize pipeline."""
    preprocess = do_image_padding if padding else do_image_normal
    return preprocess(image)
def read_image(image_path):
    """Load the image at ``image_path`` with OpenCV and return it in RGB order."""
    bgr_image = cv2.imread(image_path)
    return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
def read_image_batch(image_paths):
    """Read every path and return ``(normal_batch, padded_batch)`` tensors.

    Each image is read once and preprocessed twice: first with the plain
    pipeline, then with the padded one.
    """
    normal_images = []
    padded_images = []
    for image_path in image_paths:
        image = read_image(image_path)
        # Keep the normal-then-padded call order per image.
        normal_images.append(process_image(image, False))
        padded_images.append(process_image(image, True))
    return tf.convert_to_tensor(normal_images), tf.convert_to_tensor(padded_images)
def make_global_model_predict(X, index, embeddings, model_index):
    """Add one model's weighted, L2-normalised features into ``embeddings``.

    Rows ``index`` of ``embeddings`` are updated in place; only the first
    ``features.shape[1]`` columns are touched.  Always returns ``None``.
    """
    raw_features = global_models[model_index].predict(X)
    feature_width = raw_features.shape[1]
    unit_features = tf.nn.l2_normalize(raw_features, axis=1).numpy()
    embeddings[index, :feature_width] += ensemble_weight[model_index]*unit_features
    return None
def extract_global_features(image_root_dir):
    """Compute ensemble embeddings for every ``.jpg`` under ``image_root_dir``.

    Images are processed in batches of ``config["batch_size"]``.  For each
    batch every model in ``global_models`` adds its weighted features to the
    corresponding rows of the result; ``paddings[model_index]`` selects the
    padded or the plain-resized input for that model.

    Returns:
        ``(ids, embeddings)`` where ``ids`` comes from ``my_glob`` and
        ``embeddings`` is a ``(len(ids), NUM_EMBEDDING_DIMENSIONS)`` array.
    """
    ids, image_paths = my_glob(image_root_dir, ".jpg")
    num_embeddings = len(image_paths)
    batch_size = config["batch_size"]
    steps = math.ceil(num_embeddings/batch_size)
    embeddings = np.zeros((num_embeddings, NUM_EMBEDDING_DIMENSIONS))

    for step in trange(steps):
        index = range(step*batch_size, min(num_embeddings, (step+1)*batch_size))
        X_normal, X_padding = read_image_batch([image_paths[i] for i in index])
        for model_index in range(len(global_models)):
            batch_input = X_padding if paddings[model_index] else X_normal
            make_global_model_predict(batch_input, index, embeddings, model_index)

        # Drop the per-batch tensors before loading the next batch.
        gc.collect()
    tf.keras.backend.clear_session()
    return ids, embeddings
def compute_train_ids_and_scores(train_ids, test_ids, train_embeddings, test_embeddings):
    """For every test embedding, rank its nearest train embeddings.

    Cosine distances are computed in chunks of ``config["distance_batch"]``
    test rows.  For each test row at most ``TOP_K`` train ids are kept,
    ordered from most to least similar.

    Returns:
        A list aligned with ``test_ids``; each entry is a list of
        ``(train_id, 1 - cosine_distance)`` tuples.
    """
    batch = config["distance_batch"]
    num_test = len(test_ids)
    train_ids_and_scores = [None] * num_test
    # argpartition needs kth < number of train rows.
    kth = min(TOP_K, len(train_ids)-1)

    for step in trange(math.ceil(num_test/batch)):
        index = range(step*batch, min(num_test, (step+1)*batch))
        distances = pairwise_distances(test_embeddings[index, :], train_embeddings,
                                       metric = 'cosine', n_jobs = -1)

        # Row r of `distances` belongs to the r-th test index of this chunk.
        for row, test_index in zip(distances, index):
            partition = np.argpartition(row, kth)[:TOP_K]
            nearest = sorted(((train_ids[p], row[p]) for p in partition), key=lambda pair: pair[1])
            train_ids_and_scores[test_index] = [(train_id, 1 - distance) for train_id, distance in nearest]
        gc.collect()

    return train_ids_and_scores
def compute_remove_top_global(train_ids, test_ids, train_embeddings, test_embeddings):
    """Select train ids that are globally far from the test set.

    Each train embedding is scored by the sum of ``1 - cosine_distance`` to
    its (up to) three closest test embeddings.  Ids whose score is strictly
    below the ``REMOVE_TOP_GLOBAL``-th percentile of all scores are returned.

    ``test_ids`` is accepted for signature parity with the sibling functions
    and is not read.

    Returns:
        A ``set`` of train ids to drop (empty when ``train_ids`` is empty).
    """
    num_train = len(train_ids)
    if num_train == 0:
        # Nothing to score; np.percentile would fail on an empty list.
        return set()

    batch = config["distance_batch"]
    train_scores = [None]*num_train

    for step in trange(math.ceil(num_train/batch)):
        index = range(step*batch, min(num_train, (step+1)*batch))
        distances = pairwise_distances(train_embeddings[index, :], test_embeddings,
                                       metric = 'cosine', n_jobs = -1)
        # Clamp kth so argpartition also works with three or fewer test rows,
        # matching the clamping used in compute_train_ids_and_scores.
        kth = min(3, distances.shape[1]-1)

        for row, train_index in zip(distances, index):
            partition = np.argpartition(row, kth)[:3]
            train_scores[train_index] = sum([1-row[p] for p in partition])
        gc.collect()

    remove_thresh = np.percentile(train_scores, REMOVE_TOP_GLOBAL, interpolation='nearest')
    return {train_id
            for train_id, train_score in zip(train_ids, train_scores)
            if train_score < remove_thresh}
def compute_remove_times_app(train_ids, test_ids, train_embeddings, test_embeddings):
    """Select the train ids that show up most often as test neighbours.

    For every test embedding the (up to) 100 closest train embeddings by
    cosine distance are found, and each hit increments that train id's
    counter.  The ``MAX_TIME_REMOVE`` ids with the highest counts are returned.

    Returns:
        A ``set`` of train ids to drop (empty when ``train_ids`` is empty).
    """
    if len(train_ids) == 0:
        return set()

    batch = config["distance_batch"]
    num_test = len(test_ids)
    train_time_dict = {train_id:0 for train_id in train_ids}

    for step in trange(math.ceil(num_test/batch)):
        index = range(step*batch, min(num_test, (step+1)*batch))
        distances = pairwise_distances(test_embeddings[index, :], train_embeddings,
                                       metric = 'cosine', n_jobs = -1)
        # Clamp kth so argpartition also works with 100 or fewer train rows.
        kth = min(100, distances.shape[1]-1)

        for row in distances:
            for p in np.argpartition(row, kth)[:100]:
                train_time_dict[train_ids[p]] += 1
        gc.collect()

    most_frequent = sorted(train_ids, key = train_time_dict.get, reverse = True)
    return set(most_frequent[:MAX_TIME_REMOVE])
def remove_train_by_remove_set(train_ids, train_embeddings, train_remove):
    """Drop every train id found in ``train_remove`` and its embedding row.

    Returns:
        ``(kept_ids, kept_embeddings)`` in the original order.
    """
    keep = [i for i, train_id in enumerate(train_ids) if train_id not in train_remove]
    return [train_ids[i] for i in keep], train_embeddings[keep]
def global_predictions():
    """Embed the train and test images and rank train images per test image.

    Returns:
        ``(test_ids, train_ids_and_scores)`` as produced by
        ``extract_global_features`` and ``compute_train_ids_and_scores``.
    """
    # `"..." * DEBUG` appends the sub-folder suffix only when DEBUG is truthy,
    # restricting the run to a sub-folder of each image directory.
    train_ids, train_embeddings = extract_global_features(TRAIN_IMAGE_DIR + "/0/0"*DEBUG)
    gc.collect()

    test_ids, test_embeddings = extract_global_features(TEST_IMAGE_DIR + "/0"*DEBUG)
    gc.collect()

    train_ids_and_scores = compute_train_ids_and_scores(train_ids, test_ids, train_embeddings, test_embeddings)
    return test_ids, train_ids_and_scores
def get_prediction_map(test_ids, train_ids_and_scores):
    """Map each test id to a space-separated string of its ranked train ids.

    ``train_ids_and_scores[i]`` holds the ``(train_id, score)`` pairs for
    ``test_ids[i]``; the scores are discarded.
    """
    return {
        test_id: " ".join(pair[0] for pair in train_ids_and_scores[position])
        for position, test_id in enumerate(test_ids)
    }
def get_predictions():
    """Run the global retrieval pipeline and return ``{test_id: "id id ..."}``."""
    test_ids, train_ids_and_scores = global_predictions()
    gc.collect()

    verification_predictions = get_prediction_map(test_ids, train_ids_and_scores)
    return verification_predictions
def seed_everything(seed):
    """Seed Python's ``random``, NumPy and TensorFlow, and set PYTHONHASHSEED."""
    for seed_rng in (random.seed, np.random.seed):
        seed_rng(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.random.set_seed(seed)
def my_glob(path, filetype):
    """Recursively collect files under ``path`` whose name ends with ``filetype``.

    Args:
        path: Root directory to walk.
        filetype: File-name suffix to match, e.g. ``".jpg"``.

    Returns:
        ``(ids, filepaths)`` -- two parallel lists.  ``ids`` holds each file
        name with the ``filetype`` suffix stripped; ``filepaths`` holds the
        corresponding full paths.
    """
    ids = []
    filepaths = []
    suffix_length = len(filetype)
    for root, _dirs, files in os.walk(path):
        for file in files:
            if file.endswith(filetype):
                filepaths.append(os.path.join(root, file))
                # Strip exactly the matched suffix rather than a fixed four
                # characters, so suffixes such as ".jpeg" give correct ids.
                ids.append(file[:len(file) - suffix_length])
    return ids, filepaths
def save_submission_csv(predictions=None):
    """Write ``submission.csv`` in the current working directory.

    Args:
        predictions: Mapping ``{image_id: prediction}``.  When ``None`` the
            dataset's ``sample_submission.csv`` is copied instead.

    Returns:
        ``True`` when the sample submission was copied, otherwise ``None``.
    """
    if predictions is None:
        shutil.copyfile(os.path.join(DATASET_DIR, 'sample_submission.csv'), 'submission.csv')
        return True
    # newline='' lets the csv module control line endings; without it extra
    # blank rows can appear on platforms that translate '\n'.
    with open('submission.csv', 'w', newline='') as submission_csv:
        csv_writer = csv.DictWriter(submission_csv, fieldnames=['id', 'images'])
        csv_writer.writeheader()
        for image_id, prediction in predictions.items():
            csv_writer.writerow({'id': image_id, 'images': f'{prediction}'})
# In [10]:
# Fix every RNG seed before anything else runs.
seed_everything(1312)

# Dataset locations, relative to the notebook's working directory.
INPUT_DIR = os.path.join('..', 'input')
DATASET_DIR = os.path.join(INPUT_DIR, 'landmark-retrieval-2021')
# The dataset's 'index' folder is what this notebook treats as the train set.
TRAIN_IMAGE_DIR = os.path.join(DATASET_DIR, 'index')
TEST_IMAGE_DIR = os.path.join(DATASET_DIR, 'test')
# In [11]:
# When the test folder has exactly the configured public-test image count
# (and DEBUG is off) no inference is run and the sample submission is copied.
# Otherwise run the full pipeline; DEBUG is forced off whenever the image
# count differs from the public one so the whole dataset is processed.
_, test_image_list = my_glob(TEST_IMAGE_DIR, ".jpg")
is_public_test_set = len(test_image_list) == config["NUM_PUBLIC_TEST_IMAGES"]
if is_public_test_set and not DEBUG:
    print("Copying sample submission")
    save_submission_csv()
else:
    if not is_public_test_set:
        DEBUG = False

    verification_predictions = get_predictions()
    save_submission_csv(verification_predictions)