Source code for autoaug.autoaugment_learners.UcbLearner

import numpy as np

from tqdm import trange

from ..child_networks import *
from .RsLearner import RsLearner


class UcbLearner(RsLearner):
    """
    Uses the UCB1 algorithm originally developed for multi-armed bandit problems.
    Recommended when

    - Optimal hyperparameters for training the CNN (such as batch size and
      learning rate) have not been found yet.

    - Using a toy dataset or a toy CNN

    Args:
        num_sub_policies (int, optional): number of subpolicies per policy. Defaults to 5.

        p_bins (int, optional): number of bins we divide the interval [0,1] for
                        probabilities. e.g. (0.0, 0.1, ... 1.0) Defaults to 11.

        m_bins (int, optional): number of bins we divide the magnitude space.
                        Defaults to 10.

        exclude_method (list, optional): list of names(:type:str) of image operations
                        the user wants to exclude from the search space.
                        Defaults to None, which is passed on as an empty list.

        batch_size (int, optional): child_network training parameter. Defaults to 8.

        toy_size (int, optional): child_network training parameter. ratio of original
                            dataset used in toy dataset. Defaults to 1.

        learning_rate (float, optional): child_network training parameter. Defaults to 1e-1.

        max_epochs (Union[int, float], optional): child_network training parameter.
                            Defaults to float('inf').

        early_stop_num (int, optional): child_network training parameter. Defaults to 30.

        num_policies (int, optional): Number of policies we want to search over.
                            Defaults to 100.

    Attributes:
        history (list): list of policies that has been input into
                        self._test_autoaugment_policy as well as their respective obtained
                        accuracies

        augmentation_space (list): list of image functions that the user has chosen to
                        include in the search space.

        policies (list): A list of policies which we are currently searching over.

        avg_accs (list): A list where the nth element indicates the average accuracy
                        obtained by the nth policy (None until that policy has
                        been tested once).

        best_avg_accs (list): The best entry of ``avg_accs`` recorded after
                        each iteration of ``learn``.

        cnts (list): A list where the nth element is the number of times the
                        nth policy has been tested.

        q_plus_cnt (list): A list where the nth element is the UCB1 score
                        (average accuracy plus exploration bonus) of the nth
                        policy; 0 until that policy has been tested once.

        total_count (int): Total number of policy evaluations performed so far.



    Notes
    -----
    As opposed to the other learners, this searches over a subset of the entire
    search space (specified in the AutoAugment paper). The size of the subset is
    initialized to be ``self.num_policies``. But we can increase it by running
    self.make_more_policies(). For example, we initialize the learner with
    ``self.num_policies=7``, run ``self.learn(iterations=20)`` to learn about the
    seven policies we have in our ``self.policies``. Then run
    ``self.make_more_policies(n=5)`` to add 5 more policies to ``self.policies``.
    Then we can run ``self.learn(iterations=20)`` to continue the UCB1 algorithm
    with the extended search space.

    References
    ----------
    Peter Auer, et al.
        "Finite-time Analysis of the Multiarmed Bandit Problem"
        https://homes.di.unimi.it/~cesabian/Pubblicazioni/ml-02.pdf

    """
    def __init__(self,
                # parameters that define the search space
                num_sub_policies=5,
                p_bins=11,
                m_bins=10,
                exclude_method=None,
                # hyperparameters for when training the child_network
                batch_size=8,
                toy_size=1,
                learning_rate=1e-1,
                max_epochs=float('inf'),
                early_stop_num=30,
                # UcbLearner specific hyperparameter
                num_policies=100
                ):

        super().__init__(
                        num_sub_policies=num_sub_policies,
                        p_bins=p_bins,
                        m_bins=m_bins,
                        batch_size=batch_size,
                        toy_size=toy_size,
                        learning_rate=learning_rate,
                        max_epochs=max_epochs,
                        early_stop_num=early_stop_num,
                        # build a fresh list per instance instead of sharing
                        # one mutable default object between all learners
                        exclude_method=[] if exclude_method is None else exclude_method,
                        )

        # attributes used in the UCB1 algorithm
        self.num_policies = num_policies

        # _generate_new_policy is not defined in this class; it is an
        # inherited helper
        self.policies = [self._generate_new_policy() for _ in range(num_policies)]

        # None marks a policy that has not been tested yet
        self.avg_accs = [None]*self.num_policies
        self.best_avg_accs = []

        self.cnts = [0]*self.num_policies
        self.q_plus_cnt = [0]*self.num_policies
        self.total_count = 0


    def make_more_policies(self, n):
        """generates n more random policies and adds it to self.policies

        Args:
            n (int): how many more policies we want to randomly generate
                    and add to our list of policies

        Raises:
            ValueError: if ``n`` is negative (the bookkeeping lists cannot
                    shrink, so ``self.num_policies`` would go out of sync).
        """
        if n < 0:
            raise ValueError("n must be non-negative, got {}".format(n))

        self.policies += [self._generate_new_policy() for _ in range(n)]

        # all the below need to be lengthened to store information for the
        # new policies; the placeholders match the ones used in __init__
        self.avg_accs += [None]*n
        self.cnts += [0]*n
        self.q_plus_cnt += [0]*n
        self.num_policies += n


    def learn(self,
            train_dataset,
            test_dataset,
            child_network_architecture,
            iterations=15,):
        """continue the UCB algorithm for ``iterations`` number of turns

        Every turn tests exactly one policy: the first policy that has never
        been tested if there is one, otherwise the policy with the highest
        UCB1 score in ``self.q_plus_cnt``.

        Args:
            train_dataset: forwarded unchanged to ``self._test_autoaugment_policy``.
            test_dataset: forwarded unchanged to ``self._test_autoaugment_policy``.
            child_network_architecture: forwarded unchanged to
                    ``self._test_autoaugment_policy``.
            iterations (int, optional): number of policy evaluations to run.
                    Defaults to 15.

        Raises:
            ValueError: if ``iterations`` is positive but there are no
                    policies to test.
        """
        if iterations > 0 and not self.policies:
            raise ValueError("cannot run UCB1 without any policies; "
                             "call make_more_policies first")

        for this_iter in trange(iterations):

            # choose which policy we want to test
            if None in self.avg_accs:
                # if there is a policy we haven't tested yet, we
                # test that one
                this_policy_idx = self.avg_accs.index(None)
                acc = self._test_autoaugment_policy(
                                self.policies[this_policy_idx],
                                child_network_architecture,
                                train_dataset,
                                test_dataset,
                                )
                # first observation: the average is the accuracy itself
                self.avg_accs[this_policy_idx] = acc
            else:
                # if we have tested all policies before, we test the
                # one with the best q_plus_cnt value
                this_policy_idx = int(np.argmax(self.q_plus_cnt))
                acc = self._test_autoaugment_policy(
                                self.policies[this_policy_idx],
                                child_network_architecture,
                                train_dataset,
                                test_dataset,
                                logging=False,
                                )
                # incremental mean; cnts still holds the count from before
                # this evaluation
                prev_cnt = self.cnts[this_policy_idx]
                self.avg_accs[this_policy_idx] = (
                    self.avg_accs[this_policy_idx]*prev_cnt + acc
                ) / (prev_cnt + 1)

            # logging the best avg acc up to now
            best_avg_acc = max([x for x in self.avg_accs if x is not None])
            self.best_avg_accs.append(best_avg_acc)

            # print progress for user
            if (this_iter+1) % 5 == 0:
                # untested policies are still None and cannot be rounded or
                # compared, so they are shown as None and skipped for the max
                rounded_accs = [
                    None if x is None else np.around(x, 2)
                    for x in self.avg_accs
                ]
                print("Iteration: {},\tQ-Values: {}, Best this_iter: {}".format(
                                this_iter+1,
                                rounded_accs,
                                max(x for x in rounded_accs if x is not None)
                                )
                    )

            # update counts
            self.cnts[this_policy_idx] += 1
            self.total_count += 1

            # update q_plus_cnt values every turn after the initial sweep through:
            # average accuracy + sqrt(2*ln(total evaluations)/evaluations of i)
            for i, avg_acc in enumerate(self.avg_accs):
                if avg_acc is not None:
                    self.q_plus_cnt[i] = avg_acc + np.sqrt(2*np.log(self.total_count)/self.cnts[i])

            # per-policy evaluation counts, printed after every turn
            print(self.cnts)


    def _rank_policies_by_avg_acc(self, number_policies):
        """returns up to ``number_policies`` (policy, avg_acc) pairs, best first

        Untested policies are ranked with an average accuracy of 0. A
        non-positive ``number_policies`` gives an empty list.
        """
        scored = [
            (policy, 0 if avg_acc is None else avg_acc)
            for policy, avg_acc in zip(self.policies, self.avg_accs)
        ]
        ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
        # slicing already caps at len(ranked); clamp at 0 so a negative
        # request does not silently drop entries from the tail instead
        return ranked[:max(number_policies, 0)]


    def get_mega_policy(self, number_policies=5):
        """
        Produces a mega policy, based on the n best subpolicies (evo learner)/policies
        (other learners)


        Args:
            number_policies -> int: Number of (sub)policies to be included in the mega
            policy

        Returns:
            megapolicy -> [subpolicy, subpolicy, ...]
        """
        # concatenate the elements of the best policies, best policy first
        return [
            subpolicy
            for policy, _ in self._rank_policies_by_avg_acc(number_policies)
            for subpolicy in policy
        ]


    def get_n_best_policies(self, number_policies=5):
        """
        returns the n best policies


        Args:
            number_policies (int): Number of (sub)policies to return

        Returns:
            list of the best n ``(policy, average accuracy)`` tuples, sorted
            by average accuracy in descending order; untested policies are
            reported with an average accuracy of 0
        """
        return self._rank_policies_by_avg_acc(number_policies)


       




if __name__=='__main__':
    # Hyperparameter settings used when this module is executed as a script.
    # NOTE(review): only these assignments are visible here; nothing in view
    # consumes them yet — confirm against the rest of the script.
    batch_size = 32       # size of batch the inner NN is trained with
    learning_rate = 1e-1  # fix learning rate
    ds = "MNIST"          # pick dataset (MNIST, KMNIST, FashionMNIST, CIFAR10, CIFAR100)
    toy_size = 0.02       # total proportion of training and test set we use
    max_epochs = 100      # max number of epochs that is run if early stopping is not hit
    early_stop_num = 10   # max number of worse validation scores before early stopping is triggered
    early_stop_flag = True        # implement early stopping or not
    average_validation = [15,25]  # if not implementing early stopping, what epochs are we averaging over
    num_policies = 5      # fix number of policies
    num_sub_policies = 5  # fix number of sub-policies in a policy
    iterations = 100      # total iterations, should be more than the number of policies
    IsLeNet = "SimpleNet" # using LeNet or EasyNet or SimpleNet