Optimizing Your Diet With Genetic Algorithms in Python

In [1]:
import pandas as pd
import numpy as np
import random
from deap import base
from deap import creator
from deap import tools

Setting the goal percentages

In [2]:
#goal percentages
total_calories = 2500 * 7
percentage_prot = 0.3
percentage_carb = 0.5
percentage_fat = 0.2

# compute total calories per macro
cal_prot = round(percentage_prot * total_calories)
cal_carb = round(percentage_carb * total_calories)
cal_fat = round(percentage_fat * total_calories)
print(cal_prot, cal_carb, cal_fat)

# fixed info on macro nutriments: calories per gram of protein, carb and fat
prot_cal_p_gram = 4
carb_cal_p_gram = 4
fat_cal_p_gram = 9

#goal grams
gram_prot = cal_prot / prot_cal_p_gram
gram_carb = cal_carb / carb_cal_p_gram
gram_fat = cal_fat / fat_cal_p_gram
print(gram_prot, gram_carb, gram_fat)
5250 8750 3500
1312.5 2187.5 388.8888888888889

Setting up the products table

In [3]:
# per week: min, max, cal unit, prot g,  fat g, carb g
products_table = pd.DataFrame.from_records([
    ['Banana 1u', 0, 4, 89, 1, 0, 23],
    ['Mandarin 1u', 0, 4, 40, 1, 0, 10],
    ['Ananas 100g', 0, 7, 50, 1, 0, 13],
    ['Grapes 100g', 0, 7, 76, 1, 0, 17],
    ['Chocolate 1 bar', 0, 4, 230, 3, 13, 25],
    
    ['Hard Cheese 100g', 0, 8, 350, 28, 26, 2],
    ['Soft Cheese 100g', 0, 8, 374, 18, 33, 1],
    ['Pesto 100g', 0, 8, 303, 3, 30, 4],
    ['Hoummous 100g', 0, 8, 306, 7, 25, 11],
    ['Aubergine Paste 100g', 0, 4, 228, 1, 20, 8],
    
    ['Protein Shake', 0, 5, 160, 30, 3, 5],
    ['Veggie Burger 1', 0, 5, 220, 21, 12, 3],
    ['Veggie Burger 2', 0, 12, 165, 16, 9, 2],
    ['Boiled Egg', 0, 8, 155, 13, 11, 1],
    ['Backed Egg', 0, 16, 196, 14, 15, 1],
    
    ['Baguette Bread Half', 0, 3, 274, 10, 0, 52],
    ['Square Bread 1 slice', 0, 3, 97, 3, 1, 17],
    ['Cheese Pizza 1u', 0, 3, 903, 36, 47, 81],
    ['Veggie Pizza 1u', 0, 3, 766, 26, 35, 85],
    
    ['Soy Milk 200ml', 0, 1, 115, 8, 4, 11],
    ['Soy Chocolate Milk 250ml', 0, 3, 160, 7, 6,20],
    
])
products_table.columns = ['Name', 'Min', 'Max', 'Calories', 'Gram_Prot', 'Gram_Fat', 'Gram_Carb']

products_table
Out[3]:
Name Min Max Calories Gram_Prot Gram_Fat Gram_Carb
0 Banana 1u 0 4 89 1 0 23
1 Mandarin 1u 0 4 40 1 0 10
2 Ananas 100g 0 7 50 1 0 13
3 Grapes 100g 0 7 76 1 0 17
4 Chocolate 1 bar 0 4 230 3 13 25
5 Hard Cheese 100g 0 8 350 28 26 2
6 Soft Cheese 100g 0 8 374 18 33 1
7 Pesto 100g 0 8 303 3 30 4
8 Hoummous 100g 0 8 306 7 25 11
9 Aubergine Paste 100g 0 4 228 1 20 8
10 Protein Shake 0 5 160 30 3 5
11 Veggie Burger 1 0 5 220 21 12 3
12 Veggie Burger 2 0 12 165 16 9 2
13 Boiled Egg 0 8 155 13 11 1
14 Backed Egg 0 16 196 14 15 1
15 Baguette Bread Half 0 3 274 10 0 52
16 Square Bread 1 slice 0 3 97 3 1 17
17 Cheese Pizza 1u 0 3 903 36 47 81
18 Veggie Pizza 1u 0 3 766 26 35 85
19 Soy Milk 200ml 0 1 115 8 4 11
20 Soy Chocolate Milk 250ml 0 3 160 7 6 20

Optimize The Shopping List Univariately To Match Calories

In [4]:
# extract the information of products in a format that is easier to use in the deap algorithms cost function
cal_data = products_table[['Gram_Prot', 'Gram_Fat', 'Gram_Carb']]

prot_data = list(cal_data['Gram_Prot'])
fat_data = list(cal_data['Gram_Fat'])
carb_data = list(cal_data['Gram_Carb'])
In [5]:
# the random initialization of the genetic algorithm is done here
# it gives a list of integers with for each products the number of times it is bought
def n_per_product():
    return random.choices( range(0, 10), k = 21)
In [6]:
# this is the function used by the algorithm for evaluation
# I chose it to be the absolute difference of the number of calories in the planning and the goal of calories
def evaluate(individual):
    individual = individual[0]
    tot_prot = sum(x*y for x,y in zip(prot_data,individual))
    tot_fat = sum(x*y for x,y in zip(fat_data,individual))
    tot_carb = sum(x*y for x,y in zip(carb_data,individual))
    cals = prot_cal_p_gram * tot_prot + carb_cal_p_gram * tot_carb + fat_cal_p_gram * tot_fat
    return abs(cals - total_calories),
In [7]:
# this is the setup of the deap library: registering the different function into the toolbox
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

toolbox.register("n_per_product", n_per_product)

toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.n_per_product, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
In [8]:
# as an example, this is what a population of 10 shopping lists looks like
toolbox.population(n=10)
Out[8]:
[[[6, 6, 8, 3, 9, 1, 3, 9, 1, 8, 6, 9, 6, 2, 8, 0, 7, 5, 7, 7, 2]],
 [[5, 2, 8, 6, 8, 1, 0, 2, 4, 3, 7, 0, 0, 9, 2, 3, 1, 6, 9, 7, 9]],
 [[9, 8, 2, 4, 2, 5, 0, 1, 4, 6, 2, 1, 3, 0, 6, 9, 0, 0, 9, 6, 6]],
 [[0, 1, 5, 6, 9, 8, 0, 0, 4, 4, 6, 5, 3, 7, 5, 0, 0, 9, 2, 3, 1]],
 [[8, 3, 9, 7, 9, 4, 5, 8, 8, 9, 9, 6, 9, 9, 3, 9, 2, 5, 1, 3, 4]],
 [[6, 5, 6, 9, 5, 2, 4, 9, 1, 6, 4, 8, 7, 7, 4, 2, 3, 3, 9, 6, 7]],
 [[2, 1, 9, 2, 3, 8, 0, 5, 5, 0, 5, 1, 8, 0, 9, 0, 5, 1, 0, 9, 3]],
 [[9, 8, 8, 7, 0, 9, 1, 2, 3, 4, 0, 9, 7, 7, 5, 1, 4, 1, 3, 4, 6]],
 [[1, 2, 1, 9, 4, 5, 9, 8, 9, 7, 3, 8, 1, 6, 5, 8, 8, 1, 3, 5, 2]],
 [[1, 4, 0, 2, 3, 7, 1, 4, 0, 7, 3, 9, 6, 6, 8, 7, 3, 3, 8, 6, 0]]]
In [9]:
# this is the definition of the total genetic algorithm is executed, it is almost literally copied from the deap library
def main():
    pop = toolbox.population(n=300)
    
    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    # CXPB  is the probability with which two individuals
    #       are crossed
    #
    # MUTPB is the probability for mutating an individual
    CXPB, MUTPB = 0.5, 0.2
    
    # Extracting all the fitnesses of 
    fits = [ind.fitness.values[0] for ind in pop]
    
    # Variable keeping track of the number of generations
    g = 0
    
    # Begin the evolution
    while g < 5000:
        # A new generation
        g = g + 1
        #print("-- Generation %i --" % g)
        
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))
        
        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1[0], child2[0])
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant[0])
                del mutant.fitness.values
            
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
            
        pop[:] = offspring
        
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        
        #print(min(fits), max(fits), mean, std)
    
    best = pop[np.argmin([toolbox.evaluate(x) for x in pop])]
    return best
In [10]:
best_solution = main()
In [11]:
products_table['univariate_choice'] = pd.Series(best_solution[0])
products_table.head()
Out[11]:
Name Min Max Calories Gram_Prot Gram_Fat Gram_Carb univariate_choice
0 Banana 1u 0 4 89 1 0 23 6
1 Mandarin 1u 0 4 40 1 0 10 1
2 Ananas 100g 0 7 50 1 0 13 8
3 Grapes 100g 0 7 76 1 0 17 5
4 Chocolate 1 bar 0 4 230 3 13 25 7

Optimize The Shopping List Multivariately To Match Calories, Protein, Fat and Carbs

In [12]:
# in this second version, we optimize for the four components of the shopping list: calories, protein, fat and carbs
# if we need to make everything as important, we should add a weight to them
# we know that there are 30% protein calories, 20% fat and 50% carbs.
weights = (-1., -1. / 0.3, -1. / 0.2, -1./0.5)
In [13]:
creator.create("FitnessMin", base.Fitness, weights=weights)
creator.create("Individual", list, fitness=creator.FitnessMin)
C:\py32bit\lib\site-packages\deap\creator.py:141: RuntimeWarning: A class named 'FitnessMin' has already been created and it will be overwritten. Consider deleting previous creation of that class or rename it.
  RuntimeWarning)
C:\py32bit\lib\site-packages\deap\creator.py:141: RuntimeWarning: A class named 'Individual' has already been created and it will be overwritten. Consider deleting previous creation of that class or rename it.
  RuntimeWarning)
In [14]:
def evaluate(individual):
    individual = individual[0]
    tot_prot = sum(x*y for x,y in zip(prot_data,individual))
    tot_fat = sum(x*y for x,y in zip(fat_data,individual))
    tot_carb = sum(x*y for x,y in zip(carb_data,individual))
    cals = prot_cal_p_gram * tot_prot + carb_cal_p_gram * tot_carb + fat_cal_p_gram * tot_fat
    
    
    return abs(cals - total_calories), \
            abs(tot_prot - gram_prot), \
            abs(tot_fat - gram_fat), \
            abs(tot_carb - gram_carb), \
In [15]:
# this is the setup of the deap library: registering the different function into the toolbox
toolbox = base.Toolbox()

toolbox.register("n_per_product", n_per_product)

toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.n_per_product, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
In [16]:
def main():
    pop = toolbox.population(n=300)
    
    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    # CXPB  is the probability with which two individuals
    #       are crossed
    #
    # MUTPB is the probability for mutating an individual
    CXPB, MUTPB = 0.5, 0.2
    
    # Extracting all the fitnesses of 
    fits = [ind.fitness.values[0] for ind in pop]
    
    # Variable keeping track of the number of generations
    g = 0
    
    # Begin the evolution
    while g < 5000:
        # A new generation
        g = g + 1
        #print("-- Generation %i --" % g)
        
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))
        
        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1[0], child2[0])
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant[0])
                del mutant.fitness.values
            
                
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
            
        pop[:] = offspring
        
        
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        
        #print(min(fits), max(fits), mean, std)
    
    
    best = pop[np.argmin([sum(toolbox.evaluate(x)) for x in pop])]
    return best
In [17]:
best_solution = main()
In [18]:
products_table['multivariate_choice'] = pd.Series(best_solution[0])

Analyzing the results

In [19]:
products_table['univariate_gr_prot'] = products_table['univariate_choice'] * products_table['Gram_Prot']
products_table['univariate_gr_fat'] = products_table['univariate_choice'] * products_table['Gram_Fat']
products_table['univariate_gr_carb'] = products_table['univariate_choice'] * products_table['Gram_Carb']
products_table['univariate_cal'] = products_table['univariate_choice'] * products_table['Calories']

products_table['multivariate_gr_prot'] = products_table['multivariate_choice'] * products_table['Gram_Prot']
products_table['multivariate_gr_fat'] = products_table['multivariate_choice'] * products_table['Gram_Fat']
products_table['multivariate_gr_carb'] = products_table['multivariate_choice'] * products_table['Gram_Carb']
products_table['multivariate_cal'] = products_table['multivariate_choice'] * products_table['Calories']

Making a summary of the univariate and multivariate results

In [20]:
summary = pd.DataFrame.from_records(
[
    [products_table['univariate_gr_prot'].sum(), products_table['multivariate_gr_prot'].sum(), gram_prot],
    [products_table['univariate_gr_fat'].sum(), products_table['multivariate_gr_fat'].sum(), gram_fat],
    [products_table['univariate_gr_carb'].sum(), products_table['multivariate_gr_carb'].sum(), gram_carb],
    [products_table['univariate_cal'].sum(), products_table['multivariate_cal'].sum(), sum((cal_prot, cal_carb, cal_fat))]
])
summary.columns = ['univariate', 'multivariate', 'goal']
summary.index = ['prot', 'fat', 'carb', 'cal']
summary["univ_error"] = (summary["goal"] - summary["univariate"]).apply(abs)
summary["multiv_error"] = (summary["goal"] - summary["multivariate"]).apply(abs)
summary
Out[20]:
univariate multivariate goal univ_error multiv_error
prot 1000 864 1312.500000 312.500000 448.500000
fat 1080 1012 388.888889 691.111111 623.111111
carb 945 1234 2187.500000 1242.500000 953.500000
cal 17782 17753 17500.000000 282.000000 253.000000
In [21]:
summary["univ_error"].sum(), summary["multiv_error"].sum()
Out[21]:
(2528.1111111111113, 2278.1111111111113)
In [22]:
# Shopping list
products_table[['Name', 'multivariate_choice', 'univariate_choice']]
Out[22]:
Name multivariate_choice univariate_choice
0 Banana 1u 0 6
1 Mandarin 1u 1 1
2 Ananas 100g 3 8
3 Grapes 100g 5 5
4 Chocolate 1 bar 3 7
5 Hard Cheese 100g 1 1
6 Soft Cheese 100g 1 7
7 Pesto 100g 9 5
8 Hoummous 100g 0 9
9 Aubergine Paste 100g 1 1
10 Protein Shake 5 8
11 Veggie Burger 1 4 9
12 Veggie Burger 2 3 9
13 Boiled Egg 6 3
14 Backed Egg 2 0
15 Baguette Bread Half 1 0
16 Square Bread 1 slice 2 0
17 Cheese Pizza 1u 8 1
18 Veggie Pizza 1u 0 0
19 Soy Milk 200ml 3 8
20 Soy Chocolate Milk 250ml 8 2