Optimizing Your Diet With Genetic Algorithms in Python¶

import pandas as pd
import numpy as np
import random
from deap import base
from deap import creator
from deap import tools

Setting the goal percentages¶

#goal percentages
total_calories = 2500 * 7
percentage_prot = 0.3
percentage_carb = 0.5
percentage_fat = 0.2

# compute total calories per macro
cal_prot = round(percentage_prot * total_calories)
cal_carb = round(percentage_carb * total_calories)
cal_fat = round(percentage_fat * total_calories)
print(cal_prot, cal_carb, cal_fat)

# fixed info on macro nutriments: calories per gram of protein, carb and fat
prot_cal_p_gram = 4
carb_cal_p_gram = 4
fat_cal_p_gram = 9

#goal grams
gram_prot = cal_prot / prot_cal_p_gram
gram_carb = cal_carb / carb_cal_p_gram
gram_fat = cal_fat / fat_cal_p_gram
print(gram_prot, gram_carb, gram_fat)

5250 8750 3500
1312.5 2187.5 388.8888888888889

Setting up the products table¶

# per week: min, max, cal unit, prot g,  fat g, carb g
products_table = pd.DataFrame.from_records([
    ['Banana 1u', 0, 4, 89, 1, 0, 23],
    ['Mandarin 1u', 0, 4, 40, 1, 0, 10],
    ['Ananas 100g', 0, 7, 50, 1, 0, 13],
    ['Grapes 100g', 0, 7, 76, 1, 0, 17],
    ['Chocolate 1 bar', 0, 4, 230, 3, 13, 25],
    
    ['Hard Cheese 100g', 0, 8, 350, 28, 26, 2],
    ['Soft Cheese 100g', 0, 8, 374, 18, 33, 1],
    ['Pesto 100g', 0, 8, 303, 3, 30, 4],
    ['Hoummous 100g', 0, 8, 306, 7, 25, 11],
    ['Aubergine Paste 100g', 0, 4, 228, 1, 20, 8],
    
    ['Protein Shake', 0, 5, 160, 30, 3, 5],
    ['Veggie Burger 1', 0, 5, 220, 21, 12, 3],
    ['Veggie Burger 2', 0, 12, 165, 16, 9, 2],
    ['Boiled Egg', 0, 8, 155, 13, 11, 1],
    ['Backed Egg', 0, 16, 196, 14, 15, 1],
    
    ['Baguette Bread Half', 0, 3, 274, 10, 0, 52],
    ['Square Bread 1 slice', 0, 3, 97, 3, 1, 17],
    ['Cheese Pizza 1u', 0, 3, 903, 36, 47, 81],
    ['Veggie Pizza 1u', 0, 3, 766, 26, 35, 85],
    
    ['Soy Milk 200ml', 0, 1, 115, 8, 4, 11],
    ['Soy Chocolate Milk 250ml', 0, 3, 160, 7, 6,20],
    
])
products_table.columns = ['Name', 'Min', 'Max', 'Calories', 'Gram_Prot', 'Gram_Fat', 'Gram_Carb']

products_table

Optimize The Shopping List Univariately To Match Calories¶

# extract the information of products in a format that is easier to use in the deap algorithms cost function
cal_data = products_table[['Gram_Prot', 'Gram_Fat', 'Gram_Carb']]

prot_data = list(cal_data['Gram_Prot'])
fat_data = list(cal_data['Gram_Fat'])
carb_data = list(cal_data['Gram_Carb'])

# the random initialization of the genetic algorithm is done here
# it gives a list of integers with for each products the number of times it is bought
def n_per_product():
    return random.choices( range(0, 10), k = 21)

# this is the function used by the algorithm for evaluation
# I chose it to be the absolute difference of the number of calories in the planning and the goal of calories
def evaluate(individual):
    individual = individual[0]
    tot_prot = sum(x*y for x,y in zip(prot_data,individual))
    tot_fat = sum(x*y for x,y in zip(fat_data,individual))
    tot_carb = sum(x*y for x,y in zip(carb_data,individual))
    cals = prot_cal_p_gram * tot_prot + carb_cal_p_gram * tot_carb + fat_cal_p_gram * tot_fat
    return abs(cals - total_calories),

# this is the setup of the deap library: registering the different function into the toolbox
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

toolbox.register("n_per_product", n_per_product)

toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.n_per_product, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

# as an example, this is what a population of 10 shopping lists looks like
toolbox.population(n=10)

[[[6, 6, 8, 3, 9, 1, 3, 9, 1, 8, 6, 9, 6, 2, 8, 0, 7, 5, 7, 7, 2]],
 [[5, 2, 8, 6, 8, 1, 0, 2, 4, 3, 7, 0, 0, 9, 2, 3, 1, 6, 9, 7, 9]],
 [[9, 8, 2, 4, 2, 5, 0, 1, 4, 6, 2, 1, 3, 0, 6, 9, 0, 0, 9, 6, 6]],
 [[0, 1, 5, 6, 9, 8, 0, 0, 4, 4, 6, 5, 3, 7, 5, 0, 0, 9, 2, 3, 1]],
 [[8, 3, 9, 7, 9, 4, 5, 8, 8, 9, 9, 6, 9, 9, 3, 9, 2, 5, 1, 3, 4]],
 [[6, 5, 6, 9, 5, 2, 4, 9, 1, 6, 4, 8, 7, 7, 4, 2, 3, 3, 9, 6, 7]],
 [[2, 1, 9, 2, 3, 8, 0, 5, 5, 0, 5, 1, 8, 0, 9, 0, 5, 1, 0, 9, 3]],
 [[9, 8, 8, 7, 0, 9, 1, 2, 3, 4, 0, 9, 7, 7, 5, 1, 4, 1, 3, 4, 6]],
 [[1, 2, 1, 9, 4, 5, 9, 8, 9, 7, 3, 8, 1, 6, 5, 8, 8, 1, 3, 5, 2]],
 [[1, 4, 0, 2, 3, 7, 1, 4, 0, 7, 3, 9, 6, 6, 8, 7, 3, 3, 8, 6, 0]]]

# this is the definition of the total genetic algorithm is executed, it is almost literally copied from the deap library
def main():
    pop = toolbox.population(n=300)
    
    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    # CXPB  is the probability with which two individuals
    #       are crossed
    #
    # MUTPB is the probability for mutating an individual
    CXPB, MUTPB = 0.5, 0.2
    
    # Extracting all the fitnesses of 
    fits = [ind.fitness.values[0] for ind in pop]
    
    # Variable keeping track of the number of generations
    g = 0
    
    # Begin the evolution
    while g < 5000:
        # A new generation
        g = g + 1
        #print("-- Generation %i --" % g)
        
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))
        
        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1[0], child2[0])
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant[0])
                del mutant.fitness.values
            
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
            
        pop[:] = offspring
        
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        
        #print(min(fits), max(fits), mean, std)
    
    best = pop[np.argmin([toolbox.evaluate(x) for x in pop])]
    return best

best_solution = main()

products_table['univariate_choice'] = pd.Series(best_solution[0])
products_table.head()

Optimize The Shopping List Multivariately To Match Calories, Protein, Fat and Carbs¶

# in this second version, we optimize for the four components of the shopping list: calories, protein, fat and carbs
# if we need to make everything as important, we should add a weight to them
# we know that there are 30% protein calories, 20% fat and 50% carbs.
weights = (-1., -1. / 0.3, -1. / 0.2, -1./0.5)

creator.create("FitnessMin", base.Fitness, weights=weights)
creator.create("Individual", list, fitness=creator.FitnessMin)

C:\py32bit\lib\site-packages\deap\creator.py:141: RuntimeWarning: A class named 'FitnessMin' has already been created and it will be overwritten. Consider deleting previous creation of that class or rename it.
  RuntimeWarning)
C:\py32bit\lib\site-packages\deap\creator.py:141: RuntimeWarning: A class named 'Individual' has already been created and it will be overwritten. Consider deleting previous creation of that class or rename it.
  RuntimeWarning)

def evaluate(individual):
    individual = individual[0]
    tot_prot = sum(x*y for x,y in zip(prot_data,individual))
    tot_fat = sum(x*y for x,y in zip(fat_data,individual))
    tot_carb = sum(x*y for x,y in zip(carb_data,individual))
    cals = prot_cal_p_gram * tot_prot + carb_cal_p_gram * tot_carb + fat_cal_p_gram * tot_fat
    
    
    return abs(cals - total_calories), \
            abs(tot_prot - gram_prot), \
            abs(tot_fat - gram_fat), \
            abs(tot_carb - gram_carb), \

# this is the setup of the deap library: registering the different function into the toolbox
toolbox = base.Toolbox()

toolbox.register("n_per_product", n_per_product)

toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.n_per_product, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

def main():
    pop = toolbox.population(n=300)
    
    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    # CXPB  is the probability with which two individuals
    #       are crossed
    #
    # MUTPB is the probability for mutating an individual
    CXPB, MUTPB = 0.5, 0.2
    
    # Extracting all the fitnesses of 
    fits = [ind.fitness.values[0] for ind in pop]
    
    # Variable keeping track of the number of generations
    g = 0
    
    # Begin the evolution
    while g < 5000:
        # A new generation
        g = g + 1
        #print("-- Generation %i --" % g)
        
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))
        
        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1[0], child2[0])
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant[0])
                del mutant.fitness.values
            
                
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
            
        pop[:] = offspring
        
        
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        
        #print(min(fits), max(fits), mean, std)
    
    
    best = pop[np.argmin([sum(toolbox.evaluate(x)) for x in pop])]
    return best

best_solution = main()

products_table['multivariate_choice'] = pd.Series(best_solution[0])

Analyzing the results¶

products_table['univariate_gr_prot'] = products_table['univariate_choice'] * products_table['Gram_Prot']
products_table['univariate_gr_fat'] = products_table['univariate_choice'] * products_table['Gram_Fat']
products_table['univariate_gr_carb'] = products_table['univariate_choice'] * products_table['Gram_Carb']
products_table['univariate_cal'] = products_table['univariate_choice'] * products_table['Calories']

products_table['multivariate_gr_prot'] = products_table['multivariate_choice'] * products_table['Gram_Prot']
products_table['multivariate_gr_fat'] = products_table['multivariate_choice'] * products_table['Gram_Fat']
products_table['multivariate_gr_carb'] = products_table['multivariate_choice'] * products_table['Gram_Carb']
products_table['multivariate_cal'] = products_table['multivariate_choice'] * products_table['Calories']

Making a summary of the univariate and multivariate results¶

summary = pd.DataFrame.from_records(
[
    [products_table['univariate_gr_prot'].sum(), products_table['multivariate_gr_prot'].sum(), gram_prot],
    [products_table['univariate_gr_fat'].sum(), products_table['multivariate_gr_fat'].sum(), gram_fat],
    [products_table['univariate_gr_carb'].sum(), products_table['multivariate_gr_carb'].sum(), gram_carb],
    [products_table['univariate_cal'].sum(), products_table['multivariate_cal'].sum(), sum((cal_prot, cal_carb, cal_fat))]
])
summary.columns = ['univariate', 'multivariate', 'goal']
summary.index = ['prot', 'fat', 'carb', 'cal']
summary["univ_error"] = (summary["goal"] - summary["univariate"]).apply(abs)
summary["multiv_error"] = (summary["goal"] - summary["multivariate"]).apply(abs)
summary

summary["univ_error"].sum(), summary["multiv_error"].sum()

(2528.1111111111113, 2278.1111111111113)

# Shopping list
products_table[['Name', 'multivariate_choice', 'univariate_choice']]

	univariate	multivariate	goal	univ_error	multiv_error
prot	1000	864	1312.500000	312.500000	448.500000
fat	1080	1012	388.888889	691.111111	623.111111
carb	945	1234	2187.500000	1242.500000	953.500000
cal	17782	17753	17500.000000	282.000000	253.000000

	Name	Max	Calories	Gram_Prot	Gram_Fat	Gram_Carb
0	Banana 1u	4	89	1	0	23
1	Mandarin 1u	4	40	1	0	10
2	Ananas 100g	7	50	1	0	13
3	Grapes 100g	7	76	1	0	17
4	Chocolate 1 bar	4	230	3	13	25
5	Hard Cheese 100g	8	350	28	26	2
6	Soft Cheese 100g	8	374	18	33	1
7	Pesto 100g	8	303	3	30	4
8	Hoummous 100g	8	306	7	25	11
9	Aubergine Paste 100g	4	228	1	20	8
10	Protein Shake	5	160	30	3	5
11	Veggie Burger 1	5	220	21	12	3
12	Veggie Burger 2	12	165	16	9	2
13	Boiled Egg	8	155	13	11	1
14	Backed Egg	16	196	14	15	1
15	Baguette Bread Half	3	274	10	0	52
16	Square Bread 1 slice	3	97	3	1	17
17	Cheese Pizza 1u	3	903	36	47	81
18	Veggie Pizza 1u	3	766	26	35	85
19	Soy Milk 200ml	1	115	8	4	11
20	Soy Chocolate Milk 250ml	3	160	7	6	20