SCIP Doxygen Documentation
 
Loading...
Searching...
No Matches
bandit_epsgreedy.c
Go to the documentation of this file.
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2/* */
3/* This file is part of the program and library */
4/* SCIP --- Solving Constraint Integer Programs */
5/* */
6/* Copyright (c) 2002-2023 Zuse Institute Berlin (ZIB) */
7/* */
8/* Licensed under the Apache License, Version 2.0 (the "License"); */
9/* you may not use this file except in compliance with the License. */
10/* You may obtain a copy of the License at */
11/* */
12/* http://www.apache.org/licenses/LICENSE-2.0 */
13/* */
14/* Unless required by applicable law or agreed to in writing, software */
15/* distributed under the License is distributed on an "AS IS" BASIS, */
16/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17/* See the License for the specific language governing permissions and */
18/* limitations under the License. */
19/* */
20/* You should have received a copy of the Apache-2.0 license */
21/* along with SCIP; see the file LICENSE. If not visit scipopt.org. */
22/* */
23/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24
25/**@file bandit_epsgreedy.c
26 * @ingroup OTHER_CFILES
27 * @brief implementation of epsilon greedy bandit algorithm
28 * @author Gregor Hendel
29 */
30
31/*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
32
33#include "scip/bandit.h"
35#include "scip/pub_bandit.h"
36#include "scip/pub_message.h"
37#include "scip/pub_misc.h"
38#include "scip/scip_bandit.h"
39#include "scip/scip_mem.h"
41
42#define BANDIT_NAME "eps-greedy"
43#define EPSGREEDY_SMALL 1e-6
44
45/*
46 * Data structures
47 */
48
49/** private data structure of epsilon greedy bandit algorithm */
50struct SCIP_BanditData
51{
52 SCIP_Real* weights; /**< weights for every action */
53 SCIP_Real* priorities; /**< saved priorities for tie breaking */
54 int* sels; /**< individual number of selections per action */
55 SCIP_Real eps; /**< epsilon parameter (between 0 and 1) to control epsilon greedy */
56 SCIP_Real decayfactor; /**< the factor to reduce the weight of older observations if exponential decay is enabled */
57 int avglim; /**< nonnegative limit on observation number before the exponential decay starts,
58 * only relevant if exponential decay is enabled
59 */
60 int nselections; /**< counter for the number of selection calls */
61 SCIP_Bool preferrecent; /**< should the weights be updated in an exponentially decaying way? */
62};
63
64/*
65 * Callback methods of bandit algorithm virtual function table
66 */
67
68/** callback to free bandit specific data structures */
70{ /*lint --e{715}*/
72 int nactions;
73
74 assert(bandit != NULL);
75
78 assert(banditdata->weights != NULL);
79 nactions = SCIPbanditGetNActions(bandit);
80
81 BMSfreeBlockMemoryArray(blkmem, &banditdata->weights, nactions);
82 BMSfreeBlockMemoryArray(blkmem, &banditdata->priorities, nactions);
83 BMSfreeBlockMemoryArray(blkmem, &banditdata->sels, nactions);
85
86 SCIPbanditSetData(bandit, NULL);
87
88 return SCIP_OKAY;
89}
90
91/** selection callback for bandit algorithm */
93{ /*lint --e{715}*/
95 SCIP_Real randnr;
96 SCIP_Real curreps;
97 SCIP_RANDNUMGEN* rng;
98 int nactions;
99 assert(bandit != NULL);
101
104 rng = SCIPbanditGetRandnumgen(bandit);
105 assert(rng != NULL);
106
107 nactions = SCIPbanditGetNActions(bandit);
108
109 /* roll the dice to check if the best element should be picked, or an element at random */
110 randnr = SCIPrandomGetReal(rng, 0.0, 1.0);
111
112 /* make epsilon decrease with an increasing number of selections */
113 banditdata->nselections++;
114 curreps = banditdata->eps * sqrt((SCIP_Real)nactions/(SCIP_Real)banditdata->nselections);
115
116 /* select the best action seen so far */
117 if( randnr >= curreps )
118 {
119 SCIP_Real* weights = banditdata->weights;
120 SCIP_Real* priorities = banditdata->priorities;
121 int j;
122 SCIP_Real maxweight;
123
124 assert(weights != NULL);
125 assert(priorities != NULL);
126
127 /* pick the element with the largest reward */
128 maxweight = weights[0];
129 *selection = 0;
130
131 /* determine reward for every element */
132 for( j = 1; j < nactions; ++j )
133 {
134 SCIP_Real weight = weights[j];
135
136 /* select the action that maximizes the reward, breaking ties by action priorities */
137 if( maxweight < weight
138 || (weight >= maxweight - EPSGREEDY_SMALL && priorities[j] > priorities[*selection] ) )
139 {
140 *selection = j;
141 maxweight = weight;
142 }
143 }
144 }
145 else
146 {
147 /* play one of the actions at random */
148 *selection = SCIPrandomGetInt(rng, 0, nactions - 1);
149 }
150
151 return SCIP_OKAY;
152}
153
154/** update callback for bandit algorithm */
156{ /*lint --e{715}*/
158
159 assert(bandit != NULL);
160
163
164 /* increase the selection count */
165 ++banditdata->sels[selection];
166
167 /* the very first observation is directly stored as weight for both average or exponential decay */
168 if( banditdata->sels[selection] == 1 )
169 banditdata->weights[selection] = score;
170 else
171 {
172 /* use exponentially decreasing weights for older observations */
173 if( banditdata->preferrecent && banditdata->sels[selection] > banditdata->avglim )
174 {
175 /* decrease old weights by decay factor */
176 banditdata->weights[selection] *= banditdata->decayfactor;
177 banditdata->weights[selection] += (1.0 - banditdata->decayfactor) * score;
178 }
179 else
180 {
181 /* update average score */
182 SCIP_Real diff = score - banditdata->weights[selection];
183 banditdata->weights[selection] += diff / (SCIP_Real)(banditdata->sels[selection]);
184 }
185 }
186
187 return SCIP_OKAY;
188}
189
190/** reset callback for bandit algorithm */
192{ /*lint --e{715}*/
194 SCIP_Real* weights;
195 int w;
196 int nactions;
197 SCIP_RANDNUMGEN* rng;
198
199 assert(bandit != NULL);
200
203
204 weights = banditdata->weights;
205 nactions = SCIPbanditGetNActions(bandit);
206 assert(weights != NULL);
207 assert(banditdata->priorities != NULL);
208 assert(nactions > 0);
209
210 rng = SCIPbanditGetRandnumgen(bandit);
211 assert(rng != NULL);
212
213 /* alter priorities slightly to make them unique */
214 if( priorities != NULL )
215 {
216 for( w = 1; w < nactions; ++w )
217 {
218 assert(priorities[w] >= 0);
219 banditdata->priorities[w] = priorities[w] + SCIPrandomGetReal(rng, -EPSGREEDY_SMALL, EPSGREEDY_SMALL);
220 }
221 }
222 else
223 {
224 /* use random priorities */
225 for( w = 0; w < nactions; ++w )
226 banditdata->priorities[w] = SCIPrandomGetReal(rng, 0.0, 1.0);
227 }
228
229 /* reset weights and selection counters to 0 */
230 BMSclearMemoryArray(weights, nactions);
231 BMSclearMemoryArray(banditdata->sels, nactions);
232
233 banditdata->nselections = 0;
234
235 return SCIP_OKAY;
236}
237
238/*
239 * interface methods of the Epsilon Greedy bandit algorithm
240 */
241
242/** internal method to create and reset epsilon greedy bandit algorithm */
244 BMS_BLKMEM* blkmem, /**< block memory */
245 BMS_BUFMEM* bufmem, /**< buffer memory */
246 SCIP_BANDITVTABLE* vtable, /**< virtual function table with epsilon greedy callbacks */
247 SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
248 SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
249 SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
250 SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
251 SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
252 int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
253 * only relevant if exponential decay is enabled */
254 int nactions, /**< the positive number of possible actions */
255 unsigned int initseed /**< initial random seed */
256 )
257{
259
262 assert(eps >= 0.0);
263
264 SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->weights, nactions) );
265 SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->priorities, nactions) );
266 SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->sels, nactions) );
267 banditdata->eps = eps;
268 banditdata->nselections = 0;
269 banditdata->preferrecent = preferrecent;
270 banditdata->decayfactor = decayfactor;
271 banditdata->avglim = avglim;
272
273 SCIP_CALL( SCIPbanditCreate(epsgreedy, vtable, blkmem, bufmem, priorities, nactions, initseed, banditdata) );
274
275 return SCIP_OKAY;
276}
277
278/** create and resets an epsilon greedy bandit algorithm */
280 SCIP* scip, /**< SCIP data structure */
281 SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
282 SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
283 SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
284 SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
285 SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
286 int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
287 * only relevant if exponential decay is enabled */
288 int nactions, /**< the positive number of possible actions */
289 unsigned int initseed /**< initial seed for random number generation */
290 )
291{
292 SCIP_BANDITVTABLE* vtable;
293 assert(scip != NULL);
295
297 if( vtable == NULL )
298 {
299 SCIPerrorMessage("Could not find virtual function table for %s bandit algorithm\n", BANDIT_NAME);
300 return SCIP_INVALIDDATA;
301 }
302
304 priorities, eps, preferrecent, decayfactor, avglim, nactions, SCIPinitializeRandomSeed(scip, initseed)) );
305
306 return SCIP_OKAY;
307}
308
309/** get weights array of epsilon greedy bandit algorithm */
311 SCIP_BANDIT* epsgreedy /**< epsilon greedy bandit algorithm */
312 )
313{
318
319 return banditdata->weights;
320}
321
322/** set epsilon parameter of epsilon greedy bandit algorithm */
324 SCIP_BANDIT* epsgreedy, /**< epsilon greedy bandit algorithm */
325 SCIP_Real eps /**< parameter to increase probability for exploration between all actions */
326 )
327{
330 assert(eps >= 0);
331
333
334 banditdata->eps = eps;
335}
336
337
338/** creates the epsilon greedy bandit algorithm includes it in SCIP */
void SCIPbanditSetData(SCIP_BANDIT *bandit, SCIP_BANDITDATA *banditdata)
Definition bandit.c:200
SCIP_RETCODE SCIPbanditCreate(SCIP_BANDIT **bandit, SCIP_BANDITVTABLE *banditvtable, BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_Real *priorities, int nactions, unsigned int initseed, SCIP_BANDITDATA *banditdata)
Definition bandit.c:42
SCIP_BANDITDATA * SCIPbanditGetData(SCIP_BANDIT *bandit)
Definition bandit.c:190
internal methods for bandit algorithms
SCIP_RETCODE SCIPbanditCreateEpsgreedy(BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_BANDITVTABLE *vtable, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
#define EPSGREEDY_SMALL
SCIP_RETCODE SCIPincludeBanditvtableEpsgreedy(SCIP *scip)
#define BANDIT_NAME
internal methods for epsilon greedy bandit selection
SCIP_VAR * w
#define SCIP_ALLOC(x)
Definition def.h:399
#define SCIP_Real
Definition def.h:186
#define SCIP_CALL(x)
Definition def.h:388
int SCIPbanditGetNActions(SCIP_BANDIT *bandit)
Definition bandit.c:303
void SCIPsetEpsilonEpsgreedy(SCIP_BANDIT *epsgreedy, SCIP_Real eps)
SCIP_Real * SCIPgetWeightsEpsgreedy(SCIP_BANDIT *epsgreedy)
SCIP_RETCODE SCIPcreateBanditEpsgreedy(SCIP *scip, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
SCIP_RANDNUMGEN * SCIPbanditGetRandnumgen(SCIP_BANDIT *bandit)
Definition bandit.c:293
SCIP_BANDITVTABLE * SCIPfindBanditvtable(SCIP *scip, const char *name)
Definition scip_bandit.c:80
SCIP_RETCODE SCIPincludeBanditvtable(SCIP *scip, SCIP_BANDITVTABLE **banditvtable, const char *name, SCIP_DECL_BANDITFREE((*banditfree)), SCIP_DECL_BANDITSELECT((*banditselect)), SCIP_DECL_BANDITUPDATE((*banditupdate)),)
Definition scip_bandit.c:48
BMS_BUFMEM * SCIPbuffer(SCIP *scip)
Definition scip_mem.c:72
SCIP_Real SCIPrandomGetReal(SCIP_RANDNUMGEN *randnumgen, SCIP_Real minrandval, SCIP_Real maxrandval)
Definition misc.c:10041
int SCIPrandomGetInt(SCIP_RANDNUMGEN *randnumgen, int minrandval, int maxrandval)
Definition misc.c:10019
unsigned int SCIPinitializeRandomSeed(SCIP *scip, unsigned int initialseedvalue)
return SCIP_OKAY
int selection
assert(minobj< SCIPgetCutoffbound(scip))
#define NULL
Definition lpi_spx1.cpp:161
#define BMSfreeBlockMemory(mem, ptr)
Definition memory.h:467
#define BMSallocBlockMemory(mem, ptr)
Definition memory.h:453
#define BMSallocBlockMemoryArray(mem, ptr, num)
Definition memory.h:456
#define BMSfreeBlockMemoryArray(mem, ptr, num)
Definition memory.h:469
#define BMSclearMemoryArray(ptr, num)
Definition memory.h:132
struct BMS_BlkMem BMS_BLKMEM
Definition memory.h:439
BMS_BLKMEM * SCIPblkmem(SCIP *scip)
Definition scip_mem.c:57
real eps
public methods for bandit algorithms
public methods for message output
#define SCIPerrorMessage
Definition pub_message.h:64
public data structures and miscellaneous methods
public methods for bandit algorithms
public methods for memory management
public methods for random numbers
#define SCIP_DECL_BANDITUPDATE(x)
Definition type_bandit.h:75
#define SCIP_DECL_BANDITFREE(x)
Definition type_bandit.h:63
struct SCIP_BanditData SCIP_BANDITDATA
Definition type_bandit.h:56
#define SCIP_DECL_BANDITSELECT(x)
Definition type_bandit.h:69
#define SCIP_DECL_BANDITRESET(x)
Definition type_bandit.h:82
@ SCIP_INVALIDDATA
enum SCIP_Retcode SCIP_RETCODE