@comment{
  Conference paper in the proceedings of the 41st IEEE Conference on
  Decision and Control (2002), volume 2, pages 1940--1945.
  NOTE(review): the first author is stored as "Abad, F. V.", as exported.
  This may be a split compound surname; confirm against the publisher
  record for the DOI below before relying on sorting or labels.
}
@inproceedings{Abad2002Self-learning-c,
  author    = {Abad, F. V. and Krishnamurthy, V. and Martin, K. and Baltcheva, I.},
  title     = {Self learning control of constrained {Markov} chains -- a gradient approach},
  booktitle = {Proceedings of the 41st {IEEE} Conference on Decision and Control},
  volume    = {2},
  pages     = {1940--1945},
  year      = {2002},
  month     = dec,
  issn      = {0191-2216},
  doi       = {10.1109/CDC.2002.1184811},
  url       = {https://doi.org/10.1109/CDC.2002.1184811},
  keywords  = {approximation theory, constrained average cost finite state Markov decision process, constrained Markov chains, decision theory, gradient approach, gradient estimation schemes, gradient methods, learning systems, locally optimal policy, Markov processes, self learning control, self-adjusting systems, stochastic approximation algorithms, time varying parameters, weak derivatives},
  abstract  = {We present stochastic approximation algorithms for computing the locally optimal policy of a constrained average cost finite state Markov decision process. The stochastic approximation algorithms require computation of the gradient of the cost function with respect to the parameter that characterizes the randomized policy. This is computed by simulation based gradient estimation schemes involving weak derivatives. Similar to neuro-dynamic programming algorithms (e.g. Q-learning or temporal difference methods), the algorithms proposed in the paper are simulation based and do not require explicit knowledge of the underlying parameters such as transition probabilities. However, unlike neuro-dynamic programming methods, the algorithms proposed can handle constraints and time varying parameters. The multiplier based constrained stochastic gradient algorithm proposed is also of independent interest in stochastic approximation.},
}