Commit 29fc67cc authored by Simon de Givry's avatar Simon de Givry
Browse files

migration from cvs mulcyber to git forgemia loosing history

parents
This diff is collapsed.
This diff is collapsed.
\documentclass[french]{beamer}
\usepackage[latin1]{inputenc}
\usepackage[french]{babel}
\usepackage{graphicx}
\usepackage{eurosym}
\usepackage[T1]{fontenc}
%\usetheme{Frankfurt}
\useoutertheme[subsection=false]{mysmoothbars}
\useinnertheme[shadow=true]{rounded}
\usecolortheme{orchid}
\usecolortheme{whale}
\setbeamerfont{block title}{size={}}
%\usefonttheme[onlylarge]{structurebold}
%\setbeamerfont*{frametitle}{size=\normalsize,series=\bfseries}
\setbeamertemplate{navigation symbols}{}
\title[Mod\`eles probabilistes et Algorithmes pour la Biologie]{Mod\`eles probabilistes}
\author{S. de Givry, T. Faraut, T. Schiex}
\date{\relax}
\institute{\includegraphics[width=3cm]{Fig/INRA.pdf}\\(Merci C.M. Bishop pour le PRML - Springer)}
\newcommand{\esp}{\mathbb{E}}
\def\ci{\perp\!\!\!\perp}
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame{\titlepage}
%\section{Modles probabilistes (discrets)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]{Rappels de probabilit\'es}
\vskip -1ex
\begin{columns}[t]
\column{.4\textwidth}
\begin{example}[Les urnes]
\centerline{\includegraphics[width=.9\textwidth]{Fig/Figure1-9}}
\end{example}
\column{.6\textwidth}
\begin{block}{Processus}
\begin{itemize}
\item On choisit une des deux bo\^{\i}tes (rouge 40\%, bleue 60\%)
\item On choisit un objet sans distinction (Abricot, Courgette)
\end{itemize}
\end{block}
\end{columns}
\begin{itemize}
\item Une variable (al\'eatoire) $B$ repr\'esente la bo\^{\i}te choisie.
\item Une variable (al\'eatoire) $F$ repr\'esente le fruit obtenu.
\end{itemize}
\begin{center}
$p(B=r) = \frac{4}{10} \quad\quad p(B=b) = \frac{6}{10}$.
\end{center}
\begin{block}{Propri\'et\'es}
Les probabilit\'es sont $\leq 1$. Si les cas sont mutuellement exclusifs et exhaustifs, la somme des probabilit\'es vaut $1$.
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]{Notations}
On ne consid\`ere que des variables discr\`etes.
\begin{block}{Notation}
\begin{itemize}
\item Variables al\'eatoires~: $X,Y,Z,\ldots$
\item Vecteurs, s\'equences de variables~: $\mathbf{X},\mathbf{Y},\mathbf{Z},\ldots$
\item Valeurs~: $\mathtt{a}, \mathtt{b}, \mathtt{c}, \mathtt{g}, \mathtt{r}, \mathtt{t},\ldots$
\item Vecteur, s\'equence de valeurs~: $\mathtt{acgtgcatggagccacgtcaggta}$
\item Valeurs possibles~: $u, v, w, x, y, z\ldots$
\item Vecteurs, s\'equences de valeurs possibles~: $\mathbf{u},\mathbf{v},\mathbf{w},\mathbf{x} , \mathbf{y} , \mathbf{z}\ldots$
\end{itemize}
\end{block}
\only<2->{
\begin{example}[\alt<3>{ou plus simplement\ldots}{Un peu lourd parfois}]
\[p(\uncover<2>{X =} \mathtt{a})\quad \sum_{\alt<2>{x}{X}} p(X\uncover<2>{ = x}) \quad p(\uncover<2>{\mathbf{X}=} \mathtt{tgataatag}) \quad \sum_{\alt<2>{\mathbf{x}}{\mathbf{X}}} p(\mathbf{X}\uncover<2>{=\mathbf{x}})\]
\end{example}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]{Rappels de probabilit\'es}
\vskip -3ex
\begin{columns}[t]
\column{.4\textwidth}
\begin{example}[Les urnes]
\centerline{\includegraphics[width=.9\textwidth]{Fig/Figure1-9}}
\end{example}
\column{.6\textwidth}
\begin{block}{Exp\'erience}
On r\'ealise $N = 1000$ tirages.
\begin{itemize}
\item $n_{ij} =$ \# de tirages o\`u $B=i, F = j$.
\item $c_i= $ \# de tirages o\`u $B=i$.
\item $r_j= $ \# de tirages o\`u $F=j$.
\end{itemize}
\end{block}
\end{columns}
\only<2->{\centerline{\begin{beamerboxesrounded}{}
{\begin{displaymath}
% use packages: array
\begin{array}{l|l|l||l}
& B = \texttt{r} & B = \texttt{b} & \Sigma \\\hline
F = \texttt{a} & 449 & 102 & 551 \\\hline
F = \texttt{c} & 152 & 297 & 449 \\\hline\hline
\Sigma & 601 & 399 & 1000
\end{array}
\end{displaymath}}\end{beamerboxesrounded}}}
\only<3| trans:1>{
\begin{block}{Probabilit\'e jointe}
$p(B=b ,F=f) \approx \frac{n_{bf}}{N}$ \hfill $p(B = \mathtt{r}, F = \mathtt{c})\approx 0.15$.
\end{block}}
\only<4| trans:2>{
\begin{block}{Probabilit\'es marginales}
$p(B=b) \approx \frac{c_{b}}{N}, b\in\{\texttt{r},\texttt{b}\}$ \hfill $p(B = \mathtt{r})\approx 0.601$.\\
$p(F=f) \approx \frac{r_{f}}{N}, f\in\{\texttt{a},\texttt{c}\}$ \hfill $p(F = \texttt{c})\approx 0.449$.
\end{block}}
\only<5| trans:3>{
\begin{block}{Probabilit\'es conditionnelles}
$p(F=f|B=b) \approx \frac{n_{bf}}{c_b}$ \hfill $p(F = \texttt{c} |B = \texttt{r})\approx 0.25$.\\
$p(B=b|F=f) \approx \frac{n_{bf}}{r_f}$ \hfill $p(B = \texttt{r} |F = \texttt{c})\approx 0.34$.
\end{block}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Un cas plus g\'en\'eral}
2 variables $X,Y$; domaines $x_i, i=1,\ldots L$ ($y_j, j = 1,\ldots M$)
\centerline{\includegraphics[scale=0.9]{Fig/Figure1-10}}
\begin{block}{A l'infini}
\begin{itemize}
\item<2-> $p(X = x_i, Y = y_j) = \frac{n_{ij}}{N}$
\item<3-> $p(X = x_i) = \frac{c_i}{N}$ \hfill $= \sum_{j = 1}^M p(X = x_i, Y = y_j)$
\item<4-> $p(Y = y_j| X = x_i) = \frac{n_{ij}}{c_i}$
\item<5-> $p(X = x_i| Y = y_j) = \frac{n_{ij}}{r_j}$
\item<6-> $p(X = x_i, Y = y_j) = \frac{n_{ij}}{c_i}.\frac{c_i}{N} \hfill = p(Y = y_j|X = x_i).p(X=x_i)$
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Les deux grandes r\`egles}
\begin{block}{R\`egle de la somme - \'elimination de variable - marginale}
\[p(X) = \sum_Y p(X,Y)\]
\end{block}
\uncover<2->{\begin{block}{R\`egle du produit}
\[p(X,Y) = p(Y|X).p(X) = p(X|Y).p(Y)\]
\end{block}}
\uncover<3->{NB: Si $p(Y|X) = p(Y)$, $Y$ ind\'ependant de $X$: $Y \ci X$}
\vskip 3ex
Ces r\`egles s'appliquent \`a un nombre quelconque de variables.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{R\`egle de Bayes}
Application directe des r\`egles pr\'ec\'edentes.
\begin{block}{R\`egle de Bayes}
\[ p(Y|X) = \frac{p(X|Y).p(Y)}{p(X)} = \frac{p(X|Y).p(Y)}{\sum_Y p(X|Y).p(Y)}\]
\end{block}
\[
\underbrace{p(Y|X)}_\mathrm{posterior} \propto \underbrace{p(X|Y)}_\mathrm{vraisemblance} \times \underbrace{p(Y)}_\mathrm{prior}
\]
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]{Retour sur les bo\^{\i}tes}
\begin{columns}[t]
\column{.35\textwidth}
\begin{example}[Les urnes]
\centerline{\includegraphics[width=.9\textwidth]{Fig/Figure1-9}}
\end{example}
\column{.65\textwidth}
\begin{block}{Soit...}
\begin{itemize}\small
\item $p(B = \texttt{r}) = \frac{4}{10} \hfill p(B=\texttt{b}) = \frac{6}{10}$
\item $p(F=\texttt{c}|B=\texttt{r}) = \frac{1}{4} \hfill p(F=\texttt{a}|B=\texttt{r}) = \frac{3}{4}$
\item $p(F=\texttt{c}|B=\texttt{b}) = \frac{3}{4} \hfill p(F=\texttt{a}|B=\texttt{b}) = \frac{1}{4}$
\end{itemize}
\end{block}
\end{columns}
\only<2>{
Quelle est la probabilit\'e de tirer une courgette ?
\begin{block}{$p(F=\texttt{c}) = \frac{11}{20}$}\small
$ p(F=\texttt{c}|B=\texttt{r}).p(B=\texttt{r})+p(F=\texttt{c}|B=\texttt{b}).p(B=\texttt{b}) = \frac{1}{4}.\frac{4}{10} + \frac{3}{4}.\frac{6}{10}$
\end{block}}
\only<3->{
On tire un abricot. Probabilit\'e qu'il vienne de la bo\^{\i}te rouge ?
\begin{block}{$p(B=\texttt{r}|F=\texttt{a}) = \frac{2}{3}$}
$\frac{p(F=\texttt{a}|B=\texttt{r}).p(B=\texttt{r})}{p(F=\texttt{a})} = \frac{3}{4}.\frac{4}{10}.\frac{20}{9}$
\end{block}}
\only<4>{
\begin{itemize}
\item avant de conna\^{\i}tre $F$ :\hfill $p(B=\texttt{r})= \frac{4}{10}$ (prior)
\item apr\`es observation de $F$ : \hfill $p(B=\texttt{r}|F=\texttt{a})=\frac{2}{3}$ (posterior)
\end{itemize}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Moyenne, esp\'erance}
\begin{block}{Moyenne - esp\'erance}
La valeur moyenne d'une fonction $f(x)$ sous une distribution $p(X=x)$ est appel\'ee esp\'erance de $f$. \[\esp_X[f] = \sum_X p(X)f(X)\]
\end{block}
L'esp\'erance est lin\'eaire.
$\esp_X[f(X,y)] = \sum_X p(X)f(X,y)$ est une fonction de $y$.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Variance, Covariance}
\begin{block}{Variance}
La variance de $f(x)$ est d\'efinie par:
\[\mathrm{var}_X[f] = \esp_X[(f(X) - \esp_X[f(X)])^2] \]
Elle quantifie \`a quel point $f(x)$ varie autour de sa moyenne $\esp_X[f(X)]$ (moyenne des carr\'es des \'ecarts \`a la moyenne).
\end{block}
$\mathrm{var}_X[f] = \esp_X[(f(X) - \esp_X[f(X)])^2] = \esp_X[f(X)^2 - 2\esp_X[f(X)]f(X) + \esp_X[f(X)]^2]
= \esp_X[f(X)^2] - \esp_X[f(X)]^2 $
\begin{block}{Covariance}
$\mathrm{cov}[X,Y] = \esp_{X,Y}[(X-\esp_X[X])(Y-\esp[Y])] = \esp_{X,Y}[XY] - \esp[X]\esp[Y]$
Quantifie \`a quel point $X$ et $Y$ varient ensemble.
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]{Mod\`eles: pour manipuler l'incertitude}
\begin{block}{Mod\`ele probabiliste}
Un mod\`ele probabiliste est un objet math\'ematique qui d\'efinit une distribution de probabilit\'e sur un ensemble d'objets (eg. s\'equences et leur structure intron-exon). Il d\'epend en g\'en\'eral d'un ensemble de param\`etres.
\end{block}
\begin{enumerate}
\item<2-> parce que les objets sont ``al\'eatoires'' (mutations, r\'earrangements, crossing-overs...). On peut observer diff\'erentes r\'ealisations (r\'ep\'etitions) de la (ou des) variables al\'eatoires observ\'ee(s).
\item<3-> parce que notre connaissance des objets d'int\'er\^et est imparfaite. Un mod\`ele probabiliste permet de capturer ce que l'on pense savoir, ou certaines caract\'eristiques qui semblent pertinentes pour un probl\`eme donn\'e.
\end{enumerate}
\only<4>{Un ``bon'' mod\`ele d\'epend aussi du but de la mod\'elisation.}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {
\frametitle{Un exemple simple: Bernoulli}
Soit une bo\^{\i}te contenant courgettes et abricots. Le tirage avec remise d\'efinit une variable al\'eatoire $X\in\{\texttt{0},\texttt{1}\}$ ($\texttt{1}\equiv$ courgette). Soit $\mu$ la fraction de courgettes dans la bo\^{\i}te.
\begin{block}{Un mod\`ele simple}
\[p(X=1|\mu) = \mu\]
\[p(X=x|\mu) = \mu^x(1-\mu)^{1-x}\]
\end{block}
On observe une s\'equence de $N$ tirages ind\'ependants et identiquement distribu\'es $\mathbf{D} = (x_1,\ldots,x_N)$. On utilise le mod\`ele pr\'ec\'edent.
\begin{example}[$\mathbf{D} = \texttt{01101}$]
\[p(\texttt{01101}|\mu) = (1-\mu).\mu.\mu.(1-\mu).\mu = \mu^{3}.(1-\mu)^{2}\]
\end{example}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {
\frametitle{Vraisemblance}
\begin{block}{Vraisemblance}
La vraisemblance de $\mathbf{D}$ est d\'efinie comme la probabilit\'e de $\mathbf{D}$ sous le mod\`ele utilis\'e. C'est une fonction des param\`etres ($\mu$).
\end{block}
\begin{example}
\[p(\mathbf{D}|\mu) = \prod_{n=1}^N p(x_n|\mu) = \prod_{n=1}^N \mu^{x_n}.(1-\mu)^{1-x_n} = \mu^{\#\texttt{1}}(1-\mu)^{N-\#\texttt{1}}\]
\end{example}
o\`u $\#\texttt{1}$ d\'enote le nombre de 1 dans $\mathbf{D}$ (statistique suffisante).
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {
\frametitle{Mod\`eles probabilistes des s\'equences}
\begin{block}{Bioinformatique}
\begin{itemize}[<+->]
\item beaucoup de variables (eg. s\'equences de longueur $N$).
\item une partie est observ\'ee (donn\'ees $\mathbf{D}$)
\item une partie n'est pas observ\'ee (ce que l'on souhaite savoir $\mathbf{M}$)
\item d\'epend de param\`etres ($\mu$)
\item souvent, $p(\mathbf{D},\mathbf{M}|\mu)$ est ``simple'' \`a d\'ecrire.
\end{itemize}
\end{block}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]{\`A quoi \c{c}a sert ?}
\only<1->{\begin{block}{G\'en\'eration}
\'Echantillonner la distribution. Simulation.
\end{block}}
\only<2->{\begin{block}{Evaluation}
Calculer $p(\mathbf{D},\mathbf{M}|\mu)$ (on sait tout) ou plus souvent~:
\[p(\mathbf{D}|\mu) = \sum_\mathbf{M} p(\mathbf{D},\mathbf{M}|\mu)\]
\end{block}}
\only<3->{\begin{block}{Classification}
Comparaison de $p(\mathbf{D}|\mu)$ pour plusieurs mod\`eles diff\'erents.
\end{block}}
\only<4->{\begin{block}{Reconstruction}
Trouver $\mathbf{M}$ qui maximise $p(\mathbf{D}|\mathbf{M},\mu)$.
\end{block}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]{Et encore...}
\begin{block}{Estimation}
\`A partir de $\mathbf{D}$, calculer $\mu$ qui repr\'esente bien $\mathbf{D}$.
\end{block}
\only<2->{
\begin{block}{Maximum de vraisemblance}
\[\hat{\mu} = \arg\max_\mu p(\mathbf{D}|\mu) = \arg\max_\mu (\sum_{\mathbf{M}} p(\mathbf{D},\mathbf{M}|\mu))\]
\end{block}
M\'elange optimisation ($\max$) et int\'egration discr\`ete ($\sum$).}
\only<3| trans:1>{\begin{block}{Bonnes propri\'et\'es, asymptotiques sur taille d'\'echantillon}
\begin{enumerate}
\item convergent (il tend en probabilit\'e vers la vraie valeur)
\item asymptotiquement efficace (variance minimum)
\item asymptotiquement normal
\end{enumerate}
\end{block}}
\only<4-| trans:2>{
\begin{example}[Pas de manquant, Bernoulli]
Observation~: s\'equence de longueur $N$ avec $\#c$ courgettes et $\#a = N-\#c$ abricots. $p(\mathbf{D}|\mu) > 0$, $\log$ croissant~: passage au $\log$ ( $\prod \rightarrow \sum$).
\begin{enumerate}
\item $\log p(\mathbf{D}|\mu) = \#c.\log(\mu) + (N-\#c).\log(1-\mu)$
\item $\frac{d \log p(\mathbf{D}|\mu)}{d \mu} = \frac{\#c}{\mu} - \#a.\frac{1}{1-\mu} = 0 \Rightarrow \mu = \frac{\#c}{\#c+\#a} = \frac{\#c}{N}$
\end{enumerate}
\end{example}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {
\frametitle{Un exemple}
\begin{block}{Les deux bo\^{\i}tes}
On consid\`ere une s\'equence de $N$ tirages avec remise dans deux bo\^{\i}tes. On commence avec une bo\^{\i}te, on change au plus une fois de bo\^{\i}te.
\end{block}
\begin{example}
\begin{itemize}
\item $\mathbf{D} = \mathtt{ccacacccacacaaacaaacacaacaaaccaa}$
\item $M = $ point de changement de bo\^{\i}te (a priori n'importe o\`u, sans pr\'ef\'erence).
\item $\mu = (\mu_1,\mu_2)$ proportions de courgettes dans les deux bo\^{\i}tes.
\end{itemize}
\end{example}
\begin{block}{Vraisemblance}
$ p(\mathbf{D},M|\mathbf{\mu}) = \mu_1^{\#c<M}(1-\mu_1)^{\#a<M}.\mu_2^{\#c\geq M}.(1-\mu_2)^{\#a\geq M}$
\end{block}
Possibilit\'e d'estimer $\mu_1,\mu_2$, de trouver la position de $M$ la plus vraisemblable...
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Dtailler l'exemple ?
\frame {
\frametitle{Conclusion}
\begin{block}{Algorithmes}
\begin{enumerate}
\item La plupart des questions n\'ecessitent de calculer des sommes sur un espace multidimensionnel de taille qui cro\^{\i}t exponentiellement avec le nombre de variables.
\item La sommation directe n'est possible que sur des probl\`emes de taille tr\`es faible.
\item Sinon~: exploitation de la ``structure'' des probl\`emes (ind\'ependances) dans des algorithmes de type ``\'elimination de variable'' (programmation dynamique).
\item au-del\`a~: algorithmes bas\'es sur la simulation (Monte-Carlo) et extensions (pas dans le cours).
\end{enumerate}
\end{block}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
%% option pour imprimer sans les animations
\documentclass[trans]{beamer}
%%\documentclass{beamer}
\usepackage[latin1]{inputenc}
\usepackage{graphicx}
\usepackage{eurosym}
\usepackage[vlined]{algorithm2e}
\usepackage{beamerthemeshadow}
\newcommand{\red}{\color{red}}
\newcommand{\e}{\textbf{e}}
\newcommand{\E}{\textbf{E}}
\newcommand{\x}{\textbf{x}}
\newcommand{\X}{\textbf{X}}
\newcommand{\y}{\textbf{y}}
\newcommand{\Y}{\textbf{Y}}
\newcommand{\z}{\textbf{z}}
\newcommand{\Z}{\textbf{Z}}
\renewcommand{\u}{\textbf{u}}
\newcommand{\U}{\textbf{U}}
\renewcommand{\v}{\textbf{v}}
\newcommand{\V}{\textbf{V}}
\newcommand{\w}{\textbf{w}}
\newcommand{\W}{\textbf{W}}
\newcommand{\1}{\textbf{1}}
\renewcommand{\FuncSty}[1]{{\sf #1}}
\renewcommand{\ArgSty}[1]{\textrm{#1}}
\SetKwFor{Procedure}{Proc\'edure}{}{}%
\SetKwFor{Function}{Fonction}{}{}%
\SetKwFunction{pop}{pop}
\SetInd{0.3em}{0.6em}
\setlength{\algomargin}{0em}
\newcommand{\assign}{:=}
\def\ci{\perp\!\!\!\perp}
\title[Algorithmes et Mod\`eles probabilistes pour la Biologie]{Apprentissage dans les r\'eseaux Bay\'esiens}
\author{S. de Givry, T. Faraut, T. Schiex}
\date{\relax}
\institute{\centerline{\includegraphics[width=1.5cm]{Fig/INRA.pdf}}}
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame{\titlepage}
\section{Apprentissage dans les r\'eseaux Bay\'esiens}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {
\frametitle{Apprentissage}
Deux approches~:
\begin{itemize}
\item {\red Apprentissage automatique \`a partir de donn\'ees} (une liste d'exemples d'affectations des variables).
\item Apprentissage par acquisition de connaissances avec un expert du domaine.
\end{itemize}
\uncover<2->{
{\small
\begin{tabular}{l|c|c}
& Structure connue & Structure inconnue\\
\hline
{\red Donn\'ees compl\`etes} & Estimation param\'etrique & Optimisation discr\`ete\\
&& sur la structure\\
\hline
Donn\'ees incompl\`etes & Optimisation param\'etrique & Optimisation mixte
\end{tabular}}
}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {
\frametitle{Estimation param\'etrique}
\begin{block}{Estimation statistique par maximum de vraisemblance}
$$p(V_i = v | \Pi(V_i) = \u) = \hat\theta_{i,\u,v} = \frac{N_{i,\u,v}}{\sum_{w=1}^{d_i} N_{i,\u,w}}$$
avec $d_i$, la taille du domaine de $V_i$ et $N_{i,\u,v}$, le nombre d'exemples o\`u la variable $V_i$ est affect\'ee \`a la valeur $v$ et ses parents au tuple $\u$.
\end{block}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {
\frametitle{Estimation param\'etrique}
{\small
\begin{block}{D\'emonstration}
La vraisemblance d'un exemple $\v = (v_1, \ldots, v_n)$ s'\'ecrit
$$p(\V = \v | \textbf{$\theta$}) = \prod_{i=1}^n p(V_i = v_i | \Pi(V_i) = \v, \textbf{$\theta$}) = \prod_{i=1}^n \theta_{i,\v,v_i}$$
\uncover<2->{
La vraisemblance de l'ensemble des donn\'ees $\mathcal{D} = (\v^1,\ldots, \v^N)$ s'\'ecrit
\begin{eqnarray*}
p(\mathcal{D} | \textbf{$\theta$}) &=& \prod_{l=1}^N p(\V=\v^l | \textbf{$\theta$})= \prod_{l=1}^N \prod_{i=1}^n \theta_{i,\v^l,v_i^l}\pause\\
&=& \prod_{i=1}^n \prod_{\u \in \mathcal{L}(\Pi(V_i))} \prod_{w=1}^{d_i} \theta_{i,\u,w}^{N_{i,\u,w}}\pause\\
\ln(p(\mathcal{D} | \textbf{$\theta$})) &=& \sum_{i=1}^n \sum_{\u \in \mathcal{L}(\Pi(V_i))} \sum_{w=1}^{d_i} N_{i,\u,w} \ln(\theta_{i,\u,w})
\end{eqnarray*}}
\end{block}
}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {
\frametitle{Estimation param\'etrique}
{\small
\begin{block}{D\'emonstration - suite}
La contrainte de normalisation sur les probabilit\'es conditionnelles~:
$$\sum_{w=1}^{d_i} \theta_{i,\u,w} = 1 \quad \Leftrightarrow \quad \theta_{i,\u,d_i} = 1 - \sum_{w=1}^{d_i-1} \theta_{i,\u,w}$$
\uncover<2->{
On a donc~:
$$\ln(p(\mathcal{D} | \textbf{$\theta$})) = \sum_{i=1}^n \sum_{\u \in \mathcal{L}(\Pi(V_i))} (\sum_{w=1}^{d_i - 1} N_{i,\u,w} \ln(\theta_{i,\u,w}) + N_{i,\u,d_i} \ln(1 - \sum_{w=1}^{d_i-1} \theta_{i,\u,w}))$$
}\uncover<3->{
Et sa d\'eriv\'ee par rapport \`a un param\`etre $\theta_{i,\u,v}$ s'\'ecrit~:
$$\frac{\partial \ln(p(\mathcal{D} | \textbf{$\theta$}))}{\partial \theta_{i,\u,v}} = \frac{N_{i,\u,v}}{\theta_{i,\u,v}} - \frac{N_{i,\u,d_i}}{1 - \sum_{w=1}^{d_i-1} \theta_{i,\u,w}} = \frac{N_{i,\u,v}}{\theta_{i,\u,v}} - \frac{N_{i,\u,d_i}}{\theta_{i,\u,d_i}}$$}
\end{block}
}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {
\frametitle{Estimation param\'etrique}
{\small
\begin{block}{D\'emonstration - suite}
La valeur $\hat\theta_{i,\u,v}$ du param\`etre $\theta_{i,\u,v}$ qui maximise la vraisemblance doit annuler la d\'eriv\'ee~:
%%% $$\forall v \in \{1,\ldots,d_i-1\}, \quad \frac{N_{i,\u,v}}{\hat\theta_{i,\u,v}} = \frac{N_{i,\u,d_i}}{\hat\theta_{i,\u,d_i}} \quad \Leftrightarrow \quad \frac{\sum_{v=1}^{d_i} N_{i,\u,v}}{\sum_{v=1}^{d_i} \hat\theta_{i,\u,v}} = \sum_{w=1}^{d_i} N_{i,\u,w}$$
$$\forall v \in \{1,\ldots,d_i-1\}, \quad \frac{N_{i,\u,v}}{\hat\theta_{i,\u,v}} = \frac{N_{i,\u,d_i}}{\hat\theta_{i,\u,d_i}}$$
\uncover<2->{
D'o\`u le r\'esultat, sous contrainte de normalisation des $\hat\theta$~:
$$\forall v \in \{1,\ldots,d_i\}, \quad \hat\theta_{i,\u,v} = \frac{N_{i,\u,v}}{\sum_{w=1}^{d_i} N_{i,\u,w}}$$}
\end{block}
}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\frame {
\frametitle{Donn\'ees incompl\`etes}
Soit les donn\'ees $\mathcal{D} = \{v^l_i\}_{1 \leq i \leq n, 1 \leq l \leq N}$, avec $\mathcal{D}_o$, la partie observ\'ee mais incompl\`ete de $\mathcal{D}$, et $\mathcal{D}_m$, la partie manquante.