%\documentclass[10pt]{article}
\documentclass[11pt,draft]{article}
\voffset -24.5mm
 \hoffset -5mm
 \textwidth 165mm
 \textheight 245mm
 \oddsidemargin=6mm
 \evensidemargin=6mm

\usepackage[intlimits]{amsmath}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage[mathscr]{eucal}
%\usepackage{cite}
%\usepackage{dsfont}
%\usepackage[dvips]{graphicx}
\usepackage{indentfirst}

\allowdisplaybreaks[4]
\renewcommand{\baselinestretch}{1.05}

%*********************************************
\renewcommand{\theequation}{\arabic{section}.\arabic{equation}}

%*********************************************
\makeatletter \@addtoreset{equation}{section} \makeatother

\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}{Lemma}[section]
\newtheorem{corollary}{Corollary}[section]
\newtheorem{remark}{Remark}[section] %new
\newtheorem{proposition}{Proposition}[section]
\newtheorem{example}{Example}[section]

\begin{document}
\author{V.Nekrutkin\thanks{Supported by the RFFI grant 129755 ${\rm N^o}$ 3072507.}}
\title{P\'{o}ly{a}  processes and sequences: the martingale characterization}
\date{}
\maketitle
\begin{abstract}
It is proved that a mixed Poisson process $\xi_t$ is  either P\'{o}ly{a} or pure Poisson process iff there exists a non-degenerate
linear transformation $\xi_t\mapsto \eta_t = a(t)\xi_t+b(t)$ such that $\eta_t$ is a martingale. Analogous result
is demonstrated for P\'{o}ly{a} sequences.
\end{abstract}
\bigskip
\noindent
{\small {\bf 2000 Mathematics Subject Classification}: primary 60G55, secondary 60G44, 91B42.}\\
{\small {\bf Keywords}:\, Mixed Poisson processes, P\'{o}lya processes, martingale characterization.}

\section{Introduction}
Let $\eta_{\centerdot}=\{\eta_i, i\geq 1\}$ be a sequence of i.i.d.
random variables such that $\mathcal L(\eta_i)=\mathrm{EXP}(1)$,
where $\mathcal L(\xi)$ stands for the distribution of the random
variable $\xi$ and $\mathrm{EXP}(\lambda)$ is the exponential
distribution with the parameter $\lambda$.

Consider a non-negative
%\marginpar{?}
random variable $\tau$ with the distribution ${\mathcal P}=\mathcal L(\tau)$ and assume that
$\tau$ is independent of $\eta_\centerdot$.
%and denote ${\mathcal P}=\mathcal L(\tau)$.
Then the random process
$N_{\centerdot}=\{N_t,\,t\geq 0\}$ defined by the equality
\begin{gather}
\label{eq:MP_exp}
N_t=\max\{n: \eta_1+\ldots+\eta_n<t\tau\}
\end{gather}
is called {\em the
  mixed Poisson process with the structure distribution $\mathcal P$}.

Definition \eqref{eq:MP_exp} means that the conditional distribution of the process
$N_\centerdot$ under the condition $\tau=z$ coincides with the distribution of a Poisson process with the parameter~$z$.


Mixed Poisson processes are natural models in many fields including accidents and sickness,
market research, clinical trials, etc.
Numerous examples and references can be found in
\cite{Ehr88}--\cite{CW03}.

The most common choice of the structure distribution $\mathcal P$ is the Gamma distribution
${\rm G}(k,\mu)$ with certain parameters $k,\mu>0$ ($k$ is the {\em shape parameter} and $\mu$ the
{\em scale parameter}).
Then
$N_{\centerdot}$ is called  {\em the P\'{o}lya process}
%(the other term is {\em gamma-Poison process})
with the parameters $k,\mu$.

The natural problem is to characterize P\'{o}lya processes in the class of all mixed Poisson processes. Several
characterization results can be found in \cite{Gr97}.

For example
(see \cite[th.\!\! 2.2]{Gr97}), if  $\mathbb{E}\tau\!<\!\infty$
and
%there exists such $t_0\!>\!0$ that
$\mathbb{E}(\tau\,|\,N_{t_0}=n)\!=\!a_{t_0}n\!+\!b_{t_0}$ for some $t_0\!>\!0$ and any $n>0$,
then
$N_{\centerdot}$ is either a Poisson process or a P\'{o}lya process.
Other characterizations of P\'{o}lya/Poisson processes are given  in terms of their transition intensities and in terms of
time-transformation of the corresponding mixed Poisson processes (see  \cite[th. 4.1 and th. 4.3]{Gr97}).

The aim of this paper is to characterize P\'{o}lya processes in martingale terms. The interest to martingale
characterizations can be explained
as follows. By definition, the process
$\xi_{\centerdot}$ is the martingale relative to
the {\em natural filtration}
${\mathcal F}^{\,\xi}_t\!=\!\sigma(\xi_s, 0\leq s\leq t)$, if
$\mathbb{E}|\xi_t|\!<\!\infty$  for any $t\geq 0$ and
\begin{gather}
\label{eq:mart}
\mathbb{E}(\xi_T\,|{\mathcal F}^{\,\xi}_t)=\xi_t \qquad a.s.
\end{gather}
for any $T\geq t\geq0$.

If additionally
$\mathbb{E}\xi_t^2<\infty$  for any $t\geq 0$, then the left side of \eqref{eq:mart} has the interpretation in terms
of prediction:
$\mathbb{E}(\xi_T\,|{\mathcal F}^{\,\xi}_t)$ is the best least-square approximation of $\xi_T$ in the class
$L^2_{{\mathcal F}^{\,\xi}_t}$ of all
random variables $\zeta$ such that a) $\mathbb{E}\zeta^2\!<\!\infty$ and b) $\zeta$ depends only on the trajectory
$\{\xi_s, \, 0\!\leq\! s\!\leq\! t\}$ of the process $\xi_{\centerdot}$\,.

In other words, \eqref{eq:mart} means that $\xi_t$ is the best (least-square) prediction of $\xi_T$ for any
$T>t$, if the approximation is based only on the knowledge of the trajectory of the process $\xi_{\centerdot}$ up to time $t$.
Evidently, such a prediction seems to be somewhat degenerate. Wiener process is the most well-known  example of this
situation.

A bit less trivial case occurs if
some non-degenerate linear trans\-for\-ma\-tion
of $\xi_{\centerdot}$ is a martingale (rather than the process $\xi_{\centerdot}$ itself). Indeed,
assume that  $\mathbb{E}\xi_t^2<\infty$ for any $t\geq 0$ and consider the
process $\zeta_t=b(t)(\xi_t+a(t))$ with $b(t)\neq 0$ for any $t\geq 0$.

Evidently,
${\mathcal F}^{\,\zeta}_t \stackrel{\rm def}=\sigma(\zeta_s, 0\leq s\leq t)=
{\mathcal F}^{\,\xi}_t \stackrel{\rm def}=\sigma(\xi_s, 0\leq s\leq t)$. If $\zeta_{\centerdot}$ is the martingale
relative to the natural filtration ${\mathcal F}^{\,\zeta}_t$, then for any $T>t$
\begin{gather}
\mathbb{E}\big(\xi_T\,|\,{\mathcal F}^{\,\xi}_t\big)=
%\frac{1}{b(T)}\,\mathbb{E}(\zeta_T\,|\,{\mathcal F}^{\,\zeta}_t)-a(T)=
%\nonumber
%\\
%=\frac{1}{b(T)}\,b(t)(\xi_t+a(t))-a(T)=
\frac{b(t)}{b(T)}\,\xi_t+\frac{b(t)a(t)}{b(T)}-a(T).
\label{eq:mart_lin}
\end{gather}
(Of course, this equality is valid only almost surely. Starting from this point, we ignore these technicalities.)
Therefore, the best least-square prediction of $\xi_T$ is equal to the right side of \eqref{eq:mart_lin}.

The good example of such a situation is the Poisson process with the parameter
$\lambda$: then $b(t)\!\equiv\!1$, $a(t)=-\lambda t$
and the best prediction of  $\xi_T$ is $\xi_t+\lambda(T-t)$. Other  processes with independent increments
and finite second moments produce
analogous examples.

%Below\marginpar{?}
Our aim is to prove that a non-degenerate linear transformation of a mixed Poisson process is the martingale
iff this process is either a Poisson process or a P\'{o}lya process. (Note that a P\'{o}lya process has by no means
independent increments.) Analogous proposition is demonstrated for the so-called P\'{o}lya sequences, common in some
popular models of consumer behavior.

\section{Characterization of P\'{o}lya processes}
Let us start with the simple lemma.
\begin{lemma}
\label{lem:MPOS}
Let $N_\centerdot$ be a mixed Poisson process with the structure distribution
$\mathcal P$.
Denote ${\mathcal F}^{\,N}_t=\sigma(N_s, 0\leq s\leq t)$. Then
\begin{gather}
\label{eq:cond_exp}
\mathbb{E}(N_{T}\,|\,{\mathcal F}^{\,N}_t)=N_t+(T-t)\,\frac{\int_{(0,\infty)}
\lambda^{N_t+1} e^{-\lambda t}{\mathcal P}(d\lambda)}
{\int_{(0,\infty)}\lambda^{N_t} e^{-\lambda t}{\mathcal P}(d\lambda)}
\end{gather}
for any $T>t\geq0$.
\end{lemma}
\begin{proof}
It is easy to show that
\begin{gather*}
\mathbb{P}(N_{t_1}=m_1,\ldots,N_{t_n}=m_n)=
\prod_{i=0}^{n-1}\frac{(t_{i+1}-t_i)^{m_{i+1}-m_i}}{(m_{i+1}-m_i)!}
\int_{(0,\infty)}\lambda^{m_n}e^{-\lambda t_n}{\mathcal P}(d\lambda)
%\label{eq:mult_distr_cp}
\end{gather*}
for any $n\geq 1$,
 $0=t_0<t_1<\ldots<t_n$ and integer $0=m_0\leq m_1\leq\ldots\leq m_n$.
Therefore, $N_\centerdot$ is the Markov process and $\mathbb{E}(N_{T}\,|\,{\mathcal F}^{\,N}_t)=
\mathbb{E}(N_{T}\,|\,N_t)$.
The transition function of $N_\centerdot$ has the form
\begin{gather}
\label{eq:tr_func}
\mathbb{P}(N_{T}=m\,|\,N_t=l)=
\frac{(T-t)^{m-l}}{(m-l)!} \ \frac{\int_{(0,\infty)}\lambda^{m}e^{-\lambda T}{\mathcal P}(d\lambda)}
{\int_{(0,\infty)}\lambda^{l}e^{-\lambda t}{\mathcal P}(d\lambda)}\ ,
\end{gather}
where $m\geq l\geq 0$  and
$T>t>0$. Analogously, if $t=0$ and $m\geq 0$, then
\begin{gather}
\label{eq:distr}
\mathbb{P}(N_{T}=m\,|\,N_0=0)=\mathbb{P}(N_{T}=m)=
\frac{T^m}{m!}\,\int_{(0,\infty)}\lambda^{m}e^{-\lambda T}{\mathcal P}(d\lambda).
\end{gather}
%It follows from the\marginpar{?}
% Markov property of the process
%$N_\centerdot$ that
In view of \eqref{eq:tr_func} and \eqref{eq:distr}, we get
\eqref{eq:cond_exp}.
\end{proof}


\begin{proposition}
1. Consider a P\'{o}lya process $N_\centerdot$   with the parameters
$k,\mu$ and denote $\zeta_t=(N_t+k)/(t+\mu)$.
Then  $\zeta_\centerdot$ is the martingale relative to the natural filtration
${\mathcal F}^{\,\zeta}_t$.\\
2. Let $N_\centerdot$ be a mixed Poisson process with the structure distribution
$\mathcal P$. Suppose that there exist functions
$b(\cdot)$ and $a(\cdot)$
such that the process
$\zeta_t=b(t)(N_t+a(t))$ is the martingale relative to the natural filtration
${\mathcal F}^{\,\zeta}_t$. Suppose additionally that $b(t)\neq 0$ for any $t\!\geq\! 0$.
Then either
${\mathcal P}=\delta_{\alpha_0}$ for some $\alpha_0>0$ or ${\mathcal P}={\rm G}(k,\mu)$ with certain $k,\mu>0$.
\end{proposition}
\begin{proof}
1. As it was already mentioned, ${\mathcal F}^{\,\zeta}_t={\mathcal F}^{\,N}_t$. Since
${\mathcal P}={\rm G}(k,\mu)$, then
\begin{gather}
\label{eq:gamma_mom_exp}
\int_{(0,\infty)}\lambda^{l} e^{-\lambda t}{\mathcal P}(d\lambda)=
%\frac{\mu^k}{\Gamma(k)}\int_{0}^\infty \lambda^{k+l-1}e^{-(\mu+t)\lambda}d\lambda=
\frac{\mu^k}{\Gamma(k)}\,\frac{\Gamma(k+l)}{(\mu+t)^{k+l}}
\end{gather}
for any   $l\geq 0$. In view of \eqref{eq:gamma_mom_exp}, we obtain from
 \eqref{eq:cond_exp} that
\begin{gather*}
\mathbb{E}(N_{T}\,|\,{\mathcal F}^{\,N}_t)
=N_t+(T-t)\,\frac{k+N_t}{\mu+t}=\frac{N_t(\mu+T)+k(T-t)}{\mu+t}\ .
\end{gather*}
Therefore,
\begin{gather*}
\mathbb{E}(\zeta_T\,|\,{\mathcal F}^{\,\zeta}_t)=
\frac{\mathbb{E}(N_T\,|\,{\mathcal F}^{\,N}_t)}{\mu+T}+\frac{k}{\mu+T}=
\frac{N_t}{\mu+t}+\frac{k(T-t)}{(\mu+t)(\mu+T)}+\frac{k}{\mu+T}=\zeta_t
\end{gather*}
and the first assertion is proved.\\
2. Since $\zeta_\centerdot$ is the martingale,
\begin{gather}
\label{eq:Mart}
\mathbb{E}(\zeta_{t+\Delta}\,|\,{\mathcal F}^\zeta_t)=
b(t+\Delta)\Big(\mathbb{E}(N_{t+\Delta}\,|{\mathcal F}^N_t)+a(t+\Delta)\Big)=\zeta_t=b(t)(N_t+a(t))
\end{gather}
for any $t,\Delta\geq 0$.
In view of  Markov property of the process
$N_\centerdot$\,, we obtain from
\eqref{eq:Mart} that
\begin{gather}
\mathbb{E}(N_{t+\Delta}\,|N_t=l)=l\,\frac{b(t)}{b(t+\Delta)}+\frac{b(t)a(t)}{b(t+\Delta)}-a(t+\Delta)
\label{eq:cond_exp_hyp}
\end{gather}
for any $t>0$ and integer $l\geq 0$. Denote
\begin{gather*}
f_1(t,\Delta)=\frac{b(t)-b(t+\Delta)}{b(t+\Delta)}\quad \mathrm{and} \quad
f_2(t,\Delta)=\frac{b(t)a(t)}{b(t+\Delta)}-a(t+\Delta).
\end{gather*}
It follows from (\ref{eq:cond_exp}) and (\ref{eq:cond_exp_hyp}) that
functions $f_1(t,\Delta)$ and $f_2(t,\Delta)$ are both proportional to $\Delta$:
$f_1(t,\Delta)=c_1(t)\Delta$ and $f_2(t,\Delta)=c_2(t)\Delta$
with $c_2(t)>0$ and $c_1(t)\geq 0$.
Moreover, if $c_1(t_0)= 0$ for some $t_0>0$, then $c_1(t)= 0$ for any $t>0$. As the result, we come to the equality
\begin{gather}
\label{eq:mom_recc}
\frac{\int_{(0,\infty)}\lambda^{l+1} e^{-\lambda t}{\mathcal P}(d\lambda)}
{\int_{(0,\infty)}\lambda^{l} e^{-\lambda t}{\mathcal P}(d\lambda)}=c_1(t)l+c_2(t), \quad t>0, \quad l\geq 0.
\end{gather}
Let us fix $t_0>0$ and denote
 $c_1=c_1(t_0)$ and $c_2=c_2(t_0)$ for brevity. Additionally, denote
\begin{gather*}
\gamma=\int_{(0,\infty)}e^{-\lambda t_0}{\mathcal P}(d\lambda)
\end{gather*}
and ${\mathcal Q}(d\lambda)\!=\!e^{-\lambda t_0}{\mathcal P}(d\lambda)/\gamma$. Evidently, ${\mathcal Q}$ is the probability
distribution.
 Now the equality
\eqref{eq:mom_recc} obtains the form
\begin{gather}
\label{eq:mom_recc1}
\frac{\int_{(0,\infty)}\lambda^{l+1} {\mathcal Q}(d\lambda)}
{\int_{(0,\infty)}\lambda^{l} {\mathcal Q}(d\lambda)}=c_1l+c_2.
\end{gather}

Let  $c_1 =0$. Then
${\int_{(0,\infty)}\lambda^{l} {\mathcal Q}(d\lambda)}=c_2^{\,l}$
for any integer $l\geq 0$ and ${\mathcal Q}=\delta_{c_2}$.
Therefore ${\mathcal P}=\delta_{\alpha}$ with $\alpha=c_2$ and
 $N_\centerdot$ is the Poisson process with the parameter $\alpha$.

Suppose that $c_1>0$.
If $\mathcal R$ stands for the gamma-distribution
$\mathrm{G}(k',\mu')$, then
\begin{gather*}
\frac{\int_{(0,\infty)}\lambda^{l+1} {\mathcal R}(d\lambda)}
{\int_{(0,\infty)}\lambda^{l} {\mathcal R}(d\lambda)}=(l+k')/\mu'
\end{gather*}
in view of \eqref{eq:gamma_mom_exp}.
The latter equality is of the same form as
\eqref{eq:mom_recc1}. Since any gamma-distribution is determined by its moments, then
${\mathcal Q}={\rm G}(k_1,\mu_1)$ with $k_1=c_2/c_1$ and $\mu_1=1/c_1$.
Therefore, the distribution ${\mathcal P}$ has the density
\begin{gather*}
p(\lambda)=\gamma\frac{\mu_1^{k_1}}{\Gamma(k_1)}\lambda^{k_1-1}e^{-(\mu_1-t_0)\lambda}\,,
\end{gather*}
where $\lambda>0$ and $\mu_1>t_0$. In other words, $\mathcal P={\rm G}(k,\mu)$ with $k=k_1$ and $\mu=\mu_1-t_0$.
Since these parameters do not depend on $t_0$, we get the explicit expressions for functions
$c_1(\,\cdot\,)$, $c_2(\,\cdot\,)$: $c_1(t)=1/(\mu+t)$ and $c_2(t)=k/(\mu+t)$.
\end{proof}

\section{Characterization of P\'{o}lya sequences}
%\label{sect:POLYA_seq}
The so-called ``NBD-Dirichlet'' model of consumer behavior (see \cite{Ehr88} for the detailed dis\-cus\-sion)
uses a  P\'{o}lya process $N_t$ to describe the number of purchase occasions up to time $t$.
% moments\marginpar{?}  of a random buyer.
The second part of the model deals with
a so-called P\'{o}lya sequence of $d$-dimensional random vectors. This sequence explains
how a buyer makes a choice of one among $d$ interchangeable
goods. To be short, we restrict ourselves to the case $d=2$.

Consider the sequence $\varepsilon_\centerdot=\{\varepsilon_j,\,j\geq 1\}$ of random variables such that
$\varepsilon_j\in \{0,1\}$.
Assume that the sequence $\varepsilon_\centerdot$ is permutable. This means that for any $n$ all
permutations of random variables $\varepsilon_1,\ldots,\varepsilon_n$ have identical $n$-dimensional
distributions.

Then (e.g., \cite[ch. 7]{F84}) there exists a {\em structure distribution}
$\mathcal P$ such that $\mathcal P([0,1])=1$ and
\begin{gather}
\label{eq:def_ssym}
\mathbb{P}(\varepsilon_1=\delta_1,\ldots,\varepsilon_n=\delta_n)=\int_{[0,1]}z^m(1-z)^{n-m}{\mathcal P}(dz)
\quad {\rm with} \quad m=\delta_1+\ldots+\delta_n
\end{gather}
%with $m=\delta_1+\ldots+\delta_n$
for any $n\geq 1$ and any $\delta_i\in \{0,1\}$.

If $\mathcal P$ is a beta-distribution with some parameters
$(\nu_1,\nu_2)$, then the sequence
$\varepsilon_\centerdot$ can be described in terms of P\'{o}lya urn models. Therefore it is natural
to call $\varepsilon_\centerdot$ as the two-dimensional {\em P\'{o}lya sequence} with parameters $(\nu_1,\nu_2)$.
(Various results on P\'{o}lya urn models can be found in \cite{JK77}.)

Our aim is to give a martingale characterization of P\'{o}lya sequences among all permutable random sequences
$\varepsilon_\centerdot=\{\varepsilon_j,\,j\geq 1\}$. More precisely, we consider random variables
$S_n=\varepsilon_1+\ldots+ \varepsilon_n$ and their non-degenerate linear transformations.
Evidently, it is sufficient to restrict ourselves to the case
${\mathcal P}((0,1))=1$.

Let us start with the proposition analogous to Lemma
\ref{lem:MPOS}.
\begin{lemma}
\label{lem:Msymm}
Suppose that the distribution of the sequence
$\varepsilon_\centerdot=\{\varepsilon_j,\,j\geq 1\}$
is defined by \eqref{eq:def_ssym} with ${\mathcal P}((0,1))=1$. Denote
$S_n=\varepsilon_1+\ldots+\varepsilon_n$. Then

\begin{gather}
\label{eq:cond_seq}
\mathbb{E}(S_{N}\,|\, S_1,\ldots,S_n)=S_n+(N-n)\,\frac{\int_{(0,1)}z^{S_n+1}(1-z)^{n-S_n}{\mathcal P}(dz)}
{\int_{(0,1)}z^{S_n}(1-z)^{n-S_n}{\mathcal P}(dz)}
\end{gather}
for any $N>n\geq0$.
\end{lemma}
\begin{proof}
Let $\delta_j\in \{0,1\}$ and
$m_j=\delta_1+\ldots+\delta_j$. It follows from
\eqref{eq:def_ssym} that
\begin{gather*}
\mathbb{P}(S_{n+1}=m_{n+1}\,|\,S_1=m_1,\ldots,S_n=m_n)=\frac{\int_{(0,1)}z^{m_{n+1}}(1-z)^{n+1-m_{n+1}}{\mathcal P}(dz)}
{\int_{(0,1)}z^{m_{n}}(1-z)^{n-m_{n}}{\mathcal P}(dz)}\,.
\end{gather*}
Therefore, the sequence
$\{S_n,\, n\geq 0\}$ is the Markov chain with the transition function
\begin{gather*}
\mathbb{P}(S_{n+1}=m+\delta\,|\,S_n=m)=\frac{\int_{(0,1)}z^{m+\delta}(1-z)^{n+1-m-\delta}{\mathcal P}(dz)}
{\int_{(0,1)}z^{m}(1-z)^{n-m}{\mathcal P}(dz)}\,,
\end{gather*}
where $\delta\in \{0,1\}$ and $m\in \{0,\ldots,n\}$. As the result, $\mathbb{E}(S_{N}\,|\, S_1,\ldots,S_n)=
\mathbb{E}(S_{N}\,|\,S_n)$.
Analogously,
\begin{gather}
\label{eq:cond_SN}
\mathbb{P}(S_{N}=l+m\,|\, S_n=l)=\frac{{\rm C}^m_{N-n}\int_{(0,1)}z^{l+m}(1-z)^{N-l-m}{\mathcal P}(dz)}
{\int_{(0,1)}z^{l}(1-z)^{n-l}{\mathcal P}(dz)}
\end{gather}
for $N>n\geq 0$, $l\in \{0,\ldots,n\}$ and $m\in \{0,\ldots,N-n\}$.
Using \eqref{eq:cond_SN}, we obtain that
\begin{gather}
\label{eq:cond_l}
\mathbb{E}(S_{N}\,|\, S_n=l)=l+(N-n)\,\frac{\int_{(0,1)}z^{l+1}(1-z)^{n-l}{\mathcal P}(dz)}
{\int_{(0,1)}z^{l}(1-z)^{n-l}{\mathcal P}(dz)}
%=l+\Delta A(n,l).
\end{gather}
and the proof is complete.
%\ebox
\end{proof}

\begin{proposition}
1. Let $\varepsilon_{\centerdot}=\{\varepsilon_j,\,j\geq 1\}$ be a {\em P\'{o}lya sequence} with parameters $(\nu_1,\nu_2)$.
Denote
$\zeta_n=(S_n+\nu_1)/(n+\nu_1+\nu_2)$. Then $\{\zeta_n,\,n\geq 1\}$ is the martingale relative to the natural
filtration
${\mathcal F}^\zeta_n=\sigma(\zeta_1,\ldots,\zeta_n)$.\\
2. Let the conditions of Lemma \ref{lem:Msymm} be fulfilled. Suppose that there exist sequences
$b_n\neq 0$ and $a_n$ such that the random sequence $\zeta_n=b_n(S_n+a_n)$ is a martingale relative to
the natural filtration
${\mathcal F}^{\,\zeta}_n=\sigma(\zeta_1,\ldots,\zeta_n)$.
Then either
${\mathcal P}=\delta_{\alpha}$ for some $\alpha\in(0,1)$ or  ${\mathcal P}$ is a beta-distribution with certain
parameters $(\nu_1,\nu_2)$.
\end{proposition}
\begin{proof}
1. Evidently,
${\mathcal F}^\zeta_n={\mathcal F}^S_n\stackrel{\rm def}=\sigma(S_1,\ldots,S_n)$. Besides, if
$\mathcal P$ is the beta-distribution with parameters $(\nu_1,\nu_2)$, then
\begin{gather}
\label{eq:beta_mom}
\int_{(0,1)}z^{k}(1-z)^{m}{\mathcal P}(dz)=\frac{\Gamma(\nu_1+\nu_2)}{\Gamma(\nu_1)\Gamma(\nu_2)}\,
\frac{\Gamma(\nu_1+k)\Gamma(\nu_2+m)}{\Gamma(\nu_1+\nu_2+k+m)}
\end{gather}
for $k>-\nu_1$ and $m>-\nu_2$.
Using \eqref{eq:beta_mom} and \eqref{eq:cond_seq}, we see that
\begin{gather*}
\mathbb{E}(S_{n+1}\,|\,{\mathcal F}^S_n)=S_n+\frac{S_n+\nu_1}{\nu_1+\nu_2+n}\,.
\end{gather*}
Therefore,
\begin{gather*}
\mathbb{E}(\zeta_{n+1}\,|\,{\mathcal F}^\zeta_n)=\frac{\mathbb{E}(S_{n+1}\,|\,{\mathcal F}^S_n)+\nu_1}{n+1+\nu_1+\nu_2}=
(S_n+\nu_1)\frac{1+1/(\nu_1+\nu_2+n)}{n+1+\nu_1+\nu_2}=\zeta_n.
\end{gather*}

2. Without loss of generality we can take $b_0=1$.
Let $N\geq n\geq 0$ and denote $\Delta=N-n$.
Since $\{\zeta_n,\,n\geq 0\}$ is a martingale, then
\begin{gather}
\label{eq:mart_P}
\mathbb{E}(S_{n+\Delta}\,|S_n=l)=\frac{b_n(l+a_n)}{b_{n+\Delta}}-a_{n+\Delta}
\end{gather}
for any $\Delta,n\geq 0$ and $0\leq l\leq n$. Denote
\begin{gather*}
A(n,l)=\frac{\int_{(0,1)}z^{l+1}(1-z)^{n-l}{\mathcal P}(dz)}
{\int_{(0,1)}z^{l}(1-z)^{n-l}{\mathcal P}(dz)}\,.
\end{gather*}
Then  (see Lemma \ref{lem:Msymm}) the equality
\eqref{eq:mart_P} can be rewritten in the form
\begin{gather}
\label{eq:ANL}
\Delta A(n,l)=\frac{b_n-b_{n+\Delta}}{b_{n+\Delta}}\,l+\frac{b_n-b_{n+\Delta}}{b_{n+\Delta}}\,a_n+a_n-a_{n+\Delta}.
\end{gather}
In other terms,
\begin{gather*}
\Delta A(n,l)=\frac{b_n-b_{n+\Delta}}{b_{n+\Delta}}\,l+\Delta A(n,0).
\end{gather*}
%It follows from the latter equality
This means
that
 $(b_n-b_{n+\Delta})/b_{n+\Delta}$ is proportional to
$\Delta$: $(b_n-b_{n+\Delta})/b_{n+\Delta}=c_1(n)\Delta$. In other words,
$b_{n+\Delta}={b_n}/({1+c_1(n)\Delta})$ and $b_n={1}/{(1+c_1\,n)}$
with $c_1=c_1(0)$.
Besides,
\begin{gather}
(b_n-b_{n+\Delta})/(b_{n+\Delta})=
{c_1\Delta}/(1+c_1 n).
\label{eq:delta_bn}
\end{gather}
Analogously,
$a_{n+\Delta}-a_n$ is proportional to
$\Delta$: $a_{n+\Delta}-a_n=c_2(n)\Delta$. Therefore
\begin{gather}
\label{eq:delta_an}
a_n=a_0+c_2 n \quad \mbox{and} \quad a_{n+\Delta}-a_n=c_2\Delta
\end{gather}
with $c_2=c_2(0)$.
Applying
 \eqref{eq:ANL}, \eqref{eq:delta_bn} and \eqref{eq:delta_an}, we obtain that
\begin{gather}
\label{eq:mom2}
\frac{\int_{(0,1)}z^{l+1}(1-z)^{n-l}{\mathcal P}(dz)}
{\int_{(0,1)}z^{l}(1-z)^{n-l}{\mathcal P}(dz)}=
\frac{c_1 l}{1+c_1 n}+\frac{c_1(a_0+c_2 n)}{1+c_1 n}-c_2=\frac{c_1l+c_1 a_0-c_2}{1+c_1 n}
\end{gather}
and
%Under the choice $n=l$
\begin{gather}
\label{eq:eq_main}
\frac{\int_{(0,1)}z^{l+1}{\mathcal P}(dz)}
{\int_{(0,1)}z^l{\mathcal P}(dz)}
=\frac{c_1 a_0-c_2+c_1 l}{1+c_1 l}\,.
\end{gather}
%$b_3=b_2+1$.

Suppose now that $c_1=0$. Then the right side of
\eqref{eq:eq_main} is a constant $\alpha=-c_2\in (0,1)$ and
$\mathcal P=\delta_\alpha$. Consider the case $c_1\neq0$.

If we take
$l=0$ and $l=1$ in \eqref{eq:eq_main}, then under denotation $x=c_1 a_0-c_2$
\begin{gather*}
0<\int_{(0,1)}z{\mathcal P}(dz)=x<1 \quad \mbox{and} \quad \int_{(0,1)}z^2{\mathcal P}(dz)=x\,\frac{x+c_1}{1+c_1}\,.
\end{gather*}
Since $\mathcal P\neq\delta_\alpha$, then $x(x+c_1)/(1+c_1)>x^2$ and either
$c_1>0$ or $c_1<-1$. On the other hand, if we choose $l=0$ in \eqref{eq:mom2}, we see that the supposition $c_1<-1$
leads to the contradiction: in this case the right side of
\eqref{eq:mom2} is negative for $n\geq 1$, whereas the left side is positive. Therefore $c_1>0$.

If
 $\mathcal R$ stands for the beta-distribution with parameters
$(\nu_1,\nu_2)$, then
\begin{gather}
\label{eq:beta_mom1}
\frac{\int_{(0,1)}z^{l+1}{\mathcal R}(dz)}
{\int_{(0,1)}z^l{\mathcal R}(dz)}=
\frac{\nu_1+l}{\nu_1+\nu_2+l}
\end{gather}
in view of \eqref{eq:beta_mom}.

Let us denote $\nu_1\!=\!(c_1 a_0-c_2)/c_1$ and $\nu_2\!=\!1/c_1-\nu_1$. Then \eqref{eq:eq_main} has the same form as
\eqref{eq:beta_mom1}. Since any beta-distribution is  determined by its moments, we obtain the result.
\end{proof}


\begin{thebibliography}{99}
\bibitem{F84}
Feller, W (1971). An Introduction to Probability Theory and Its Applications, 2-nd ed., V.2, Wiley, New York e.a.
\bibitem{JK77}
Johnson N.L., Kotz S. (1977) Urn Models and their Application.  Wiley, New York e.a.
\bibitem{Ehr88}
Ehrenberg A.S.C. (1988) Repeat-buying: Facts, Theory and Applications.
2-nd ed. London: Charles Griffin \& Company LTD; New York: Oxford
University Press.
\bibitem{Gr97}
Grandell J. (1997) Mixed Poisson Processes.   Chapman \& Hall, London e.a.
\bibitem{CW03}
Cook, R. J., Wei, W. (2003) Conditional Analysis of Mixed Poisson Processes with
Baseline Counts: Implications for Trial Design and Analysis.  Biostatistics. 2003. Vol. 4, P. 479 - 494.


\end{thebibliography}

\end{document}