fsg_psubtree.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * ====================================================================
32  *
33  */
34 /*
35  * fsg_psubtree.h -- Phone-level FSG subtree representing all transitions
36  * out of a single FSG state.
37  * (Note: Currently, it is actually a flat lexicon representation.)
38  *
39  * **********************************************
40  * CMU ARPA Speech Project
41  *
42  * Copyright (c) 2004 Carnegie Mellon University.
43  * ALL RIGHTS RESERVED.
44  * **********************************************
45  *
46  * HISTORY
47  *
48  * $Log$
49  * Revision 1.1 2006/04/05 20:27:30 dhdfu
50  * A Great Reorganzation of header files and executables
51  *
52  * Revision 1.2 2006/02/23 05:10:18 arthchan2003
53  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: Adaptation of Sphinx 2's FSG search into Sphinx 3
54  *
55  * Revision 1.1.2.5 2005/07/24 01:34:54 arthchan2003
56  * Mode 2 is basically running. Still need to fix function such as resulting and build the correct utterance ID
57  *
58  * Revision 1.1.2.4 2005/07/20 21:18:30 arthchan2003
59  * FSG can now be read, srch_fsg_init can now be initialized, psubtree can be built. Sounds like it is time to plug in other function pointers.
60  *
61  * Revision 1.1.2.3 2005/07/17 05:44:32 arthchan2003
62  * Added dag_write_header so that DAG header writer could be shared between 3.x and 3.0. However, because the backtrack pointer structure is different in 3.x and 3.0. The DAG writer still can't be shared yet.
63  *
64  * Revision 1.1.2.2 2005/07/13 18:39:47 arthchan2003
65  * (For Fun) Remove the hmm_t hack. Consider each s2 global functions one-by-one and replace them by sphinx 3's macro. There are 8 minor HACKs where functions need to be removed temporarily. Also, there are three major hacks. 1, there are no concept of "phone" in sphinx3 dict_t, there is only ciphone. That is to say we need to build it ourselves. 2, sphinx2 dict_t will be a bunch of left and right context tables. This is currently bypass. 3, the fsg routine is using fsg_hmm_t which is just a duplication of CHAN_T in sphinx2, I will guess using hmm_evaluate should be a good replacement. But I haven't figure it out yet.
66  *
67  * Revision 1.1.2.1 2005/06/27 05:26:29 arthchan2003
68  * Sphinx 2 fsg mainpulation routines. Compiled with faked functions. Currently fended off from users.
69  *
70  * Revision 1.1 2004/07/16 00:57:12 egouvea
71  * Added Ravi's implementation of FSG support.
72  *
73  * Revision 1.3 2004/06/25 14:49:08 rkm
74  * Optimized size of history table and speed of word transitions by maintaining only best scoring word exits at each state
75  *
76  * Revision 1.2 2004/05/27 14:22:57 rkm
77  * FSG cross-word triphones completed (but for single-phone words)
78  *
79  * Revision 1.1.1.1 2004/03/01 14:30:31 rkm
80  *
81  *
82  * Revision 1.2 2004/02/27 15:05:21 rkm
83  * *** empty log message ***
84  *
85  * Revision 1.1 2004/02/23 15:53:45 rkm
86  * Renamed from fst to fsg
87  *
88  * Revision 1.4 2004/02/23 15:09:50 rkm
89  * *** empty log message ***
90  *
91  * Revision 1.3 2004/02/19 21:16:54 rkm
92  * Added fsg_search.{c,h}
93  *
94  * Revision 1.2 2004/02/18 15:02:34 rkm
95  * Added fsg_lextree.{c,h}
96  *
97  * Revision 1.1 2004/02/17 21:11:49 rkm
98  * *** empty log message ***
99  *
100  *
101  * 09-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
102  * Started.
103  */
104 
105 
106 #ifndef __S2_FSG_PSUBTREE_H__
107 #define __S2_FSG_PSUBTREE_H__
108 
109 
110 #include <stdio.h>
111 
112 #include <cmd_ln.h>
113 #include <logmath.h>
114 
115 #include "s3types.h"
116 #include "word_fsg.h"
117 #include "fsg.h"
118 #include "hmm.h"
119 #include "dict.h"
120 #include "mdef.h"
121 
122 
123 #ifdef __cplusplus
124 extern "C" {
125 #endif
126 #if 0
127 /* Fool Emacs. */
128 }
129 #endif
130 
131 /*
132  * **HACK-ALERT**!! Compile-time constant determining the size of the
133  * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv. (See below.)
134  * Fixing the size at compile time is a hack, but it makes memory allocation simpler and more efficient.
135  */
136 #define FSG_PNODE_CTXT_BVSZ 2
137 
138 typedef struct {
141 
142 
159 typedef struct fsg_pnode_s {
167  union {
168  struct fsg_pnode_s *succ;
170  } next;
171 
172  /*
173  * For simplicity of memory management (i.e., freeing the pnodes), all
174  * pnodes allocated for all transitions out of a state are maintained in a
175  * linear linked list through the alloc_next pointer.
176  */
178 
179  /*
180  * The next node that is also a child of the parent of this node; NULL if
181  * none.
182  */
184 
185  /*
186  * The transition (log) probability to be incurred upon transitioning to
187  * this node. (Transition probabilities are really associated with the
188  * transitions. But a lextree node has exactly one incoming transition.
189  * Hence, the prob can be associated with the node.)
190  * This is a logs2(prob) value, and includes the language weight.
191  */
192  int32 logs2prob;
193 
194  /*
195  * The root and leaf positions associated with any transition have to deal
196  * with multiple phonetic contexts. However, different contexts may result
197  * in the same SSID (senone-seq ID), and can share a single pnode with that
198  * SSID. But the pnode should track the set of context CI phones that share
199  * it. Hence the fsg_pnode_ctxt_t bit-vector set-representation. (For
200  * simplicity of implementation, its size is a compile-time constant for
201  * now.) Single phone words would need a 2-D array of context, but that's
202  * too expensive. For now, they simply use SIL as right context, so only
203  * the left context is properly modelled.
204  * (For word-internal phones, this field is unused, of course.)
205  */
207 
208  uint8 ci_ext; /* This node's CIphone as viewed externally (context) */
209  uint8 ppos; /* Phoneme position in pronunciation */
210  uint8 leaf; /* Whether this is a leaf node */
211 
212  /* HMM-state-level stuff here */
214 } fsg_pnode_t;
215 
/*
 * Access macros for the fields of a phone node.  Each takes a pointer to
 * the node and expands to the named member itself (usable as an lvalue),
 * except fsg_pnode_hmmptr(), which expands to the ADDRESS of the embedded
 * hmm member.
 */
#define fsg_pnode_leaf(p)       ((p)->leaf)
#define fsg_pnode_logs2prob(p)  ((p)->logs2prob)
#define fsg_pnode_succ(p)       ((p)->next.succ)
#define fsg_pnode_fsglink(p)    ((p)->next.fsglink)
#define fsg_pnode_sibling(p)    ((p)->sibling)
#define fsg_pnode_hmmptr(p)     (&((p)->hmm))
#define fsg_pnode_ci_ext(p)     ((p)->ci_ext)
#define fsg_pnode_ppos(p)       ((p)->ppos)
#define fsg_pnode_ctxt(p)       ((p)->ctxt)

/*
 * Set the bit for context c in the node's context bit-vector: the word
 * index is c / 32 and the bit within that word is c % 32.
 * The mask is built from an UNSIGNED 1: with a plain int, selecting bit 31
 * would shift into the sign bit, which is undefined behaviour in C.
 * Note that c is evaluated twice, so it must not have side effects.
 */
#define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1U << ((c)&0x001f)))
229 
230 
238  word_fsg_t *fsg,
239  int32 from_state,
240  fsg_pnode_t **alloc_head,
241  cmd_ln_t *config,
242  logmath_t *logmath
243  );
244 
245 
/*
 * Free a phonetic subtree, given alloc_head.
 * NOTE(review): alloc_head is presumably the head of the linear list of
 * allocated pnodes (chained through alloc_next) produced by
 * fsg_psubtree_init(), and every node on that list is presumably released,
 * so no pnode pointer from the subtree may be used afterwards -- confirm
 * against the implementation.
 */
250 void fsg_psubtree_free (fsg_pnode_t *alloc_head);
251 
252 
253 /*
 * Dump the list of nodes in the given lextree to the given file.
 *
 *   alloc_head: head of the linear list of allocated nodes, as updated by
 *               fsg_psubtree_init().
 *   fp:         file to which the dump is written.
 *   dict, mdef: NOTE(review): presumably used to render word and phone
 *               names for each node -- confirm against the implementation.
 */
257 void fsg_psubtree_dump (fsg_pnode_t *alloc_head, FILE *fp,
258  dict_t *dict, mdef_t *mdef
259  );
260 
261 
262 /*
263  * Attempt to transition into the given node with the given attributes.
264  * If the node is already active in the given frame with a score better
265  * than the incoming score, nothing is done. Otherwise the transition is
266  * successful.
267  * Return value: TRUE if the node was newly activated for the given frame,
268  * FALSE if it was already activated for that frame (whether the incoming
269  * transition was successful or not).
270  */
272  int32 score,
273  int32 frame,
274  int32 bpidx);
275 
276 
277 /*
278  * Mark the given pnode as inactive (for search).
279  */
281 
282 
283 /* Set all flags on in the given context bitvector */
285 
286 /*
287  * Subtract bitvector sub from bitvector src (src updated with the result).
288  * Return 0 if result is all 0, non-zero otherwise.
289  */
291 
292 #ifdef __cplusplus
293 }
294 #endif
295 
296 
297 #endif
structure for storing the model definition.
Definition: mdef.h:184
Definition: fsg_psubtree.h:138
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode)
#define FSG_PNODE_CTXT_BVSZ
Definition: fsg_psubtree.h:136
uint8 leaf
Definition: fsg_psubtree.h:210
Definition: word_fsg.h:187
void fsg_psubtree_dump(fsg_pnode_t *alloc_head, FILE *fp, dict_t *dict, mdef_t *mdef)
An individual HMM among the HMM search space.
Operations on dictionary.
int fsg_psubtree_pnode_enter(fsg_pnode_t *pnode, int32 score, int32 frame, int32 bpidx)
word_fsglink_t * fsglink
Definition: fsg_psubtree.h:169
union fsg_pnode_s::@3 next
uint8 ppos
Definition: fsg_psubtree.h:209
struct fsg_pnode_s * succ
Definition: fsg_psubtree.h:168
HMM data structure and operation.
uint8 ci_ext
Definition: fsg_psubtree.h:208
void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt)
Size definitions of semantic units. Common to both the s3 and s3.X decoders.
Shared information between a set of HMMs.
struct fsg_pnode_s * sibling
Definition: fsg_psubtree.h:183
hmm_t hmm
Definition: fsg_psubtree.h:213
uint32 fsg_pnode_ctxt_sub(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub)
a structure for a dictionary.
Definition: dict.h:146
void fsg_psubtree_free(fsg_pnode_t *alloc_head)
an fsg node. All transitions (words) out of any given FSG state are represented by a phonetic prefix ...
Definition: fsg_psubtree.h:159
int32 logs2prob
Definition: fsg_psubtree.h:192
Model definition.
fsg_pnode_t * fsg_psubtree_init(hmm_context_t *ctx, word_fsg_t *fsg, int32 from_state, fsg_pnode_t **alloc_head, cmd_ln_t *config, logmath_t *logmath)
struct fsg_pnode_s fsg_pnode_t
struct fsg_pnode_s * alloc_next
Definition: fsg_psubtree.h:177
fsg_pnode_ctxt_t ctxt
Definition: fsg_psubtree.h:206