MythTV  master
teletextextractorreader.cpp
Go to the documentation of this file.
1 // -*- Mode: c++ -*-
2 
4 
5 void TeletextExtractorReader::PageUpdated(int page, int subpage)
6 {
7  m_updatedPages.insert(qMakePair(page, subpage));
8  TeletextReader::PageUpdated(page, subpage);
9 }
10 
12  int page, int subpage, tt_line_array& page_ptr, int lang)
13 {
14  m_updatedPages.insert(qMakePair(page, subpage));
15  TeletextReader::HeaderUpdated(page, subpage, page_ptr, lang);
16 }
17 
18 /************************************************************************
19  * Everything below this message in this file is based on some VLC
20  * teletext code which was in turn based on some ProjectX teletext code.
21  ************************************************************************/
22 
23 /*****************************************************************************
24  * telx.c : Minimalistic Teletext subtitles decoder
25  *****************************************************************************
26  * Copyright (C) 2007 Vincent Penne
27  * Some code converted from ProjectX java dvb decoder (c) 2001-2005 by dvb.matt
28  * $Id: 2b01e6a460b7c3693bccd690e3dbc018832d2777 $
29  *
30  * This program is free software; you can redistribute it and/or modify
31  * it under the terms of the GNU General Public License as published by
32  * the Free Software Foundation; either version 2 of the License, or
33  * (at your option) any later version.
34  *
35  * This program is distributed in the hope that it will be useful,
36  * but WITHOUT ANY WARRANTY; without even the implied warranty of
37  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
38  * GNU General Public License for more details.
39  *
40  * You should have received a copy of the GNU General Public License
41  * along with this program; if not, write to the Free Software
42  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
43  *****************************************************************************/
44 
45 /*
46  * My doc only mentions 13 national characters, but experiments show there
47  * are more, in france for example I already found two more (0x9 and 0xb).
48  *
49  * Conversion is in this order :
50  *
51  * 0x23 0x24 0x40 0x5b 0x5c 0x5d 0x5e 0x5f 0x60 0x7b 0x7c 0x7d 0x7e
52  * (these are the standard ones)
53  * 0x08 0x09 0x0a 0x0b 0x0c 0x0d (apparently a control character) 0x0e 0x0f
54  */
55 
// One row of national replacement characters (UCS-2 code points).
//
// Slots 0..12 hold the replacements for the 13 standard national codes
// (0x23 0x24 0x40 0x5b 0x5c 0x5d 0x5e 0x5f 0x60 0x7b 0x7c 0x7d 0x7e).
// Slots 13..20 hold the replacements for the undocumented range 0x08..0x0f,
// looked up as [13 + code - 0x08]. That is 8 further slots, so a row needs
// 21 entries; with only 20 the lookup for code 0x0f would read past the end.
// Entries that are not listed are zero-initialised, meaning "undefined".
using ppi_natl_array = std::array<const uint16_t,21>;

// National option subsets, indexed by code page (the value in the trailing
// comment of each row is the selector in binary).
static const std::array<const ppi_natl_array,13> ppi_national_subsets
{{
    { 0x00a3, 0x0024, 0x0040, 0x00ab, 0x00bd, 0x00bb, 0x005e, 0x0023,
      0x002d, 0x00bc, 0x00a6, 0x00be, 0x00f7 }, /* english, 000 */

    { 0x0023, 0x0024, 0x00a7, 0x00c4, 0x00d6, 0x00dc, 0x005e, 0x005f,
      0x00b0, 0x00e4, 0x00f6, 0x00fc, 0x00df }, /* german, 001 */

    { 0x0023, 0x00a4, 0x00c9, 0x00c4, 0x00d6, 0x00c5, 0x00dc, 0x005f,
      0x00e9, 0x00e4, 0x00f6, 0x00e5, 0x00fc
    }, /* swedish, finnish, hungarian, 010 */

    { 0x00a3, 0x0024, 0x00e9, 0x00b0, 0x00e7, 0x00bb, 0x005e, 0x0023,
      0x00f9, 0x00e0, 0x00f2, 0x00e8, 0x00ec }, /* italian, 011 */

    /* french also defines two of the undocumented codes (0x09 and 0x0b) */
    { 0x00e9, 0x00ef, 0x00e0, 0x00eb, 0x00ea, 0x00f9, 0x00ee, 0x0023,
      0x00e8, 0x00e2, 0x00f4, 0x00fb, 0x00e7, 0, 0x00eb, 0, 0x00ef
    }, /* french, 100 */

    { 0x00e7, 0x0024, 0x00a1, 0x00e1, 0x00e9, 0x00ed, 0x00f3, 0x00fa,
      0x00bf, 0x00fc, 0x00f1, 0x00e8, 0x00e0 }, /* portuguese, spanish, 101 */

    { 0x0023, 0x016f, 0x010d, 0x0165, 0x017e, 0x00fd, 0x00ed, 0x0159,
      0x00e9, 0x00e1, 0x011b, 0x00fa, 0x0161 }, /* czech, slovak, 110 */

    { 0x0023, 0x00a4, 0x0162, 0x00c2, 0x015e, 0x0102, 0x00ce, 0x0131,
      0x0163, 0x00e2, 0x015f, 0x0103, 0x00ee }, /* rumanian, 111 */

    /* I have these tables too, but I don't know how they can be triggered */
    { 0x0023, 0x0024, 0x0160, 0x0117, 0x0119, 0x017d, 0x010d, 0x016b,
      0x0161, 0x0105, 0x0173, 0x017e, 0x012f }, /* lettish, lithuanian, 1000 */

    { 0x0023, 0x0144, 0x0105, 0x005a, 0x015a, 0x0141, 0x0107, 0x00f3,
      0x0119, 0x017c, 0x015b, 0x0142, 0x017a }, /* polish, 1001 */

    { 0x0023, 0x00cb, 0x010c, 0x0106, 0x017d, 0x0110, 0x0160, 0x00eb,
      0x010d, 0x0107, 0x017e, 0x0111, 0x0161
    }, /* serbian, croatian, slovenian, 1010 */

    { 0x0023, 0x00f5, 0x0160, 0x00c4, 0x00d6, 0x017e, 0x00dc, 0x00d5,
      0x0161, 0x00e4, 0x00f6, 0x017e, 0x00fc }, /* estonian, 1011 */

    { 0x0054, 0x011f, 0x0130, 0x015e, 0x00d6, 0x00c7, 0x00dc, 0x011e,
      0x0131, 0x015f, 0x00f6, 0x00e7, 0x00fc }, /* turkish, 1100 */
}};
102 
// ucs-2 --> utf-8
// This is not a general-purpose encoder, but it is enough for what we do
// here: it encodes one 16-bit code point as 1, 2 or 3 UTF-8 bytes.
// The previous contents of 'res' are replaced by the encoded bytes.
static void to_utf8(std::string &res, uint16_t ch)
{
    res.clear();

    // 7-bit values are emitted as a single byte
    if (ch < 0x80)
    {
        res.push_back(static_cast<char>(ch));
        return;
    }

    const char tail = static_cast<char>((ch & 0x3F) | 0x80);

    // up to 11 bits: lead byte 110xxxxx + one continuation byte
    if (ch < 0x800)
    {
        res.push_back(static_cast<char>((ch >> 6) | 0xC0));
        res.push_back(tail);
        return;
    }

    // up to 16 bits: lead byte 1110xxxx + two continuation bytes
    res.push_back(static_cast<char>((ch >> 12) | 0xE0));
    res.push_back(static_cast<char>(((ch >> 6) & 0x3F) | 0x80));
    res.push_back(tail);
}
127 
132 QString decode_teletext(int codePage, const tt_line_array& data)
133 {
134  QString res;
135  std::string utf8 {};
136 
137  const ppi_natl_array pi_active_national_set = ppi_national_subsets[codePage];
138 
139  for (int i = 0; i < 40; ++i)
140  {
141  //int in = bytereverse(data[i]) & 0x7f;
142  int in = data[i] & 0x7f;
143  uint16_t out = 32;
144 
145  switch (in)
146  {
147  /* special national characters */
148  case 0x23:
149  out = pi_active_national_set[0];
150  break;
151  case 0x24:
152  out = pi_active_national_set[1];
153  break;
154  case 0x40:
155  out = pi_active_national_set[2];
156  break;
157  case 0x5b:
158  out = pi_active_national_set[3];
159  break;
160  case 0x5c:
161  out = pi_active_national_set[4];
162  break;
163  case 0x5d:
164  out = pi_active_national_set[5];
165  break;
166  case 0x5e:
167  out = pi_active_national_set[6];
168  break;
169  case 0x5f:
170  out = pi_active_national_set[7];
171  break;
172  case 0x60:
173  out = pi_active_national_set[8];
174  break;
175  case 0x7b:
176  out = pi_active_national_set[9];
177  break;
178  case 0x7c:
179  out = pi_active_national_set[10];
180  break;
181  case 0x7d:
182  out = pi_active_national_set[11];
183  break;
184  case 0x7e:
185  out = pi_active_national_set[12];
186  break;
187 
188  case 0x0a:
189  case 0x0b:
190  case 0x0d:
191  //wtf? looks like some kind of garbage for me
192  out = 32;
193  break;
194 
195  default:
196  /* non documented national range 0x08 - 0x0f */
197  if (in >= 0x08 && in <= 0x0f)
198  {
199  out = pi_active_national_set[13 + in - 8];
200  break;
201  }
202 
203  /* normal ascii */
204  if (in > 32 && in < 0x7f)
205  out = in;
206  }
207 
208  /* handle undefined national characters */
209  if (out == 0)
210  out = '?'; //' ' or '?' ?
211 
212  /* convert to utf-8 */
213  to_utf8(utf8, out);
214  res += QString::fromUtf8(utf8.c_str());
215  }
216 
217  return res;
218 }
219 
220 //QString DechiperTtxFlags(int flags) {
221 // QString res;
222 
223 // if (flags & TP_SUPPRESS_HEADER)
224 // res += "TP_SUPPRESS_HEADER ";
225 // if (flags & TP_UPDATE_INDICATOR)
226 // res += "TP_UPDATE_INDICATOR ";
227 // if (flags & TP_INTERRUPTED_SEQ)
228 // res += "TP_INTERRUPTED_SEQ ";
229 // if (flags & TP_INHIBIT_DISPLAY)
230 // res += "TP_INHIBIT_DISPLAY ";
231 // if (flags & TP_MAGAZINE_SERIAL)
232 // res += "TP_MAGAZINE_SERIAL ";
233 // if (flags & TP_ERASE_PAGE)
234 // res += "TP_ERASE_PAGE ";
235 // if (flags & TP_NEWSFLASH)
236 // res += "TP_NEWSFLASH ";
237 // if (flags & TP_SUBTITLE)
238 // res += "TP_SUBTITLE ";
239 
240 // return res.trimmed();
241 //}
242 
243 /* vim: set expandtab tabstop=4 shiftwidth=4: */
TeletextExtractorReader::HeaderUpdated
void HeaderUpdated(int page, int subpage, tt_line_array &page_ptr, int lang) override
Definition: teletextextractorreader.cpp:11
ppi_natl_array
std::array< const uint16_t, 20 > ppi_natl_array
Definition: teletextextractorreader.cpp:56
TeletextReader::PageUpdated
virtual void PageUpdated(int page, int subpage)
Definition: teletextreader.cpp:508
teletextextractorreader.h
to_utf8
static void to_utf8(std::string &res, uint16_t ch)
Definition: teletextextractorreader.cpp:106
decode_teletext
QString decode_teletext(int codePage, const tt_line_array &data)
Get decoded ttx as a string.
Definition: teletextextractorreader.cpp:132
TeletextExtractorReader::PageUpdated
void PageUpdated(int page, int subpage) override
Definition: teletextextractorreader.cpp:5
TeletextReader::HeaderUpdated
virtual void HeaderUpdated(int page, int subpage, tt_line_array &page_ptr, int lang)
Definition: teletextreader.cpp:517
tt_line_array
std::array< uint8_t, 40 > tt_line_array
Definition: teletextreader.h:36
ppi_national_subsets
static const std::array< const ppi_natl_array, 13 > ppi_national_subsets
Definition: teletextextractorreader.cpp:58
TeletextExtractorReader::m_updatedPages
QSet< QPair< int, int > > m_updatedPages
Definition: teletextextractorreader.h:34
uint16_t
unsigned short uint16_t
Definition: iso6937tables.h:3