MythTV  master
xine_demux_sputext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2000-2003 the xine project
3  *
4  * This file is part of xine, a free video player.
5  *
6  * xine is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * xine is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  * code based on old libsputext/xine_decoder.c
21  *
22  * code based on mplayer module:
23  *
24  * Subtitle reader with format autodetection
25  *
26  * Written by laaz
27  * Some code cleanup & realloc() by A'rpi/ESP-team
28  * dunnowhat sub format by szabi
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include <array>
36 #include <cctype>
37 #include <cstdio>
38 #include <cstdlib>
39 #include <cstring>
40 #include <iostream>
41 #include <fcntl.h>
42 #include <cctype>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <unistd.h>
47 
48 #include "mythlogging.h"
49 
50 #define LOG_MODULE "demux_sputext"
51 #define LOG_VERBOSE
52 /*
53 #define LOG
54 */
55 
56 #define ERR ((void *)-1)
57 #define LINE_LEN 1000
58 #define LINE_LEN_QUOT "1000"
59 
60 #ifdef _WIN32
61  #include <stdlib.h>
62  #include <ctype.h>
63 
/*
 * Case-insensitive substring search, provided for platforms whose C
 * library lacks strcasestr().  Returns a pointer to the first position in
 * 'str' where 'pattern' matches ignoring case, 'str' itself when 'pattern'
 * is empty, or nullptr when there is no match.
 */
char *strcasestr(const char *str, const char *pattern) {
    /* An empty pattern matches at the very start of the haystack. */
    if (*pattern == '\0')
        return const_cast<char *>(str);

    for (const char *candidate = str; *candidate != '\0'; ++candidate) {
        /* Count how many leading pattern characters match here.  The
         * comparison fails at the haystack's terminator, so the scan never
         * runs past the end of 'str'. */
        size_t matched = 0;
        while (pattern[matched] != '\0' &&
               toupper((unsigned char)candidate[matched]) ==
               toupper((unsigned char)pattern[matched]))
            ++matched;

        if (pattern[matched] == '\0')
            return const_cast<char *>(candidate);
    }
    return nullptr;
}
82 #endif
83 
84 /*
85  * Demuxer code start
86  */
87 
/* True when 'p' ends a line: carriage return, line feed or the string
 * terminator. */
static bool isEol(char p) {
    switch (p) {
    case '\r':
    case '\n':
    case '\0':
        return true;
    default:
        return false;
    }
}
91 
/*
 * Strip leading and trailing whitespace (space, tab, CR, LF) from 'str'
 * in place.  Whitespace inside the text is preserved; a string made up
 * only of whitespace becomes empty.
 */
static inline void trail_space(std::string& str)
{
    static const char *const kWhitespace = " \t\r\n";

    /* Locate the last character that is NOT whitespace; everything after
     * it is trailing whitespace. */
    const auto last = str.find_last_not_of(kWhitespace);
    if (last == std::string::npos) {
        /* Empty or whitespace-only input. */
        str.clear();
        return;
    }
    str.erase(last + 1);

    /* A non-whitespace character is known to exist at this point, so the
     * search below cannot return npos. */
    str.erase(0, str.find_first_not_of(kWhitespace));
}
101 
102 /*
103  *
104  */
105 static char *read_line_from_input(demux_sputext_t *demuxstr, std::string& line) {
106 
107  line.reserve(LINE_LEN);
108  if ((line.capacity() - demuxstr->buf.size()) > 512) {
109  off_t nread = line.capacity() - demuxstr->buf.size();
110  nread = std::min(nread, demuxstr->rbuffer_len - demuxstr->rbuffer_cur);
111  if (nread < 0) {
112  printf("read failed.\n");
113  return nullptr;
114  }
115  if (nread > 0) {
116  demuxstr->buf.append(&demuxstr->rbuffer_text[demuxstr->rbuffer_cur],
117  nread);
118  demuxstr->rbuffer_cur += nread;
119  }
120  }
121 
122  size_t index = demuxstr->buf.find('\n');
123  if (index != std::string::npos) {
124  line.assign(demuxstr->buf, 0, index+1);
125  demuxstr->buf.erase(0, index+1);
126  return line.data();
127  }
128  if (!demuxstr->buf.empty()) {
129  line = demuxstr->buf;
130  demuxstr->buf.clear();
131  return line.data();
132  }
133 
134  return nullptr;
135 }
136 
137 
139 
140  static std::string s_line;
141  static char *s_s = nullptr;
142  std::string text;
143 
144  current->start = 0;
145  current->end = -1;
146  int state = 0;
147 
148  /* read the first line */
149  if (!s_s)
150  if (!(s_s = read_line_from_input(demuxstr, s_line))) return nullptr;
151 
152  do {
153  switch (state) {
154 
155  case 0: /* find "START=" */
156  s_s = strcasestr (s_s, "Start=");
157  if (s_s) {
158  current->start = strtol (s_s + 6, &s_s, 0) / 10;
159  state = 1; continue;
160  }
161  break;
162 
163  case 1: /* find "<P" */
164  if ((s_s = strcasestr (s_s, "<P"))) { s_s += 2; state = 2; continue; }
165  break;
166 
167  case 2: /* find ">" */
168  if ((s_s = strchr (s_s, '>'))) { s_s++; state = 3; text.clear(); continue; }
169  break;
170 
171  case 3: /* get all text until '<' appears */
172  if (*s_s == '\0') { break; }
173  else if (strncasecmp (s_s, "&nbsp;", 6) == 0) { text += ' '; s_s += 6; }
174  else if (*s_s == '\r') { s_s++; }
175  else if (strncasecmp (s_s, "<br>", 4) == 0 || *s_s == '\n') {
176  trail_space (text);
177  if (!text.empty())
178  current->text.push_back(text);
179  text.clear();
180  if (*s_s == '\n') s_s++; else s_s += 4;
181  }
182  else if (*s_s == '<') { state = 4; }
183  else text += *s_s++;
184  continue;
185 
186  case 4: /* get current->end or skip <TAG> */
187  char *q = strcasestr (s_s, "start=");
188  if (q) {
189  current->end = strtol (q + 6, &q, 0) / 10 - 1;
190  trail_space (text);
191  if (!text.empty())
192  current->text.push_back(text);
193  if (!current->text.empty()) { state = 99; break; }
194  state = 0; continue;
195  }
196  s_s = strchr (s_s, '>');
197  if (s_s) { s_s++; state = 3; continue; }
198  break;
199  }
200 
201  /* read next line */
202  if (state != 99 && !(s_s = read_line_from_input (demuxstr, s_line)))
203  return nullptr;
204 
205  } while (state != 99);
206 
207  return current;
208 }
209 
210 
211 
224 static char *sub_readtext(char *source, std::string& dest) {
225  if (source == nullptr)
226  return nullptr;
227 
228  int len=0;
229  char *p=source;
230 
231  while ( !isEol(*p) && *p!= '|' ) {
232  p++,len++;
233  }
234 
235  dest.assign(source, len);
236 
237  while (*p=='\r' || *p=='\n' || *p=='|')
238  p++;
239 
240  if (*p) return p; /* not-last text field */
241  return (char*)nullptr; /* last text field */
242 }
243 
245 
246  std::string line; line.reserve(LINE_LEN + 1);
247  std::string line2; line2.reserve(LINE_LEN + 1);
248 
249  current->end=-1;
250  do {
251  if (!read_line_from_input (demuxstr, line)) return nullptr;
252  } while ((sscanf (line.c_str(), "{%" SCNd64 "}{}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), line2.data()) !=2) &&
253  (sscanf (line.c_str(), "{%" SCNd64 "}{%" SCNd64 "}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), &(current->end),line2.data()) !=3)
254  );
255 
256  char *next=line2.data();
257  std::string out {};
258  while ((next = sub_readtext (next, out))) {
259  if (next==ERR) return (subtitle_t *)ERR;
260  current->text.push_back(out);
261  }
262  current->text.push_back(out);
263 
264  return current;
265 }
266 
268 
269  std::string line;
270  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
271 
272  while (true) {
273  if (!read_line_from_input(demuxstr, line)) return nullptr;
274  if (sscanf (line.c_str(), "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) {
275  if (sscanf (line.c_str(), "%d:%d:%d,%d,%d:%d:%d,%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
276  continue;
277  }
278  current->start = a1*360000+a2*6000+a3*100+a4;
279  current->end = b1*360000+b2*6000+b3*100+b4;
280 
281  if (!read_line_from_input(demuxstr, line))
282  return nullptr;
283 
284  char *p=line.data();
285  while (true) {
286  char *q=nullptr;
287  int len = 0;
288  for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' &&
289  (strncasecmp(p,"[br]",4) != 0); p++,len++);
290  current->text.emplace_back(q, len);
291  if (!*p || *p=='\r' || *p=='\n') break;
292  if (*p=='[') while (*p++!=']');
293  if (*p=='|') p++;
294  }
295  break;
296  }
297  return current;
298 }
299 
301  std::string line;
302  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
303  int i = 0;
304 
305  do {
306  if(!read_line_from_input(demuxstr,line))
307  return nullptr;
308  i = sscanf(line.c_str(),"%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4);
309  } while(i < 8);
310  current->start = a1*360000+a2*6000+a3*100+a4/10;
311  current->end = b1*360000+b2*6000+b3*100+b4/10;
312  bool end_sub = false;
313  do {
314  char *p = nullptr; /* pointer to the curently read char */
315  std::string temp_line; /* subtitle line that will be transfered to current->text[i] */
316  temp_line.reserve(SUB_BUFSIZE);
317  if(!read_line_from_input(demuxstr,line))
318  return (!current->text.empty()) ? current : nullptr;
319  for (p=line.data(); *p!='\0' && !end_sub; p++) {
320  bool eol = false;
321  switch(*p) {
322  case '\\':
323  if(*(p+1)=='N' || *(p+1)=='n') {
324  eol = true;
325  p++;
326  } else
327  temp_line += *p;
328  break;
329  case '{':
330  // The different code for these if/else clauses is ifdef'd out.
331  // NOLINTNEXTLINE(bugprone-branch-clone)
332  if(strncmp(p,"{\\i1}",5) == 0) {
333 #if 0 /* italic not implemented in renderer, ignore them for now */
334  temp_line.append("<i>");
335 #endif
336  p+=4;
337  }
338  else if(strncmp(p,"{\\i0}",5) == 0) {
339 #if 0 /* italic not implemented in renderer, ignore them for now */
340  temp_line.append("</i>");
341 #endif
342  p+=4;
343  }
344  else
345  temp_line += *p;
346  break;
347  case '\r': /* just ignore '\r's */
348  break;
349  case '\n':
350  eol = true;
351  break;
352  default:
353  temp_line += *p;
354  break;
355  }
356  if (eol) {
357  if (!temp_line.empty())
358  {
359  current->text.push_back(temp_line);
360  temp_line.clear();
361  } else {
362  end_sub = true;
363  }
364  }
365  }
366  } while (!end_sub);
367  return current;
368 }
369 
371  std::string line;
372  int a1=0,a2=0,a3=0,b1=0,b2=0,b3=0; // NOLINT(readability-isolate-declaration)
373 
374  while (current->text.empty()) {
375  if( demuxstr->next_line.empty() ) {
376  /* if the buffer is empty.... */
377  if( !read_line_from_input(demuxstr, line) ) return nullptr;
378  } else {
379  /* ... get the current line from buffer. */
380  line = demuxstr->next_line;
381  demuxstr->next_line.clear();
382  }
383  /* Initialize buffer with next line */
384  if( ! read_line_from_input( demuxstr, demuxstr->next_line) ) {
385  demuxstr->next_line.clear();
386  return nullptr;
387  }
388  if( (sscanf( line.c_str(), "%d:%d:%d:", &a1, &a2, &a3) < 3) ||
389  (sscanf( demuxstr->next_line.c_str(), "%d:%d:%d:", &b1, &b2, &b3) < 3) )
390  continue;
391  current->start = a1*360000+a2*6000+a3*100;
392  current->end = b1*360000+b2*6000+b3*100;
393  if ((current->end - current->start) > LINE_LEN)
394  current->end = current->start + LINE_LEN; /* not too long though. */
395  /* teraz czas na wkopiowanie stringu */
396  char *p=line.data();
397  /* finds the body of the subtitle_t */
398  for (int i=0; i<3; i++){
399  char *p2=strchr( p, ':');
400  if( p2 == nullptr ) break;
401  p=p2+1;
402  }
403 
404  char *next=p;
405  std::string out {};
406  while( (next = sub_readtext( next, out )) ) {
407  if (next==ERR)
408  return (subtitle_t *)ERR;
409  current->text.push_back(out);
410  }
411  current->text.push_back(out);
412  }
413  return current;
414 }
415 
417  /*
418  * TODO: This format uses quite rich (sub/super)set of xhtml
419  * I couldn't check it since DTD is not included.
420  * WARNING: full XML parses can be required for proper parsing
421  */
422  std::string line;
423  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
424  int plen = 0;
425 
426  while (current->text.empty()) {
427  if (!read_line_from_input(demuxstr, line)) return nullptr;
428  /*
429  * TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
430  * to describe the same moment in time. Maybe there are even more formats in use.
431  */
432  if (sscanf (line.c_str(), R"(<Time Begin="%d:%d:%d.%d" End="%d:%d:%d.%d")",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
433 
434  plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
435  if (
436  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d" %*[Ee]nd="%d:%d"%*[^<]<clear/>%n)",&a2,&a3,&b2,&b3,&plen) < 4) &&
437  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d" %*[Ee]nd="%d:%d.%d"%*[^<]<clear/>%n)",&a2,&a3,&b2,&b3,&b4,&plen) < 5) &&
438  /* (sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen) < 5) && */
439  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d.%d" %*[Ee]nd="%d:%d.%d"%*[^<]<clear/>%n)",&a2,&a3,&a4,&b2,&b3,&b4,&plen) < 6) &&
440  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d:%d.%d" %*[Ee]nd="%d:%d:%d.%d"%*[^<]<clear/>%n)",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen) < 8)
441  )
442  continue;
443  current->start = a1*360000+a2*6000+a3*100+a4/10;
444  current->end = b1*360000+b2*6000+b3*100+b4/10;
445  /* TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml? */
446  size_t index = line.find("<clear/>");
447  char *next = (index != std::string::npos) ? &line[index+8] : nullptr;
448  std::string out {};
449  while ((next = sub_readtext (next, out))) {
450  if (next==ERR)
451  return (subtitle_t *)ERR;
452  current->text.push_back(out);
453  }
454  current->text.push_back(out);
455  }
456  return current;
457 }
458 
460  int comma = 0;
461  static int s_maxComma = 32; /* let's use 32 for the case that the */
462  /* amount of commas increase with newer SSA versions */
463 
464  int hour1 = 0;
465  int min1 = 0;
466  int sec1 = 0;
467  int hunsec1 = 0;
468  int hour2 = 0;
469  int min2 = 0;
470  int sec2 = 0;
471  int hunsec2 = 0;
472  int nothing = 0;
473  std::string line;
474  std::string line3; line3.resize(LINE_LEN);
475  char *tmp = nullptr;
476 
477  do {
478  if (!read_line_from_input(demuxstr, line)) return nullptr;
479  } while (sscanf (line.data(), "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d,"
480  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
481  &hour1, &min1, &sec1, &hunsec1,
482  &hour2, &min2, &sec2, &hunsec2,
483  line3.data()) < 9
484  &&
485  sscanf (line.data(), "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d,"
486  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
487  &hour1, &min1, &sec1, &hunsec1,
488  &hour2, &min2, &sec2, &hunsec2,
489  line3.data()) < 9 );
490 
491  size_t index = line3.find(',');
492  if (index == std::string::npos)
493  return nullptr;
494  char *line2 = &line3[index];
495 
496  for (comma = 4; comma < s_maxComma; comma ++)
497  {
498  tmp = line2;
499  if(!(tmp=strchr(++tmp, ','))) break;
500  if(*(++tmp) == ' ') break;
501  /* a space after a comma means we're already in a sentence */
502  line2 = tmp;
503  }
504 
505  if(comma < s_maxComma)s_maxComma = comma;
506  /* eliminate the trailing comma */
507  if(*line2 == ',') line2++;
508 
509  current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
510  current->end = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;
511 
512  while (((tmp=strstr(line2, "\\n")) != nullptr) || ((tmp=strstr(line2, "\\N")) != nullptr) ){
513  current->text.emplace_back(line2, tmp-line2);
514  line2=tmp+2;
515  }
516 
517  current->text.emplace_back(line2);
518 
519  return current;
520 }
521 
522 /* Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
523  * From MPlayer subreader.c :
524  *
525  * PJS subtitles reader.
526  * That's the "Phoenix Japanimation Society" format.
527  * I found some of them in http://www.scriptsclub.org/ (used for anime).
528  * The time is in tenths of second.
529  *
530  * by set, based on code by szabi (dunnowhat sub format ;-)
531  */
532 
534  std::string line;
535 
536  if (!read_line_from_input(demuxstr, line))
537  return nullptr;
538  size_t mark = line.find_first_not_of(" \t\r\n");
539  if (mark != std::string::npos)
540  line.erase(0, mark);
541  if (line.empty())
542  return nullptr;
543  if (sscanf (line.data(), "%" SCNd64 ",%" SCNd64 ",", &(current->start),
544  &(current->end)) <2)
545  return (subtitle_t *)ERR;
546  /* the files I have are in tenths of second */
547  current->start *= 10;
548  current->end *= 10;
549 
550  /* copy the string to the text buffer */
551  auto start = line.find('\"');
552  if (start == std::string::npos)
553  return (subtitle_t *)ERR;
554  auto end = line.find('\"', start + 1);
555  if (end == std::string::npos)
556  return (subtitle_t *)ERR;
557  current->text.push_back(line.substr(start+1, end));
558 
559  return current;
560 }
561 
563  std::string line;
564  float a = NAN;
565  float b = NAN;
566 
567  do {
568  if (!read_line_from_input(demuxstr, line))
569  return nullptr;
570  } while (sscanf (line.c_str(), "%f %f", &a, &b) !=2);
571 
572  demuxstr->mpsub_position += (a*100.0F);
573  current->start = (int) demuxstr->mpsub_position;
574  demuxstr->mpsub_position += (b*100.0F);
575  current->end = (int) demuxstr->mpsub_position;
576 
577  while (true) {
578  if (!read_line_from_input(demuxstr, line))
579  return (!current->text.empty()) ? current : nullptr;
580 
581  size_t mark = line.find_first_not_of(" \t\r\n");
582  if (mark != std::string::npos)
583  line.erase(0, mark);
584 
585  if (isEol(line[0]) && !current->text.empty())
586  return current;
587 
588  if (isEol(line[0]))
589  return nullptr;
590 
591  char *q = nullptr;
592  for (q=line.data(); !isEol(*q); q++);
593  *q='\0';
594  line.resize(strlen(line.c_str()));
595  if (!line.empty()) {
596  current->text.push_back(line);
597  /* printf(">%s<\n",line.data()); */
598  } else {
599  if (!current->text.empty())
600  return current;
601  return nullptr;
602  }
603  }
604 
605  return nullptr;
606 }
607 
609  std::string line;
610 
611  while (true) {
612  /* try to locate next subtitle_t */
613  if (!read_line_from_input(demuxstr, line))
614  return nullptr;
615  if (!(sscanf (line.c_str(), "-->> %" SCNd64, &(current->start)) <1))
616  break;
617  }
618 
619  while (read_line_from_input(demuxstr, line))
620  {
621  std::string out {};
622  sub_readtext(line.data(),out);
623  if (out.empty())
624  break;
625  current->text.push_back(out);
626  current->end = -1;
627  }
628  return (!current->text.empty())? current : nullptr;
629 }
630 
632  std::string line1;
633  std::string line2;
634  std::string directive; directive.resize(LINE_LEN);
635  char *p = nullptr;
636  char *q = nullptr;
637  unsigned a1=0, a2=0, a3=0, a4=0, b1=0, b2=0, b3=0, b4=0; // NOLINT(readability-isolate-declaration)
638  unsigned comment = 0;
639  static uint32_t s_jacoTimeRes = 30;
640  static uint32_t s_jacoShift = 0;
641 
642  while (current->text.empty()) {
643  if (!read_line_from_input(demuxstr, line1)) {
644  return nullptr;
645  }
646  // Support continuation lines
647  if (line1.size() >= 2) {
648  while ((line1[line1.size()-2] == '\\') && (line1[line1.size()-1] == '\n')) {
649  line1.resize(line1.size()-2);
650  if (!read_line_from_input(demuxstr, line2))
651  return nullptr;
652  size_t index = line2.find_first_not_of(" \t\r\n");
653  if (index != std::string::npos)
654  line2.erase(0, index);
655  line1 += line2;
656  }
657  }
658  line2.resize(0);
659  line2.resize(LINE_LEN);
660  if (sscanf
661  (line1.c_str(), "%u:%u:%u.%u %u:%u:%u.%u %" LINE_LEN_QUOT "[^\n\r]", &a1, &a2, &a3, &a4,
662  &b1, &b2, &b3, &b4, line2.data()) < 9) {
663  if (sscanf(line1.data(), "@%u @%u %" LINE_LEN_QUOT "[^\n\r]", &a4, &b4, line2.data()) < 3) {
664  if (line1[0] == '#') {
665  int hours = 0;
666  int minutes = 0;
667  int seconds = 0;
668  int delta = 0;
669  uint32_t units = s_jacoShift;
670  switch (toupper(line1[1])) {
671  case 'S':
672  if (isalpha(line1[2])) {
673  delta = 6;
674  } else {
675  delta = 2;
676  }
677  if (sscanf(&line1[delta], "%d", &hours)) {
678  int inverter = 1;
679  if (hours < 0) {
680  hours *= -1;
681  inverter = -1;
682  }
683  if (sscanf(&line1[delta], "%*d:%d", &minutes)) {
684  if (sscanf
685  (&line1[delta], "%*d:%*d:%d",
686  &seconds)) {
687  sscanf(&line1[delta], "%*d:%*d:%*d.%u",
688  &units);
689  } else {
690  hours = 0;
691  sscanf(&line1[delta], "%d:%d.%u",
692  &minutes, &seconds, &units);
693  minutes *= inverter;
694  }
695  } else {
696  hours = minutes = 0;
697  sscanf(&line1[delta], "%d.%u", &seconds,
698  &units);
699  seconds *= inverter;
700  }
701  s_jacoShift =
702  ((hours * 3600 + minutes * 60 +
703  seconds) * s_jacoTimeRes +
704  units) * inverter;
705  }
706  break;
707  case 'T':
708  if (isalpha(line1[2])) {
709  delta = 8;
710  } else {
711  delta = 2;
712  }
713  sscanf(&line1[delta], "%u", &s_jacoTimeRes);
714  break;
715  }
716  }
717  continue;
718  }
719  current->start =
720  (unsigned long) ((a4 + s_jacoShift) * 100.0 /
721  s_jacoTimeRes);
722  current->end =
723  (unsigned long) ((b4 + s_jacoShift) * 100.0 /
724  s_jacoTimeRes);
725  } else {
726  current->start =
727  (unsigned
728  long) (((a1 * 3600 + a2 * 60 + a3) * s_jacoTimeRes + a4 +
729  s_jacoShift) * 100.0 / s_jacoTimeRes);
730  current->end =
731  (unsigned
732  long) (((b1 * 3600 + b2 * 60 + b3) * s_jacoTimeRes + b4 +
733  s_jacoShift) * 100.0 / s_jacoTimeRes);
734  }
735  p = line2.data();
736  while ((*p == ' ') || (*p == '\t')) {
737  ++p;
738  }
739  if (isalpha(*p)||*p == '[') {
740  if (sscanf(p, "%" LINE_LEN_QUOT "s %" LINE_LEN_QUOT "[^\n\r]", directive.data(), line1.data()) < 2)
741  return (subtitle_t *)ERR;
742  directive.resize(strlen(directive.c_str()));
743  std::transform(directive.begin(), directive.end(), directive.begin(),
744  [](unsigned char c){ return std::toupper(c);});
745  if ( (directive.find("RDB") != std::string::npos)
746  || (directive.find("RDC") != std::string::npos)
747  || (directive.find("RLB") != std::string::npos)
748  || (directive.find("RLG") != std::string::npos)) {
749  continue;
750  }
751  /* no alignment */
752 #if 0
753  if (directive.find("JL") != std::string::npos) {
754  current->alignment = SUB_ALIGNMENT_HLEFT;
755  } else if (directive.find("JR") != std::string::npos) {
756  current->alignment = SUB_ALIGNMENT_HRIGHT;
757  } else {
758  current->alignment = SUB_ALIGNMENT_HCENTER;
759  }
760 #endif
761  line2 = line1;
762  p = line2.data();
763  }
764  for (q = line1.data(); (!isEol(*p)); ++p) {
765  switch (*p) {
766  case '{':
767  comment++;
768  break;
769  case '}':
770  if (comment) {
771  --comment;
772  /* the next line to get rid of a blank after the comment */
773  if ((*(p + 1)) == ' ')
774  p++;
775  }
776  break;
777  case '~':
778  if (!comment) {
779  *q = ' ';
780  ++q;
781  }
782  break;
783  case ' ':
784  case '\t':
785  if ((*(p + 1) == ' ') || (*(p + 1) == '\t'))
786  break;
787  if (!comment) {
788  *q = ' ';
789  ++q;
790  }
791  break;
792  case '\\':
793  if (*(p + 1) == 'n') {
794  *q = '\0';
795  q = line1.data();
796  current->text.push_back(line1);
797  ++p;
798  break;
799  }
800  if ((toupper(*(p + 1)) == 'C')
801  || (toupper(*(p + 1)) == 'F')) {
802  ++p,++p;
803  break;
804  }
805  if ((*(p + 1) == 'B') || (*(p + 1) == 'b') ||
806  /* actually this means "insert current date here" */
807  (*(p + 1) == 'D') ||
808  (*(p + 1) == 'I') || (*(p + 1) == 'i') ||
809  (*(p + 1) == 'N') ||
810  /* actually this means "insert current time here" */
811  (*(p + 1) == 'T') ||
812  (*(p + 1) == 'U') || (*(p + 1) == 'u')) {
813  ++p;
814  break;
815  }
816  if ((*(p + 1) == '\\') ||
817  (*(p + 1) == '~') || (*(p + 1) == '{')) {
818  ++p;
819  } else if (isEol(*(p + 1))) {
820  std::string tmpstr {};
821  if (!read_line_from_input(demuxstr, tmpstr))
822  return nullptr;
823  trail_space(tmpstr);
824  // The std::string addition can reallocate...
825  size_t offset = p - line2.data();
826  line2 += tmpstr;
827  p = line2.data() + offset;
828  break;
829  }
830  // Checked xine-lib-1.2.8. No fix there. Seems like it
831  // should be a break.
832  break;
833  default:
834  if (!comment) {
835  *q = *p;
836  ++q;
837  }
838  }
839  }
840  *q = '\0';
841  current->text.push_back(line1);
842  }
843  return current;
844 }
845 
847  std::string line;
848  int a1=0,a2=0,a3=0,a4=0; // NOLINT(readability-isolate-declaration)
849 
850  while (current->text.empty()) {
851  if (!read_line_from_input(demuxstr, line)) return nullptr;
852  if (line[0]!='{')
853  continue;
854  if (sscanf (line.data(), "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4) < 4)
855  continue;
856  current->start = a1*360000+a2*6000+a3*100+a4/10;
857  for (;;) {
858  if (!read_line_from_input(demuxstr, line)) break;
859  if (line[0]=='}') break;
860  size_t len = line.find_first_of("\n\r");
861  if (len == 0)
862  break;
863  current->text.push_back(line.substr(0, len));
864  }
865  }
866  return current;
867 }
868 
870  std::string line;
871  int h = 0;
872  int m = 0;
873  int s = 0;
874 
875  do {
876  if (!read_line_from_input (demuxstr, line)) return nullptr;
877  } while (sscanf (line.data(), "[%d:%d:%d]", &h, &m, &s) != 3);
878 
879  if (!read_line_from_input (demuxstr, line)) return nullptr;
880 
881  current->start = 360000 * h + 6000 * m + 100 * s;
882  current->end = -1;
883 
884  char *next=line.data();
885  std::string out {};
886  while ((next = sub_readtext (next, out))) {
887  if (next==ERR) return (subtitle_t *)ERR;
888  current->text.push_back(out);
889  }
890  current->text.push_back(out);
891 
892  return current;
893 }
894 
895 /* Code from subreader.c of MPlayer
896 ** Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
897 */
898 
900  std::string line;
901  std::string line2; line2.resize(LINE_LEN);
902 
903  do {
904  if (!read_line_from_input (demuxstr, line)) return nullptr;
905  } while ((sscanf (line.data(),
906  "[%" SCNd64 "][%" SCNd64 "]%" LINE_LEN_QUOT "[^\r\n]",
907  &(current->start), &(current->end), line2.data()) < 3));
908  current->start *= 10;
909  current->end *= 10;
910 
911  char *p=line2.data();
912  char *next=p;
913  std::string out {};
914  while ((next = sub_readtext (next, out))) {
915  if (next == ERR) {return (subtitle_t *)ERR;}
916  current->text.push_back(out);
917  }
918  current->text.push_back(out);
919 
920  return current;
921 }
922 
923 
924 static int sub_autodetect (demux_sputext_t *demuxstr) {
925 
926  std::string line;
927  int i = 0;
928  int j = 0;
929  char p = 0;
930 
931  while (j < 100) {
932  j++;
933  if (!read_line_from_input(demuxstr, line))
934  return FORMAT_UNKNOWN;
935 
936  std::transform(line.begin(), line.end(), line.begin(),
937  [](unsigned char c){ return std::tolower(c);});
938 
939  if ((sscanf (line.data(), "{%d}{}", &i)==1) ||
940  (sscanf (line.data(), "{%d}{%d}", &i, &i)==2)) {
941  demuxstr->uses_time=0;
942  return FORMAT_MICRODVD;
943  }
944 
945  if (sscanf (line.data(), "%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) {
946  demuxstr->uses_time=1;
947  return FORMAT_SUBRIP;
948  }
949 
950  if (sscanf (line.data(), "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
951  demuxstr->uses_time=1;
952  return FORMAT_SUBVIEWER;
953  }
954 
955  if (sscanf (line.data(), "%d:%d:%d,%d,%d:%d:%d,%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
956  demuxstr->uses_time=1;
957  return FORMAT_SUBVIEWER;
958  }
959 
960  if (line.find("<sami>") != std::string::npos) {
961  demuxstr->uses_time=1;
962  return FORMAT_SAMI;
963  }
964  // Sscanf stops looking at the format string once it populates the
965  // last argument, so it never validates the colon after the
966  // seconds. Add a final "the rest of the line" argument to get
967  // that validation, so that JACO subtitles can be distinguished
968  // from this format.
969  std::string line2; line2.resize(LINE_LEN);
970  if (sscanf (line.data(), "%d:%d:%d:%" LINE_LEN_QUOT "[^\n\r]",
971  &i, &i, &i, line2.data() )==4) {
972  demuxstr->uses_time=1;
973  return FORMAT_VPLAYER;
974  }
975  /*
976  * A RealText format is a markup language, starts with <window> tag,
977  * options (behaviour modifiers) are possible.
978  */
979  if (line.find("<window") != std::string::npos) {
980  demuxstr->uses_time=1;
981  return FORMAT_RT;
982  }
983  if ((line.find("dialogue: marked") != std::string::npos) ||
984  (line.find("dialogue: ") != std::string::npos)) {
985  demuxstr->uses_time=1;
986  return FORMAT_SSA;
987  }
988  if (sscanf (line.data(), "%d,%d,\"%c", &i, &i, (char *) &i) == 3) {
989  demuxstr->uses_time=0;
990  return FORMAT_PJS;
991  }
992  if (sscanf (line.data(), "format=%d", &i) == 1) {
993  demuxstr->uses_time=0;
994  return FORMAT_MPSUB;
995  }
996  if (sscanf (line.data(), "format=tim%c", &p)==1 && p=='e') {
997  demuxstr->uses_time=1;
998  return FORMAT_MPSUB;
999  }
1000  if (line.find("-->>") != std::string::npos) {
1001  demuxstr->uses_time=0;
1002  return FORMAT_AQTITLE;
1003  }
1004  if (sscanf(line.data(), "@%d @%d", &i, &i) == 2 ||
1005  sscanf(line.data(), "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8) {
1006  demuxstr->uses_time = 1;
1007  return FORMAT_JACOBSUB;
1008  }
1009  if (sscanf(line.data(), "{t %d:%d:%d:%d",&i, &i, &i, &i) == 4) {
1010  demuxstr->uses_time = 1;
1011  return FORMAT_SUBVIEWER2;
1012  }
1013  if (sscanf(line.data(), "[%d:%d:%d]", &i, &i, &i) == 3) {
1014  demuxstr->uses_time = 1;
1015  return FORMAT_SUBRIP09;
1016  }
1017 
1018  if (sscanf (line.data(), "[%d][%d]", &i, &i) == 2) {
1019  demuxstr->uses_time = 1;
1020  return FORMAT_MPL2;
1021  }
1022  }
1023  return FORMAT_UNKNOWN; /* too many bad lines */
1024 }
1025 
1026 // These functions all return either 1) nullptr, 2) (subtitle_t*)ERR,
1027 // or 3) a pointer to the dest parameter.
1029 const std::array<read_func_ptr, 14> read_func
1045 };
1046 
1047 bool sub_read_file (demux_sputext_t *demuxstr) {
1048 
1049  /* Rewind (sub_autodetect() needs to read input from the beginning) */
1050  demuxstr->rbuffer_cur = 0;
1051  demuxstr->buf.clear();
1052  demuxstr->buf.reserve(SUB_BUFSIZE);
1053 
1054  demuxstr->format=sub_autodetect (demuxstr);
1055  if (demuxstr->format==FORMAT_UNKNOWN) {
1056  return false;
1057  }
1058 
1059  /*printf("Detected subtitle file format: %d\n", demuxstr->format);*/
1060 
1061  /* Rewind */
1062  demuxstr->rbuffer_cur = 0;
1063  demuxstr->buf.clear();
1064 
1065  demuxstr->num=0;
1066  int timeout = MAX_TIMEOUT;
1067 
1068  if (demuxstr->uses_time) timeout *= 100;
1069  else timeout *= 10;
1070 
1071  while(true) {
1072  subtitle_t dummy {};
1073  subtitle_t *sub = read_func[demuxstr->format] (demuxstr, &dummy);
1074  if (!sub) {
1075  break; /* EOF */
1076  }
1077 
1078  if (sub==ERR)
1079  ++demuxstr->errs;
1080  else {
1081  demuxstr->subtitles.push_back(*sub);
1082  if (demuxstr->num > 0 && demuxstr->subtitles[demuxstr->num-1].end == -1) {
1083  /* end time not defined in the subtitle */
1084  if (timeout > sub->start - demuxstr->subtitles[demuxstr->num-1].start) {
1085  demuxstr->subtitles[demuxstr->num-1].end = sub->start;
1086  } else {
1087  demuxstr->subtitles[demuxstr->num-1].end = demuxstr->subtitles[demuxstr->num-1].start + timeout;
1088  }
1089  }
1090  ++demuxstr->num; /* Error vs. Valid */
1091  }
1092  }
1093  /* timeout of last subtitle */
1094  if (demuxstr->num > 0 && demuxstr->subtitles[demuxstr->num-1].end == -1)
1095  {
1096  demuxstr->subtitles[demuxstr->num-1].end = demuxstr->subtitles[demuxstr->num-1].start + timeout;
1097  }
1098 
1099 #if DEBUG_XINE_DEMUX_SPUTEXT
1100  {
1101  char buffer[1024];
1102 
1103  sprintf(buffer, "Read %i subtitles", demuxstr->num);
1104 
1105  if(demuxstr->errs)
1106  sprintf(buffer + strlen(buffer), ", %i bad line(s).\n", demuxstr->errs);
1107  else
1108  strcat(buffer, "\n");
1109 
1110  printf("%s", buffer);
1111  }
1112 #endif
1113 
1114  return true;
1115 }
sub_read_line_jacobsub
static subtitle_t * sub_read_line_jacobsub(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:631
sub_read_file
bool sub_read_file(demux_sputext_t *demuxstr)
Definition: xine_demux_sputext.cpp:1047
demux_sputext_t::rbuffer_cur
off_t rbuffer_cur
Definition: xine_demux_sputext.h:41
build_compdb.dest
dest
Definition: build_compdb.py:9
sub_read_line_mpsub
static subtitle_t * sub_read_line_mpsub(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:562
hardwareprofile.smolt.timeout
float timeout
Definition: smolt.py:103
demux_sputext_t::errs
int errs
Definition: xine_demux_sputext.h:51
demux_sputext_t::uses_time
int uses_time
Definition: xine_demux_sputext.h:50
isEol
static bool isEol(char p)
Definition: xine_demux_sputext.cpp:88
demux_sputext_t
Definition: xine_demux_sputext.h:37
sub_autodetect
static int sub_autodetect(demux_sputext_t *demuxstr)
Definition: xine_demux_sputext.cpp:924
FORMAT_RT
#define FORMAT_RT
Definition: xine_demux_sputext.h:19
FORMAT_VPLAYER
#define FORMAT_VPLAYER
Definition: xine_demux_sputext.h:18
sub_read_line_aqt
static subtitle_t * sub_read_line_aqt(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:608
sub_read_line_sami
static subtitle_t * sub_read_line_sami(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:138
sub_read_line_ssa
static subtitle_t * sub_read_line_ssa(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:459
read_line_from_input
static char * read_line_from_input(demux_sputext_t *demuxstr, std::string &line)
Definition: xine_demux_sputext.cpp:105
demux_sputext_t::rbuffer_text
char * rbuffer_text
Definition: xine_demux_sputext.h:39
MAX_TIMEOUT
#define MAX_TIMEOUT
Definition: xine_demux_sputext.h:9
NULL
#define NULL
Definition: H2645Parser.h:67
MythDate::current
QDateTime current(bool stripped)
Returns current Date and Time in UTC.
Definition: mythdate.cpp:10
tmp
static guint32 * tmp
Definition: goom_core.cpp:31
demux_sputext_t::num
int num
Definition: xine_demux_sputext.h:53
trail_space
static void trail_space(std::string &str)
Definition: xine_demux_sputext.cpp:92
read_func_ptr
subtitle_t *(*)(demux_sputext_t *demuxstr, subtitle_t *dest) read_func_ptr
Definition: xine_demux_sputext.cpp:1028
sub_read_line_mpl2
static subtitle_t * sub_read_line_mpl2(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:899
FORMAT_SUBVIEWER
#define FORMAT_SUBVIEWER
Definition: xine_demux_sputext.h:16
sub_read_line_microdvd
static subtitle_t * sub_read_line_microdvd(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:244
mythlogging.h
FORMAT_SSA
#define FORMAT_SSA
Definition: xine_demux_sputext.h:20
demux_sputext_t::buf
std::string buf
Definition: xine_demux_sputext.h:45
hardwareprofile.config.p
p
Definition: config.py:33
mark
Definition: lang.cpp:21
sub_read_line_subrip
static subtitle_t * sub_read_line_subrip(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:300
demux_sputext_t::mpsub_position
float mpsub_position
Definition: xine_demux_sputext.h:48
p2
static guint32 * p2
Definition: goom_core.cpp:31
SUB_BUFSIZE
#define SUB_BUFSIZE
Definition: xine_demux_sputext.h:8
LINE_LEN
#define LINE_LEN
Definition: xine_demux_sputext.cpp:57
FORMAT_MPL2
#define FORMAT_MPL2
Definition: xine_demux_sputext.h:27
FORMAT_JACOBSUB
#define FORMAT_JACOBSUB
Definition: xine_demux_sputext.h:24
FORMAT_AQTITLE
#define FORMAT_AQTITLE
Definition: xine_demux_sputext.h:23
FORMAT_UNKNOWN
#define FORMAT_UNKNOWN
Definition: xine_demux_sputext.h:13
subtitle_t
Definition: xine_demux_sputext.h:29
hardwareprofile.smolt.long
long
Definition: smolt.py:76
FORMAT_SUBRIP
#define FORMAT_SUBRIP
Definition: xine_demux_sputext.h:15
sub_read_line_subviewer2
static subtitle_t * sub_read_line_subviewer2(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:846
xine_demux_sputext.h
LINE_LEN_QUOT
#define LINE_LEN_QUOT
Definition: xine_demux_sputext.cpp:58
FORMAT_PJS
#define FORMAT_PJS
Definition: xine_demux_sputext.h:21
sub_readtext
static char * sub_readtext(char *source, std::string &dest)
Extract the next token from a string.
Definition: xine_demux_sputext.cpp:224
off_t
#define off_t
Definition: mythiowrapper.cpp:240
FORMAT_SAMI
#define FORMAT_SAMI
Definition: xine_demux_sputext.h:17
read_func
const std::array< read_func_ptr, 14 > read_func
Definition: xine_demux_sputext.cpp:1030
sub_read_line_pjs
static subtitle_t * sub_read_line_pjs(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:533
demux_sputext_t::next_line
std::string next_line
Definition: xine_demux_sputext.h:56
sub_read_line_subviewer
static subtitle_t * sub_read_line_subviewer(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:267
sub_read_line_vplayer
static subtitle_t * sub_read_line_vplayer(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:370
sub_read_line_rt
static subtitle_t * sub_read_line_rt(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:416
strcasestr
char * strcasestr(const char *str, const char *pattern)
Definition: xine_demux_sputext.cpp:64
subtitle_t::start
int64_t start
Starting time in msec or starting frame.
Definition: xine_demux_sputext.h:31
FORMAT_MPSUB
#define FORMAT_MPSUB
Definition: xine_demux_sputext.h:22
demux_sputext_t::format
int format
Definition: xine_demux_sputext.h:55
FORMAT_SUBRIP09
#define FORMAT_SUBRIP09
Definition: xine_demux_sputext.h:26
demux_sputext_t::subtitles
std::vector< subtitle_t > subtitles
Definition: xine_demux_sputext.h:52
sub_read_line_subrip09
static subtitle_t * sub_read_line_subrip09(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:869
ERR
#define ERR
Definition: xine_demux_sputext.cpp:56
FORMAT_SUBVIEWER2
#define FORMAT_SUBVIEWER2
Definition: xine_demux_sputext.h:25
demux_sputext_t::rbuffer_len
off_t rbuffer_len
Definition: xine_demux_sputext.h:40
FORMAT_MICRODVD
#define FORMAT_MICRODVD
Definition: xine_demux_sputext.h:14