MythTV  master
xine_demux_sputext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2000-2003 the xine project
3  *
4  * This file is part of xine, a free video player.
5  *
6  * xine is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * xine is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  * code based on old libsputext/xine_decoder.c
21  *
22  * code based on mplayer module:
23  *
24  * Subtitle reader with format autodetection
25  *
26  * Written by laaz
27  * Some code cleanup & realloc() by A'rpi/ESP-team
28  * dunnowhat sub format by szabi
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include <array>
36 #include <cctype>
37 #include <cstdio>
38 #include <cstdlib>
39 #include <cstring>
40 #include <iostream>
41 #include <fcntl.h>
42 #include <sys/stat.h>
43 #include <sys/types.h>
44 #include <unistd.h>
45 
47 
49 
50 #define ERR ((void *)-1)
51 static constexpr ssize_t LINE_LEN { 1000 };
52 #define LINE_LEN_QUOT "1000" // NOLINT(cppcoreguidelines-macro-usage)
53 
54 #ifdef _WIN32
55  #include <stdlib.h>
56  #include <ctype.h>
57 
// Fallback for platforms without strcasestr(): locate the first
// occurrence of `pattern` inside `str`, comparing characters without
// regard to case. Returns a pointer into `str`, or nullptr when there is
// no match. An empty pattern matches at the start of `str`.
char *strcasestr(const char *str, const char *pattern) {
    if (*pattern == '\0')
        return const_cast<char *>(str);

    for (const char *candidate = str; *candidate != '\0'; ++candidate) {
        // Cheap rejection on the first character before comparing the rest.
        if (toupper(static_cast<unsigned char>(*candidate)) !=
            toupper(static_cast<unsigned char>(*pattern)))
            continue;

        size_t matched = 1;
        while (pattern[matched] != '\0' &&
               toupper(static_cast<unsigned char>(candidate[matched])) ==
               toupper(static_cast<unsigned char>(pattern[matched])))
            ++matched;

        // Reaching the pattern's terminator means every character agreed.
        if (pattern[matched] == '\0')
            return const_cast<char *>(candidate);
    }
    return nullptr;
}
76 #endif
77 
78 /*
79  * Demuxer code start
80  */
81 
// True when `p` terminates a line: carriage return, line feed, or the
// NUL that ends the buffer.
static bool isEol(char p) {
    switch (p) {
    case '\r':
    case '\n':
    case '\0':
        return true;
    default:
        return false;
    }
}
85 
// Strip leading and trailing blanks (space, tab, CR, LF) from `str` in
// place. Blanks in the middle of the text are preserved; a string that
// consists only of blanks becomes empty.
static inline void trail_space(std::string& str)
{
    // Position of the last character that is NOT a blank. Searching for
    // the last blank instead would cut the string at its final interior
    // space and drop the last word.
    const auto last = str.find_last_not_of(" \t\r\n");
    if (last == std::string::npos) {
        // Nothing but blanks (or already empty).
        str.clear();
        return;
    }
    str.erase(last + 1);

    const auto first = str.find_first_not_of(" \t\r\n");
    if (first != std::string::npos)
        str.erase(0, first);
}
95 
96 /*
97  *
98  */
99 static char *read_line_from_input(demux_sputext_t *demuxstr, std::string& line) {
100 
101  line.reserve(LINE_LEN);
102  if ((line.capacity() - demuxstr->buf.size()) > 512) {
103  off_t nread = line.capacity() - demuxstr->buf.size();
104  nread = std::min(nread, demuxstr->rbuffer_len - demuxstr->rbuffer_cur);
105  if (nread < 0) {
106  printf("read failed.\n");
107  return nullptr;
108  }
109  if (nread > 0) {
110  demuxstr->buf.append(&demuxstr->rbuffer_text[demuxstr->rbuffer_cur],
111  nread);
112  demuxstr->rbuffer_cur += nread;
113  }
114  }
115 
116  size_t index = demuxstr->buf.find('\n');
117  if (index != std::string::npos) {
118  line.assign(demuxstr->buf, 0, index+1);
119  demuxstr->buf.erase(0, index+1);
120  return line.data();
121  }
122  if (!demuxstr->buf.empty()) {
123  line = demuxstr->buf;
124  demuxstr->buf.clear();
125  return line.data();
126  }
127 
128  return nullptr;
129 }
130 
131 
133 
134  static std::string s_line;
135  static char *s_s = nullptr;
136  std::string text;
137 
138  current->start = 0;
139  current->end = -1;
140  int state = 0;
141 
142  /* read the first line */
143  if (!s_s)
144  if (!(s_s = read_line_from_input(demuxstr, s_line))) return nullptr;
145 
146  do {
147  switch (state) {
148 
149  case 0: /* find "START=" */
150  s_s = strcasestr (s_s, "Start=");
151  if (s_s) {
152  current->start = strtol (s_s + 6, &s_s, 0) / 10;
153  state = 1; continue;
154  }
155  break;
156 
157  case 1: /* find "<P" */
158  if ((s_s = strcasestr (s_s, "<P"))) { s_s += 2; state = 2; continue; }
159  break;
160 
161  case 2: /* find ">" */
162  if ((s_s = strchr (s_s, '>'))) { s_s++; state = 3; text.clear(); continue; }
163  break;
164 
165  case 3: /* get all text until '<' appears */
166  if (*s_s == '\0') { break; }
167  else if (strncasecmp (s_s, "&nbsp;", 6) == 0) { text += ' '; s_s += 6; }
168  else if (*s_s == '\r') { s_s++; }
169  else if (strncasecmp (s_s, "<br>", 4) == 0 || *s_s == '\n') {
170  trail_space (text);
171  if (!text.empty())
172  current->text.push_back(text);
173  text.clear();
174  if (*s_s == '\n') s_s++; else s_s += 4;
175  }
176  else if (*s_s == '<') { state = 4; }
177  else text += *s_s++;
178  continue;
179 
180  case 4: /* get current->end or skip <TAG> */
181  char *q = strcasestr (s_s, "start=");
182  if (q) {
183  current->end = strtol (q + 6, &q, 0) / 10 - 1;
184  trail_space (text);
185  if (!text.empty())
186  current->text.push_back(text);
187  if (!current->text.empty()) { state = 99; break; }
188  state = 0; continue;
189  }
190  s_s = strchr (s_s, '>');
191  if (s_s) { s_s++; state = 3; continue; }
192  break;
193  }
194 
195  /* read next line */
196  if (state != 99 && !(s_s = read_line_from_input (demuxstr, s_line)))
197  return nullptr;
198 
199  } while (state != 99);
200 
201  return current;
202 }
203 
204 
205 
218 static char *sub_readtext(char *source, std::string& dest) {
219  if (source == nullptr)
220  return nullptr;
221 
222  int len=0;
223  char *p=source;
224 
225  while ( !isEol(*p) && *p!= '|' ) {
226  p++,len++;
227  }
228 
229  dest.assign(source, len);
230 
231  while (*p=='\r' || *p=='\n' || *p=='|')
232  p++;
233 
234  if (*p) return p; /* not-last text field */
235  return (char*)nullptr; /* last text field */
236 }
237 
239 
240  std::string line; line.reserve(LINE_LEN + 1);
241  std::string line2; line2.reserve(LINE_LEN + 1);
242 
243  current->end=-1;
244  do {
245  if (!read_line_from_input (demuxstr, line)) return nullptr;
246  } while ((sscanf (line.c_str(), "{%" SCNd64 "}{}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), line2.data()) !=2) &&
247  (sscanf (line.c_str(), "{%" SCNd64 "}{%" SCNd64 "}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), &(current->end),line2.data()) !=3)
248  );
249 
250  char *next=line2.data();
251  std::string out {};
252  while ((next = sub_readtext (next, out))) {
253  if (next==ERR) return (subtitle_t *)ERR;
254  current->text.push_back(out);
255  }
256  current->text.push_back(out);
257 
258  return current;
259 }
260 
262 
263  std::string line;
264  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
265 
266  while (true) {
267  if (!read_line_from_input(demuxstr, line)) return nullptr;
268  if (sscanf (line.c_str(), "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) {
269  if (sscanf (line.c_str(), "%d:%d:%d,%d,%d:%d:%d,%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
270  continue;
271  }
272  current->start = a1*360000+a2*6000+a3*100+a4;
273  current->end = b1*360000+b2*6000+b3*100+b4;
274 
275  if (!read_line_from_input(demuxstr, line))
276  return nullptr;
277 
278  char *p=line.data();
279  while (true) {
280  char *q=nullptr;
281  int len = 0;
282  for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' &&
283  (strncasecmp(p,"[br]",4) != 0); p++,len++);
284  current->text.emplace_back(q, len);
285  if (!*p || *p=='\r' || *p=='\n') break;
286  if (*p=='[') while (*p++!=']');
287  if (*p=='|') p++;
288  }
289  break;
290  }
291  return current;
292 }
293 
295  std::string line;
296  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
297  int i = 0;
298 
299  do {
300  if(!read_line_from_input(demuxstr,line))
301  return nullptr;
302  i = sscanf(line.c_str(),"%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4);
303  } while(i < 8);
304  current->start = a1*360000+a2*6000+a3*100+a4/10;
305  current->end = b1*360000+b2*6000+b3*100+b4/10;
306  bool end_sub = false;
307  do {
308  char *p = nullptr; /* pointer to the curently read char */
309  std::string temp_line; /* subtitle line that will be transfered to current->text[i] */
310  temp_line.reserve(SUB_BUFSIZE);
311  if(!read_line_from_input(demuxstr,line))
312  return (!current->text.empty()) ? current : nullptr;
313  for (p=line.data(); *p!='\0' && !end_sub; p++) {
314  bool eol = false;
315  switch(*p) {
316  case '\\':
317  if(*(p+1)=='N' || *(p+1)=='n') {
318  eol = true;
319  p++;
320  } else
321  temp_line += *p;
322  break;
323  case '{':
324  // The different code for these if/else clauses is ifdef'd out.
325  // NOLINTNEXTLINE(bugprone-branch-clone)
326  if(strncmp(p,"{\\i1}",5) == 0) {
327 #if 0 /* italic not implemented in renderer, ignore them for now */
328  temp_line.append("<i>");
329 #endif
330  p+=4;
331  }
332  else if(strncmp(p,"{\\i0}",5) == 0) {
333 #if 0 /* italic not implemented in renderer, ignore them for now */
334  temp_line.append("</i>");
335 #endif
336  p+=4;
337  }
338  else
339  temp_line += *p;
340  break;
341  case '\r': /* just ignore '\r's */
342  break;
343  case '\n':
344  eol = true;
345  break;
346  default:
347  temp_line += *p;
348  break;
349  }
350  if (eol) {
351  if (!temp_line.empty())
352  {
353  current->text.push_back(temp_line);
354  temp_line.clear();
355  } else {
356  end_sub = true;
357  }
358  }
359  }
360  } while (!end_sub);
361  return current;
362 }
363 
365  std::string line;
366  int a1=0,a2=0,a3=0,b1=0,b2=0,b3=0; // NOLINT(readability-isolate-declaration)
367 
368  while (current->text.empty()) {
369  if( demuxstr->next_line.empty() ) {
370  /* if the buffer is empty.... */
371  if( !read_line_from_input(demuxstr, line) ) return nullptr;
372  } else {
373  /* ... get the current line from buffer. */
374  line = demuxstr->next_line;
375  demuxstr->next_line.clear();
376  }
377  /* Initialize buffer with next line */
378  if( ! read_line_from_input( demuxstr, demuxstr->next_line) ) {
379  demuxstr->next_line.clear();
380  return nullptr;
381  }
382  if( (sscanf( line.c_str(), "%d:%d:%d:", &a1, &a2, &a3) < 3) ||
383  (sscanf( demuxstr->next_line.c_str(), "%d:%d:%d:", &b1, &b2, &b3) < 3) )
384  continue;
385  current->start = a1*360000+a2*6000+a3*100;
386  current->end = b1*360000+b2*6000+b3*100;
387  if ((current->end - current->start) > LINE_LEN)
388  current->end = current->start + LINE_LEN; /* not too long though. */
389  /* now it is time to copy the string */
390  char *p=line.data();
391  /* finds the body of the subtitle_t */
392  for (int i=0; i<3; i++){
393  char *p2=strchr( p, ':');
394  if( p2 == nullptr ) break;
395  p=p2+1;
396  }
397 
398  char *next=p;
399  std::string out {};
400  while( (next = sub_readtext( next, out )) ) {
401  if (next==ERR)
402  return (subtitle_t *)ERR;
403  current->text.push_back(out);
404  }
405  current->text.push_back(out);
406  }
407  return current;
408 }
409 
411  /*
412  * TODO: This format uses quite rich (sub/super)set of xhtml
413  * I couldn't check it since DTD is not included.
414  * WARNING: a full XML parser may be required for proper parsing
415  */
416  std::string line;
417  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
418  int plen = 0;
419 
420  while (current->text.empty()) {
421  if (!read_line_from_input(demuxstr, line)) return nullptr;
422  /*
423  * TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
424  * to describe the same moment in time. Maybe there are even more formats in use.
425  */
426  if (sscanf (line.c_str(), R"(<Time Begin="%d:%d:%d.%d" End="%d:%d:%d.%d")",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
427 
428  plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
429  if (
430  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d" %*[Ee]nd="%d:%d"%*[^<]<clear/>%n)",&a2,&a3,&b2,&b3,&plen) < 4) &&
431  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d" %*[Ee]nd="%d:%d.%d"%*[^<]<clear/>%n)",&a2,&a3,&b2,&b3,&b4,&plen) < 5) &&
432  /* (sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen) < 5) && */
433  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d.%d" %*[Ee]nd="%d:%d.%d"%*[^<]<clear/>%n)",&a2,&a3,&a4,&b2,&b3,&b4,&plen) < 6) &&
434  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d:%d.%d" %*[Ee]nd="%d:%d:%d.%d"%*[^<]<clear/>%n)",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen) < 8)
435  )
436  continue;
437  current->start = a1*360000+a2*6000+a3*100+a4/10;
438  current->end = b1*360000+b2*6000+b3*100+b4/10;
439  /* TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml? */
440  size_t index = line.find("<clear/>");
441  char *next = (index != std::string::npos) ? &line[index+8] : nullptr;
442  std::string out {};
443  while ((next = sub_readtext (next, out))) {
444  if (next==ERR)
445  return (subtitle_t *)ERR;
446  current->text.push_back(out);
447  }
448  current->text.push_back(out);
449  }
450  return current;
451 }
452 
454  int comma = 0;
455  static int s_maxComma = 32; /* let's use 32 for the case that the */
456  /* amount of commas increase with newer SSA versions */
457 
458  int hour1 = 0;
459  int min1 = 0;
460  int sec1 = 0;
461  int hunsec1 = 0;
462  int hour2 = 0;
463  int min2 = 0;
464  int sec2 = 0;
465  int hunsec2 = 0;
466  int nothing = 0;
467  std::string line;
468  std::string line3; line3.resize(LINE_LEN);
469  char *tmp = nullptr;
470 
471  do {
472  if (!read_line_from_input(demuxstr, line)) return nullptr;
473  } while (sscanf (line.data(), "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d,"
474  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
475  &hour1, &min1, &sec1, &hunsec1,
476  &hour2, &min2, &sec2, &hunsec2,
477  line3.data()) < 9
478  &&
479  sscanf (line.data(), "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d,"
480  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
481  &hour1, &min1, &sec1, &hunsec1,
482  &hour2, &min2, &sec2, &hunsec2,
483  line3.data()) < 9 );
484 
485  size_t index = line3.find(',');
486  if (index == std::string::npos)
487  return nullptr;
488  char *line2 = &line3[index];
489 
490  for (comma = 4; comma < s_maxComma; comma ++)
491  {
492  tmp = line2;
493  if(!(tmp=strchr(++tmp, ','))) break;
494  if(*(++tmp) == ' ') break;
495  /* a space after a comma means we're already in a sentence */
496  line2 = tmp;
497  }
498 
499  if(comma < s_maxComma)s_maxComma = comma;
500  /* eliminate the trailing comma */
501  if(*line2 == ',') line2++;
502 
503  current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
504  current->end = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;
505 
506  while (((tmp=strstr(line2, "\\n")) != nullptr) || ((tmp=strstr(line2, "\\N")) != nullptr) ){
507  current->text.emplace_back(line2, tmp-line2);
508  line2=tmp+2;
509  }
510 
511  current->text.emplace_back(line2);
512 
513  return current;
514 }
515 
516 /* Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
517  * From MPlayer subreader.c :
518  *
519  * PJS subtitles reader.
520  * That's the "Phoenix Japanimation Society" format.
521  * I found some of them in http://www.scriptsclub.org/ (used for anime).
522  * The time is in tenths of second.
523  *
524  * by set, based on code by szabi (dunnowhat sub format ;-)
525  */
526 
528  std::string line;
529 
530  if (!read_line_from_input(demuxstr, line))
531  return nullptr;
532  size_t mark = line.find_first_not_of(" \t\r\n");
533  if (mark != std::string::npos)
534  line.erase(0, mark);
535  if (line.empty())
536  return nullptr;
537  if (sscanf (line.data(), "%" SCNd64 ",%" SCNd64 ",", &(current->start),
538  &(current->end)) <2)
539  return (subtitle_t *)ERR;
540  /* the files I have are in tenths of second */
541  current->start *= 10;
542  current->end *= 10;
543 
544  /* copy the string to the text buffer */
545  auto start = line.find('\"');
546  if (start == std::string::npos)
547  return (subtitle_t *)ERR;
548  auto end = line.find('\"', start + 1);
549  if (end == std::string::npos)
550  return (subtitle_t *)ERR;
551  current->text.push_back(line.substr(start+1, end));
552 
553  return current;
554 }
555 
557  std::string line;
558  float a = NAN;
559  float b = NAN;
560 
561  do {
562  if (!read_line_from_input(demuxstr, line))
563  return nullptr;
564  } while (sscanf (line.c_str(), "%f %f", &a, &b) !=2);
565 
566  demuxstr->mpsub_position += (a*100.0F);
567  current->start = (int) demuxstr->mpsub_position;
568  demuxstr->mpsub_position += (b*100.0F);
569  current->end = (int) demuxstr->mpsub_position;
570 
571  while (true) {
572  if (!read_line_from_input(demuxstr, line))
573  return (!current->text.empty()) ? current : nullptr;
574 
575  size_t mark = line.find_first_not_of(" \t\r\n");
576  if (mark != std::string::npos)
577  line.erase(0, mark);
578 
579  if (isEol(line[0]) && !current->text.empty())
580  return current;
581 
582  if (isEol(line[0]))
583  return nullptr;
584 
585  char *q = nullptr;
586  for (q=line.data(); !isEol(*q); q++);
587  *q='\0';
588  line.resize(strlen(line.c_str()));
589  if (!line.empty()) {
590  current->text.push_back(line);
591  /* printf(">%s<\n",line.data()); */
592  } else {
593  if (!current->text.empty())
594  return current;
595  return nullptr;
596  }
597  }
598 
599  return nullptr;
600 }
601 
603  std::string line;
604 
605  while (true) {
606  /* try to locate next subtitle_t */
607  if (!read_line_from_input(demuxstr, line))
608  return nullptr;
609  if (!(sscanf (line.c_str(), "-->> %" SCNd64, &(current->start)) <1))
610  break;
611  }
612 
613  while (read_line_from_input(demuxstr, line))
614  {
615  std::string out {};
616  sub_readtext(line.data(),out);
617  if (out.empty())
618  break;
619  current->text.push_back(out);
620  current->end = -1;
621  }
622  return (!current->text.empty())? current : nullptr;
623 }
624 
626  std::string line1;
627  std::string line2;
628  std::string directive; directive.resize(LINE_LEN);
629  char *p = nullptr;
630  char *q = nullptr;
631  unsigned a1=0, a2=0, a3=0, a4=0, b1=0, b2=0, b3=0, b4=0; // NOLINT(readability-isolate-declaration)
632  unsigned comment = 0;
633  static uint32_t s_jacoTimeRes = 30;
634  static uint32_t s_jacoShift = 0;
635 
636  while (current->text.empty()) {
637  if (!read_line_from_input(demuxstr, line1)) {
638  return nullptr;
639  }
640  // Support continuation lines
641  if (line1.size() >= 2) {
642  while ((line1[line1.size()-2] == '\\') && (line1[line1.size()-1] == '\n')) {
643  line1.resize(line1.size()-2);
644  if (!read_line_from_input(demuxstr, line2))
645  return nullptr;
646  size_t index = line2.find_first_not_of(" \t\r\n");
647  if (index != std::string::npos)
648  line2.erase(0, index);
649  line1 += line2;
650  }
651  }
652  line2.resize(0);
653  line2.resize(LINE_LEN);
654  if (sscanf
655  (line1.c_str(), "%u:%u:%u.%u %u:%u:%u.%u %" LINE_LEN_QUOT "[^\n\r]", &a1, &a2, &a3, &a4,
656  &b1, &b2, &b3, &b4, line2.data()) < 9) {
657  if (sscanf(line1.data(), "@%u @%u %" LINE_LEN_QUOT "[^\n\r]", &a4, &b4, line2.data()) < 3) {
658  if (line1[0] == '#') {
659  int hours = 0;
660  int minutes = 0;
661  int seconds = 0;
662  int delta = 0;
663  uint32_t units = s_jacoShift;
664  switch (toupper(line1[1])) {
665  case 'S':
666  if (isalpha(line1[2])) {
667  delta = 6;
668  } else {
669  delta = 2;
670  }
671  if (sscanf(&line1[delta], "%d", &hours)) {
672  int inverter = 1;
673  if (hours < 0) {
674  hours *= -1;
675  inverter = -1;
676  }
677  if (sscanf(&line1[delta], "%*d:%d", &minutes)) {
678  if (sscanf
679  (&line1[delta], "%*d:%*d:%d",
680  &seconds)) {
681  sscanf(&line1[delta], "%*d:%*d:%*d.%u",
682  &units);
683  } else {
684  hours = 0;
685  sscanf(&line1[delta], "%d:%d.%u",
686  &minutes, &seconds, &units);
687  minutes *= inverter;
688  }
689  } else {
690  hours = minutes = 0;
691  sscanf(&line1[delta], "%d.%u", &seconds,
692  &units);
693  seconds *= inverter;
694  }
695  s_jacoShift =
696  ((hours * 3600 + minutes * 60 +
697  seconds) * s_jacoTimeRes +
698  units) * inverter;
699  }
700  break;
701  case 'T':
702  if (isalpha(line1[2])) {
703  delta = 8;
704  } else {
705  delta = 2;
706  }
707  sscanf(&line1[delta], "%u", &s_jacoTimeRes);
708  break;
709  }
710  }
711  continue;
712  }
713  current->start =
714  (unsigned long) ((a4 + s_jacoShift) * 100.0 /
715  s_jacoTimeRes);
716  current->end =
717  (unsigned long) ((b4 + s_jacoShift) * 100.0 /
718  s_jacoTimeRes);
719  } else {
720  current->start =
721  (unsigned
722  long) (((a1 * 3600 + a2 * 60 + a3) * s_jacoTimeRes + a4 +
723  s_jacoShift) * 100.0 / s_jacoTimeRes);
724  current->end =
725  (unsigned
726  long) (((b1 * 3600 + b2 * 60 + b3) * s_jacoTimeRes + b4 +
727  s_jacoShift) * 100.0 / s_jacoTimeRes);
728  }
729  p = line2.data();
730  while ((*p == ' ') || (*p == '\t')) {
731  ++p;
732  }
733  if (isalpha(*p)||*p == '[') {
734  if (sscanf(p, "%" LINE_LEN_QUOT "s %" LINE_LEN_QUOT "[^\n\r]", directive.data(), line1.data()) < 2)
735  return (subtitle_t *)ERR;
736  directive.resize(strlen(directive.c_str()));
737  std::transform(directive.begin(), directive.end(), directive.begin(),
738  [](unsigned char c){ return std::toupper(c);});
739  if ( (directive.find("RDB") != std::string::npos)
740  || (directive.find("RDC") != std::string::npos)
741  || (directive.find("RLB") != std::string::npos)
742  || (directive.find("RLG") != std::string::npos)) {
743  continue;
744  }
745  /* no alignment */
746 #if 0
747  if (directive.find("JL") != std::string::npos) {
748  current->alignment = SUB_ALIGNMENT_HLEFT;
749  } else if (directive.find("JR") != std::string::npos) {
750  current->alignment = SUB_ALIGNMENT_HRIGHT;
751  } else {
752  current->alignment = SUB_ALIGNMENT_HCENTER;
753  }
754 #endif
755  line2 = line1;
756  p = line2.data();
757  }
758  for (q = line1.data(); (!isEol(*p)); ++p) {
759  switch (*p) {
760  case '{':
761  comment++;
762  break;
763  case '}':
764  if (comment) {
765  --comment;
766  /* the next line to get rid of a blank after the comment */
767  if ((*(p + 1)) == ' ')
768  p++;
769  }
770  break;
771  case '~':
772  if (!comment) {
773  *q = ' ';
774  ++q;
775  }
776  break;
777  case ' ':
778  case '\t':
779  if ((*(p + 1) == ' ') || (*(p + 1) == '\t'))
780  break;
781  if (!comment) {
782  *q = ' ';
783  ++q;
784  }
785  break;
786  case '\\':
787  if (*(p + 1) == 'n') {
788  *q = '\0';
789  q = line1.data();
790  current->text.push_back(line1);
791  ++p;
792  break;
793  }
794  if ((toupper(*(p + 1)) == 'C')
795  || (toupper(*(p + 1)) == 'F')) {
796  ++p,++p;
797  break;
798  }
799  if ((*(p + 1) == 'B') || (*(p + 1) == 'b') ||
800  /* actually this means "insert current date here" */
801  (*(p + 1) == 'D') ||
802  (*(p + 1) == 'I') || (*(p + 1) == 'i') ||
803  (*(p + 1) == 'N') ||
804  /* actually this means "insert current time here" */
805  (*(p + 1) == 'T') ||
806  (*(p + 1) == 'U') || (*(p + 1) == 'u')) {
807  ++p;
808  break;
809  }
810  if ((*(p + 1) == '\\') ||
811  (*(p + 1) == '~') || (*(p + 1) == '{')) {
812  ++p;
813  } else if (isEol(*(p + 1))) {
814  std::string tmpstr {};
815  if (!read_line_from_input(demuxstr, tmpstr))
816  return nullptr;
817  trail_space(tmpstr);
818  // The std::string addition can reallocate...
819  size_t offset = p - line2.data();
820  line2 += tmpstr;
821  p = line2.data() + offset;
822  break;
823  }
824  // Checked xine-lib-1.2.8. No fix there. Seems like it
825  // should be a break.
826  break;
827  default:
828  if (!comment) {
829  *q = *p;
830  ++q;
831  }
832  }
833  }
834  *q = '\0';
835  current->text.push_back(line1);
836  }
837  return current;
838 }
839 
841  std::string line;
842  int a1=0,a2=0,a3=0,a4=0; // NOLINT(readability-isolate-declaration)
843 
844  while (current->text.empty()) {
845  if (!read_line_from_input(demuxstr, line)) return nullptr;
846  if (line[0]!='{')
847  continue;
848  if (sscanf (line.data(), "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4) < 4)
849  continue;
850  current->start = a1*360000+a2*6000+a3*100+a4/10;
851  for (;;) {
852  if (!read_line_from_input(demuxstr, line)) break;
853  if (line[0]=='}') break;
854  size_t len = line.find_first_of("\n\r");
855  if (len == 0)
856  break;
857  current->text.push_back(line.substr(0, len));
858  }
859  }
860  return current;
861 }
862 
864  std::string line;
865  int h = 0;
866  int m = 0;
867  int s = 0;
868 
869  do {
870  if (!read_line_from_input (demuxstr, line)) return nullptr;
871  } while (sscanf (line.data(), "[%d:%d:%d]", &h, &m, &s) != 3);
872 
873  if (!read_line_from_input (demuxstr, line)) return nullptr;
874 
875  current->start = 360000 * h + 6000 * m + 100 * s;
876  current->end = -1;
877 
878  char *next=line.data();
879  std::string out {};
880  while ((next = sub_readtext (next, out))) {
881  if (next==ERR) return (subtitle_t *)ERR;
882  current->text.push_back(out);
883  }
884  current->text.push_back(out);
885 
886  return current;
887 }
888 
889 /* Code from subreader.c of MPlayer
890 ** Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
891 */
892 
894  std::string line;
895  std::string line2; line2.resize(LINE_LEN);
896 
897  do {
898  if (!read_line_from_input (demuxstr, line)) return nullptr;
899  } while ((sscanf (line.data(),
900  "[%" SCNd64 "][%" SCNd64 "]%" LINE_LEN_QUOT "[^\r\n]",
901  &(current->start), &(current->end), line2.data()) < 3));
902  current->start *= 10;
903  current->end *= 10;
904 
905  char *p=line2.data();
906  char *next=p;
907  std::string out {};
908  while ((next = sub_readtext (next, out))) {
909  if (next == ERR) {return (subtitle_t *)ERR;}
910  current->text.push_back(out);
911  }
912  current->text.push_back(out);
913 
914  return current;
915 }
916 
917 
918 static int sub_autodetect (demux_sputext_t *demuxstr) {
919 
920  std::string line;
921  int i = 0;
922  int j = 0;
923  char p = 0;
924 
925  while (j < 100) {
926  j++;
927  if (!read_line_from_input(demuxstr, line))
928  return FORMAT_UNKNOWN;
929 
930  std::transform(line.begin(), line.end(), line.begin(),
931  [](unsigned char c){ return std::tolower(c);});
932 
933  if ((sscanf (line.data(), "{%d}{}", &i)==1) ||
934  (sscanf (line.data(), "{%d}{%d}", &i, &i)==2)) {
935  demuxstr->uses_time=0;
936  return FORMAT_MICRODVD;
937  }
938 
939  if (sscanf (line.data(), "%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) {
940  demuxstr->uses_time=1;
941  return FORMAT_SUBRIP;
942  }
943 
944  if (sscanf (line.data(), "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
945  demuxstr->uses_time=1;
946  return FORMAT_SUBVIEWER;
947  }
948 
949  if (sscanf (line.data(), "%d:%d:%d,%d,%d:%d:%d,%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
950  demuxstr->uses_time=1;
951  return FORMAT_SUBVIEWER;
952  }
953 
954  if (line.find("<sami>") != std::string::npos) {
955  demuxstr->uses_time=1;
956  return FORMAT_SAMI;
957  }
958  // Sscanf stops looking at the format string once it populates the
959  // last argument, so it never validates the colon after the
960  // seconds. Add a final "the rest of the line" argument to get
961  // that validation, so that JACO subtitles can be distinguished
962  // from this format.
963  std::string line2; line2.resize(LINE_LEN);
964  if (sscanf (line.data(), "%d:%d:%d:%" LINE_LEN_QUOT "[^\n\r]",
965  &i, &i, &i, line2.data() )==4) {
966  demuxstr->uses_time=1;
967  return FORMAT_VPLAYER;
968  }
969  /*
970  * A RealText format is a markup language, starts with <window> tag,
971  * options (behaviour modifiers) are possible.
972  */
973  if (line.find("<window") != std::string::npos) {
974  demuxstr->uses_time=1;
975  return FORMAT_RT;
976  }
977  if ((line.find("dialogue: marked") != std::string::npos) ||
978  (line.find("dialogue: ") != std::string::npos)) {
979  demuxstr->uses_time=1;
980  return FORMAT_SSA;
981  }
982  if (sscanf (line.data(), "%d,%d,\"%c", &i, &i, (char *) &i) == 3) {
983  demuxstr->uses_time=0;
984  return FORMAT_PJS;
985  }
986  if (sscanf (line.data(), "format=%d", &i) == 1) {
987  demuxstr->uses_time=0;
988  return FORMAT_MPSUB;
989  }
990  if (sscanf (line.data(), "format=tim%c", &p)==1 && p=='e') {
991  demuxstr->uses_time=1;
992  return FORMAT_MPSUB;
993  }
994  if (line.find("-->>") != std::string::npos) {
995  demuxstr->uses_time=0;
996  return FORMAT_AQTITLE;
997  }
998  if (sscanf(line.data(), "@%d @%d", &i, &i) == 2 ||
999  sscanf(line.data(), "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8) {
1000  demuxstr->uses_time = 1;
1001  return FORMAT_JACOBSUB;
1002  }
1003  if (sscanf(line.data(), "{t %d:%d:%d:%d",&i, &i, &i, &i) == 4) {
1004  demuxstr->uses_time = 1;
1005  return FORMAT_SUBVIEWER2;
1006  }
1007  if (sscanf(line.data(), "[%d:%d:%d]", &i, &i, &i) == 3) {
1008  demuxstr->uses_time = 1;
1009  return FORMAT_SUBRIP09;
1010  }
1011 
1012  if (sscanf (line.data(), "[%d][%d]", &i, &i) == 2) {
1013  demuxstr->uses_time = 1;
1014  return FORMAT_MPL2;
1015  }
1016  }
1017  return FORMAT_UNKNOWN; /* too many bad lines */
1018 }
1019 
1020 // These functions all return either 1) nullptr, 2) (subtitle_t*)ERR,
1021 // or 3) a pointer to the dest parameter.
1023 const std::array<read_func_ptr, 14> read_func
1039 };
1040 
1041 bool sub_read_file (demux_sputext_t *demuxstr) {
1042 
1043  /* Rewind (sub_autodetect() needs to read input from the beginning) */
1044  demuxstr->rbuffer_cur = 0;
1045  demuxstr->buf.clear();
1046  demuxstr->buf.reserve(SUB_BUFSIZE);
1047 
1048  demuxstr->format=sub_autodetect (demuxstr);
1049  if (demuxstr->format==FORMAT_UNKNOWN) {
1050  return false;
1051  }
1052 
1053  /*printf("Detected subtitle file format: %d\n", demuxstr->format);*/
1054 
1055  /* Rewind */
1056  demuxstr->rbuffer_cur = 0;
1057  demuxstr->buf.clear();
1058 
1059  demuxstr->num=0;
1060  int timeout = MAX_TIMEOUT;
1061 
1062  if (demuxstr->uses_time) timeout *= 100;
1063  else timeout *= 10;
1064 
1065  while(true) {
1066  subtitle_t dummy {};
1067  subtitle_t *sub = read_func[demuxstr->format] (demuxstr, &dummy);
1068  if (!sub) {
1069  break; /* EOF */
1070  }
1071 
1072  if (sub==ERR)
1073  ++demuxstr->errs;
1074  else {
1075  demuxstr->subtitles.push_back(*sub);
1076  if (demuxstr->num > 0 && demuxstr->subtitles[demuxstr->num-1].end == -1) {
1077  /* end time not defined in the subtitle */
1078  if (timeout > sub->start - demuxstr->subtitles[demuxstr->num-1].start) {
1079  demuxstr->subtitles[demuxstr->num-1].end = sub->start;
1080  } else {
1081  demuxstr->subtitles[demuxstr->num-1].end = demuxstr->subtitles[demuxstr->num-1].start + timeout;
1082  }
1083  }
1084  ++demuxstr->num; /* Error vs. Valid */
1085  }
1086  }
1087  /* timeout of last subtitle */
1088  if (demuxstr->num > 0 && demuxstr->subtitles[demuxstr->num-1].end == -1)
1089  {
1090  demuxstr->subtitles[demuxstr->num-1].end = demuxstr->subtitles[demuxstr->num-1].start + timeout;
1091  }
1092 
1093 #if DEBUG_XINE_DEMUX_SPUTEXT
1094  {
1095  char buffer[1024];
1096 
1097  sprintf(buffer, "Read %i subtitles", demuxstr->num);
1098 
1099  if(demuxstr->errs)
1100  sprintf(buffer + strlen(buffer), ", %i bad line(s).\n", demuxstr->errs);
1101  else
1102  strcat(buffer, "\n");
1103 
1104  printf("%s", buffer);
1105  }
1106 #endif
1107 
1108  return true;
1109 }
sub_read_line_jacobsub
static subtitle_t * sub_read_line_jacobsub(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:625
sub_read_file
bool sub_read_file(demux_sputext_t *demuxstr)
Definition: xine_demux_sputext.cpp:1041
demux_sputext_t::rbuffer_cur
off_t rbuffer_cur
Definition: xine_demux_sputext.h:41
build_compdb.dest
dest
Definition: build_compdb.py:9
sub_read_line_mpsub
static subtitle_t * sub_read_line_mpsub(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:556
hardwareprofile.smolt.timeout
float timeout
Definition: smolt.py:103
demux_sputext_t::errs
int errs
Definition: xine_demux_sputext.h:51
demux_sputext_t::uses_time
int uses_time
Definition: xine_demux_sputext.h:50
isEol
static bool isEol(char p)
Definition: xine_demux_sputext.cpp:82
demux_sputext_t
Definition: xine_demux_sputext.h:37
sub_autodetect
static int sub_autodetect(demux_sputext_t *demuxstr)
Definition: xine_demux_sputext.cpp:918
FORMAT_RT
#define FORMAT_RT
Definition: xine_demux_sputext.h:19
FORMAT_VPLAYER
#define FORMAT_VPLAYER
Definition: xine_demux_sputext.h:18
sub_read_line_aqt
static subtitle_t * sub_read_line_aqt(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:602
sub_read_line_sami
static subtitle_t * sub_read_line_sami(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:132
sub_read_line_ssa
static subtitle_t * sub_read_line_ssa(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:453
read_line_from_input
static char * read_line_from_input(demux_sputext_t *demuxstr, std::string &line)
Definition: xine_demux_sputext.cpp:99
demux_sputext_t::rbuffer_text
char * rbuffer_text
Definition: xine_demux_sputext.h:39
MAX_TIMEOUT
#define MAX_TIMEOUT
Definition: xine_demux_sputext.h:9
MythDate::current
QDateTime current(bool stripped)
Returns current Date and Time in UTC.
Definition: mythdate.cpp:14
tmp
static guint32 * tmp
Definition: goom_core.cpp:26
demux_sputext_t::num
int num
Definition: xine_demux_sputext.h:53
trail_space
static void trail_space(std::string &str)
Definition: xine_demux_sputext.cpp:86
read_func_ptr
subtitle_t *(*)(demux_sputext_t *demuxstr, subtitle_t *dest) read_func_ptr
Definition: xine_demux_sputext.cpp:1022
sub_read_line_mpl2
static subtitle_t * sub_read_line_mpl2(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:893
FORMAT_SUBVIEWER
#define FORMAT_SUBVIEWER
Definition: xine_demux_sputext.h:16
sub_read_line_microdvd
static subtitle_t * sub_read_line_microdvd(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:238
mythlogging.h
FORMAT_SSA
#define FORMAT_SSA
Definition: xine_demux_sputext.h:20
demux_sputext_t::buf
std::string buf
Definition: xine_demux_sputext.h:45
hardwareprofile.config.p
p
Definition: config.py:33
mark
Definition: lang.cpp:22
sub_read_line_subrip
static subtitle_t * sub_read_line_subrip(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:294
demux_sputext_t::mpsub_position
float mpsub_position
Definition: xine_demux_sputext.h:48
p2
static guint32 * p2
Definition: goom_core.cpp:26
SUB_BUFSIZE
#define SUB_BUFSIZE
Definition: xine_demux_sputext.h:8
FORMAT_MPL2
#define FORMAT_MPL2
Definition: xine_demux_sputext.h:27
FORMAT_JACOBSUB
#define FORMAT_JACOBSUB
Definition: xine_demux_sputext.h:24
FORMAT_AQTITLE
#define FORMAT_AQTITLE
Definition: xine_demux_sputext.h:23
FORMAT_UNKNOWN
#define FORMAT_UNKNOWN
Definition: xine_demux_sputext.h:13
subtitle_t
Definition: xine_demux_sputext.h:29
hardwareprofile.smolt.long
long
Definition: smolt.py:76
FORMAT_SUBRIP
#define FORMAT_SUBRIP
Definition: xine_demux_sputext.h:15
LINE_LEN
static constexpr ssize_t LINE_LEN
Definition: xine_demux_sputext.cpp:51
sub_read_line_subviewer2
static subtitle_t * sub_read_line_subviewer2(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:840
xine_demux_sputext.h
LINE_LEN_QUOT
#define LINE_LEN_QUOT
Definition: xine_demux_sputext.cpp:52
FORMAT_PJS
#define FORMAT_PJS
Definition: xine_demux_sputext.h:21
sub_readtext
static char * sub_readtext(char *source, std::string &dest)
Extract the next token from a string.
Definition: xine_demux_sputext.cpp:218
off_t
#define off_t
Definition: mythiowrapper.cpp:241
FORMAT_SAMI
#define FORMAT_SAMI
Definition: xine_demux_sputext.h:17
read_func
const std::array< read_func_ptr, 14 > read_func
Definition: xine_demux_sputext.cpp:1024
sub_read_line_pjs
static subtitle_t * sub_read_line_pjs(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:527
demux_sputext_t::next_line
std::string next_line
Definition: xine_demux_sputext.h:56
sub_read_line_subviewer
static subtitle_t * sub_read_line_subviewer(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:261
sub_read_line_vplayer
static subtitle_t * sub_read_line_vplayer(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:364
sub_read_line_rt
static subtitle_t * sub_read_line_rt(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:410
strcasestr
char * strcasestr(const char *str, const char *pattern)
Definition: xine_demux_sputext.cpp:58
subtitle_t::start
int64_t start
Starting time in msec or starting frame.
Definition: xine_demux_sputext.h:31
FORMAT_MPSUB
#define FORMAT_MPSUB
Definition: xine_demux_sputext.h:22
demux_sputext_t::format
int format
Definition: xine_demux_sputext.h:55
FORMAT_SUBRIP09
#define FORMAT_SUBRIP09
Definition: xine_demux_sputext.h:26
demux_sputext_t::subtitles
std::vector< subtitle_t > subtitles
Definition: xine_demux_sputext.h:52
sub_read_line_subrip09
static subtitle_t * sub_read_line_subrip09(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:863
ERR
#define ERR
Definition: xine_demux_sputext.cpp:50
FORMAT_SUBVIEWER2
#define FORMAT_SUBVIEWER2
Definition: xine_demux_sputext.h:25
demux_sputext_t::rbuffer_len
off_t rbuffer_len
Definition: xine_demux_sputext.h:40
FORMAT_MICRODVD
#define FORMAT_MICRODVD
Definition: xine_demux_sputext.h:14