MythTV  master
xine_demux_sputext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2000-2003 the xine project
3  *
4  * This file is part of xine, a free video player.
5  *
6  * xine is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * xine is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  * code based on old libsputext/xine_decoder.c
21  *
22  * code based on mplayer module:
23  *
24  * Subtitle reader with format autodetection
25  *
26  * Written by laaz
27  * Some code cleanup & realloc() by A'rpi/ESP-team
28  * dunnowhat sub format by szabi
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include <array>
36 #include <cctype>
37 #include <cstdio>
38 #include <cstdlib>
39 #include <cstring>
40 #include <iostream>
41 #include <fcntl.h>
42 #include <cctype>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <unistd.h>
47 
48 #include "mythlogging.h"
49 
50 #define LOG_MODULE "demux_sputext"
51 #define LOG_VERBOSE
52 /*
53 #define LOG
54 */
55 
56 #define ERR ((void *)-1)
57 #define LINE_LEN 1000
58 #define LINE_LEN_QUOT "1000"
59 
60 /*
61  * Demuxer code start
62  */
63 
/// Tell whether @p p ends a line: carriage return, line feed or the
/// terminating NUL all count.
static bool isEol(char p) {
    switch (p) {
    case '\r':
    case '\n':
    case '\0':
        return true;
    default:
        return false;
    }
}
67 
/**
 * Strip leading and trailing whitespace (space, tab, CR, LF) from @p str,
 * in place.  Whitespace between words is kept.  A string made up only of
 * whitespace becomes empty.
 */
static inline void trail_space(std::string& str)
{
    static constexpr const char *kWhitespace = " \t\r\n";

    // Locate the last character worth keeping.  Searching for the last
    // whitespace character instead would cut the string at the final
    // inner blank and drop the last word.
    const auto last = str.find_last_not_of(kWhitespace);
    if (last == std::string::npos) {
        // Nothing but whitespace (or already empty).
        str.clear();
        return;
    }
    str.erase(last + 1);

    // At least one non-whitespace character remains, so this always finds
    // a position; erasing zero characters is a no-op.
    const auto first = str.find_first_not_of(kWhitespace);
    str.erase(0, first);
}
77 
78 /*
79  *
80  */
81 static char *read_line_from_input(demux_sputext_t *demuxstr, std::string& line) {
82 
83  line.reserve(LINE_LEN);
84  if ((line.capacity() - demuxstr->buf.size()) > 512) {
85  off_t nread = line.capacity() - demuxstr->buf.size();
86  nread = std::min(nread, demuxstr->rbuffer_len - demuxstr->rbuffer_cur);
87  if (nread < 0) {
88  printf("read failed.\n");
89  return nullptr;
90  }
91  if (nread > 0) {
92  demuxstr->buf.append(&demuxstr->rbuffer_text[demuxstr->rbuffer_cur],
93  nread);
94  demuxstr->rbuffer_cur += nread;
95  }
96  }
97 
98  size_t index = demuxstr->buf.find('\n');
99  if (index != std::string::npos) {
100  line.assign(demuxstr->buf, 0, index+1);
101  demuxstr->buf.erase(0, index+1);
102  return line.data();
103  }
104  if (!demuxstr->buf.empty()) {
105  line = demuxstr->buf;
106  demuxstr->buf.clear();
107  return line.data();
108  }
109 
110  return nullptr;
111 }
112 
113 
115 
116  static std::string s_line;
117  static char *s_s = nullptr;
118  std::string text;
119 
120  current->start = 0;
121  current->end = -1;
122  int state = 0;
123 
124  /* read the first line */
125  if (!s_s)
126  if (!(s_s = read_line_from_input(demuxstr, s_line))) return nullptr;
127 
128  do {
129  switch (state) {
130 
131  case 0: /* find "START=" */
132  s_s = strcasestr (s_s, "Start=");
133  if (s_s) {
134  current->start = strtol (s_s + 6, &s_s, 0) / 10;
135  state = 1; continue;
136  }
137  break;
138 
139  case 1: /* find "<P" */
140  if ((s_s = strcasestr (s_s, "<P"))) { s_s += 2; state = 2; continue; }
141  break;
142 
143  case 2: /* find ">" */
144  if ((s_s = strchr (s_s, '>'))) { s_s++; state = 3; text.clear(); continue; }
145  break;
146 
147  case 3: /* get all text until '<' appears */
148  if (*s_s == '\0') { break; }
149  else if (strncasecmp (s_s, "&nbsp;", 6) == 0) { text += ' '; s_s += 6; }
150  else if (*s_s == '\r') { s_s++; }
151  else if (strncasecmp (s_s, "<br>", 4) == 0 || *s_s == '\n') {
152  trail_space (text);
153  if (!text.empty())
154  current->text.push_back(text);
155  text.clear();
156  if (*s_s == '\n') s_s++; else s_s += 4;
157  }
158  else if (*s_s == '<') { state = 4; }
159  else text += *s_s++;
160  continue;
161 
162  case 4: /* get current->end or skip <TAG> */
163  char *q = strcasestr (s_s, "start=");
164  if (q) {
165  current->end = strtol (q + 6, &q, 0) / 10 - 1;
166  trail_space (text);
167  if (!text.empty())
168  current->text.push_back(text);
169  if (!current->text.empty()) { state = 99; break; }
170  state = 0; continue;
171  }
172  s_s = strchr (s_s, '>');
173  if (s_s) { s_s++; state = 3; continue; }
174  break;
175  }
176 
177  /* read next line */
178  if (state != 99 && !(s_s = read_line_from_input (demuxstr, s_line)))
179  return nullptr;
180 
181  } while (state != 99);
182 
183  return current;
184 }
185 
186 
187 
/**
 * Copy one text field out of @p source.
 *
 * A field runs up to the first '|', CR, LF or the terminating NUL.  It is
 * stored in @p dest, then every following '|', CR and LF is skipped.
 *
 * @param source  NUL-terminated text to split, or nullptr.
 * @param dest    Receives the field; not modified when @p source is nullptr.
 * @return Pointer to the start of the next field, or nullptr when @p source
 *         is nullptr or this was the last field.
 */
static char *sub_readtext(char *source, std::string& dest) {
    if (!source)
        return nullptr;

    // Length of the field: everything before the first separator (or NUL).
    const size_t field_len = std::strcspn(source, "\r\n|");
    dest.assign(source, field_len);

    // Step over the run of separators that follows the field.
    char *rest = source + field_len;
    rest += std::strspn(rest, "\r\n|");

    /* more text follows: not the last field */
    return (*rest != '\0') ? rest : nullptr;
}
219 
221 
222  std::string line; line.reserve(LINE_LEN + 1);
223  std::string line2; line2.reserve(LINE_LEN + 1);
224 
225  current->end=-1;
226  do {
227  if (!read_line_from_input (demuxstr, line)) return nullptr;
228  } while ((sscanf (line.c_str(), "{%" SCNd64 "}{}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), line2.data()) !=2) &&
229  (sscanf (line.c_str(), "{%" SCNd64 "}{%" SCNd64 "}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), &(current->end),line2.data()) !=3)
230  );
231 
232  char *next=line2.data();
233  std::string out {};
234  while ((next = sub_readtext (next, out))) {
235  if (next==ERR) return (subtitle_t *)ERR;
236  current->text.push_back(out);
237  }
238  current->text.push_back(out);
239 
240  return current;
241 }
242 
244 
245  std::string line;
246  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
247 
248  while (true) {
249  if (!read_line_from_input(demuxstr, line)) return nullptr;
250  if (sscanf (line.c_str(), "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) {
251  if (sscanf (line.c_str(), "%d:%d:%d,%d,%d:%d:%d,%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
252  continue;
253  }
254  current->start = a1*360000+a2*6000+a3*100+a4;
255  current->end = b1*360000+b2*6000+b3*100+b4;
256 
257  if (!read_line_from_input(demuxstr, line))
258  return nullptr;
259 
260  char *p=line.data();
261  while (true) {
262  char *q=nullptr;
263  int len = 0;
264  for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' &&
265  (strncasecmp(p,"[br]",4) != 0); p++,len++);
266  current->text.emplace_back(q, len);
267  if (!*p || *p=='\r' || *p=='\n') break;
268  if (*p=='[') while (*p++!=']');
269  if (*p=='|') p++;
270  }
271  break;
272  }
273  return current;
274 }
275 
277  std::string line;
278  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
279  int i = 0;
280 
281  do {
282  if(!read_line_from_input(demuxstr,line))
283  return nullptr;
284  i = sscanf(line.c_str(),"%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4);
285  } while(i < 8);
286  current->start = a1*360000+a2*6000+a3*100+a4/10;
287  current->end = b1*360000+b2*6000+b3*100+b4/10;
288  bool end_sub = false;
289  do {
290  char *p = nullptr; /* pointer to the curently read char */
291  std::string temp_line; /* subtitle line that will be transfered to current->text[i] */
292  temp_line.reserve(SUB_BUFSIZE);
293  if(!read_line_from_input(demuxstr,line))
294  return (!current->text.empty()) ? current : nullptr;
295  for (p=line.data(); *p!='\0' && !end_sub; p++) {
296  bool eol = false;
297  switch(*p) {
298  case '\\':
299  if(*(p+1)=='N' || *(p+1)=='n') {
300  eol = true;
301  p++;
302  } else
303  temp_line += *p;
304  break;
305  case '{':
306  // The different code for these if/else clauses is ifdef'd out.
307  // NOLINTNEXTLINE(bugprone-branch-clone)
308  if(strncmp(p,"{\\i1}",5) == 0) {
309 #if 0 /* italic not implemented in renderer, ignore them for now */
310  temp_line.append("<i>");
311 #endif
312  p+=4;
313  }
314  else if(strncmp(p,"{\\i0}",5) == 0) {
315 #if 0 /* italic not implemented in renderer, ignore them for now */
316  temp_line.append("</i>");
317 #endif
318  p+=4;
319  }
320  else
321  temp_line += *p;
322  break;
323  case '\r': /* just ignore '\r's */
324  break;
325  case '\n':
326  eol = true;
327  break;
328  default:
329  temp_line += *p;
330  break;
331  }
332  if (eol) {
333  if (!temp_line.empty())
334  {
335  current->text.push_back(temp_line);
336  temp_line.clear();
337  } else {
338  end_sub = true;
339  }
340  }
341  }
342  } while (!end_sub);
343  return current;
344 }
345 
347  std::string line;
348  int a1=0,a2=0,a3=0,b1=0,b2=0,b3=0; // NOLINT(readability-isolate-declaration)
349 
350  while (current->text.empty()) {
351  if( demuxstr->next_line.empty() ) {
352  /* if the buffer is empty.... */
353  if( !read_line_from_input(demuxstr, line) ) return nullptr;
354  } else {
355  /* ... get the current line from buffer. */
356  line = demuxstr->next_line;
357  demuxstr->next_line.clear();
358  }
359  /* Initialize buffer with next line */
360  if( ! read_line_from_input( demuxstr, demuxstr->next_line) ) {
361  demuxstr->next_line.clear();
362  return nullptr;
363  }
364  if( (sscanf( line.c_str(), "%d:%d:%d:", &a1, &a2, &a3) < 3) ||
365  (sscanf( demuxstr->next_line.c_str(), "%d:%d:%d:", &b1, &b2, &b3) < 3) )
366  continue;
367  current->start = a1*360000+a2*6000+a3*100;
368  current->end = b1*360000+b2*6000+b3*100;
369  if ((current->end - current->start) > LINE_LEN)
370  current->end = current->start + LINE_LEN; /* not too long though. */
371  /* teraz czas na wkopiowanie stringu */
372  char *p=line.data();
373  /* finds the body of the subtitle_t */
374  for (int i=0; i<3; i++){
375  char *p2=strchr( p, ':');
376  if( p2 == nullptr ) break;
377  p=p2+1;
378  }
379 
380  char *next=p;
381  std::string out {};
382  while( (next = sub_readtext( next, out )) ) {
383  if (next==ERR)
384  return (subtitle_t *)ERR;
385  current->text.push_back(out);
386  }
387  current->text.push_back(out);
388  }
389  return current;
390 }
391 
393  /*
394  * TODO: This format uses quite rich (sub/super)set of xhtml
395  * I couldn't check it since DTD is not included.
396  * WARNING: full XML parses can be required for proper parsing
397  */
398  std::string line;
399  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
400  int plen = 0;
401 
402  while (current->text.empty()) {
403  if (!read_line_from_input(demuxstr, line)) return nullptr;
404  /*
405  * TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
406  * to describe the same moment in time. Maybe there are even more formats in use.
407  */
408  if (sscanf (line.c_str(), R"(<Time Begin="%d:%d:%d.%d" End="%d:%d:%d.%d")",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
409 
410  plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
411  if (
412  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d" %*[Ee]nd="%d:%d"%*[^<]<clear/>%n)",&a2,&a3,&b2,&b3,&plen) < 4) &&
413  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d" %*[Ee]nd="%d:%d.%d"%*[^<]<clear/>%n)",&a2,&a3,&b2,&b3,&b4,&plen) < 5) &&
414  /* (sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen) < 5) && */
415  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d.%d" %*[Ee]nd="%d:%d.%d"%*[^<]<clear/>%n)",&a2,&a3,&a4,&b2,&b3,&b4,&plen) < 6) &&
416  (sscanf (line.c_str(), R"(<%*[tT]ime %*[bB]egin="%d:%d:%d.%d" %*[Ee]nd="%d:%d:%d.%d"%*[^<]<clear/>%n)",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen) < 8)
417  )
418  continue;
419  current->start = a1*360000+a2*6000+a3*100+a4/10;
420  current->end = b1*360000+b2*6000+b3*100+b4/10;
421  /* TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml? */
422  size_t index = line.find("<clear/>");
423  char *next = (index != std::string::npos) ? &line[index+8] : nullptr;
424  std::string out {};
425  while ((next = sub_readtext (next, out))) {
426  if (next==ERR)
427  return (subtitle_t *)ERR;
428  current->text.push_back(out);
429  }
430  current->text.push_back(out);
431  }
432  return current;
433 }
434 
436  int comma = 0;
437  static int s_maxComma = 32; /* let's use 32 for the case that the */
438  /* amount of commas increase with newer SSA versions */
439 
440  int hour1 = 0;
441  int min1 = 0;
442  int sec1 = 0;
443  int hunsec1 = 0;
444  int hour2 = 0;
445  int min2 = 0;
446  int sec2 = 0;
447  int hunsec2 = 0;
448  int nothing = 0;
449  std::string line;
450  std::string line3; line3.resize(LINE_LEN);
451  char *tmp = nullptr;
452 
453  do {
454  if (!read_line_from_input(demuxstr, line)) return nullptr;
455  } while (sscanf (line.data(), "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d,"
456  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
457  &hour1, &min1, &sec1, &hunsec1,
458  &hour2, &min2, &sec2, &hunsec2,
459  line3.data()) < 9
460  &&
461  sscanf (line.data(), "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d,"
462  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
463  &hour1, &min1, &sec1, &hunsec1,
464  &hour2, &min2, &sec2, &hunsec2,
465  line3.data()) < 9 );
466 
467  size_t index = line3.find(',');
468  if (index == std::string::npos)
469  return nullptr;
470  char *line2 = &line3[index];
471 
472  for (comma = 4; comma < s_maxComma; comma ++)
473  {
474  tmp = line2;
475  if(!(tmp=strchr(++tmp, ','))) break;
476  if(*(++tmp) == ' ') break;
477  /* a space after a comma means we're already in a sentence */
478  line2 = tmp;
479  }
480 
481  if(comma < s_maxComma)s_maxComma = comma;
482  /* eliminate the trailing comma */
483  if(*line2 == ',') line2++;
484 
485  current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
486  current->end = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;
487 
488  while (((tmp=strstr(line2, "\\n")) != nullptr) || ((tmp=strstr(line2, "\\N")) != nullptr) ){
489  current->text.emplace_back(line2, tmp-line2);
490  line2=tmp+2;
491  }
492 
493  current->text.emplace_back(line2);
494 
495  return current;
496 }
497 
498 /* Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
499  * From MPlayer subreader.c :
500  *
501  * PJS subtitles reader.
502  * That's the "Phoenix Japanimation Society" format.
503  * I found some of them in http://www.scriptsclub.org/ (used for anime).
504  * The time is in tenths of second.
505  *
506  * by set, based on code by szabi (dunnowhat sub format ;-)
507  */
508 
510  std::string line;
511 
512  if (!read_line_from_input(demuxstr, line))
513  return nullptr;
514  size_t mark = line.find_first_not_of(" \t\r\n");
515  if (mark != std::string::npos)
516  line.erase(0, mark);
517  if (line.empty())
518  return nullptr;
519  if (sscanf (line.data(), "%" SCNd64 ",%" SCNd64 ",", &(current->start),
520  &(current->end)) <2)
521  return (subtitle_t *)ERR;
522  /* the files I have are in tenths of second */
523  current->start *= 10;
524  current->end *= 10;
525 
526  /* copy the string to the text buffer */
527  auto start = line.find('\"');
528  if (start == std::string::npos)
529  return (subtitle_t *)ERR;
530  auto end = line.find('\"', start + 1);
531  if (end == std::string::npos)
532  return (subtitle_t *)ERR;
533  current->text.push_back(line.substr(start+1, end));
534 
535  return current;
536 }
537 
539  std::string line;
540  float a = NAN;
541  float b = NAN;
542 
543  do {
544  if (!read_line_from_input(demuxstr, line))
545  return nullptr;
546  } while (sscanf (line.c_str(), "%f %f", &a, &b) !=2);
547 
548  demuxstr->mpsub_position += (a*100.0F);
549  current->start = (int) demuxstr->mpsub_position;
550  demuxstr->mpsub_position += (b*100.0F);
551  current->end = (int) demuxstr->mpsub_position;
552 
553  while (true) {
554  if (!read_line_from_input(demuxstr, line))
555  return (!current->text.empty()) ? current : nullptr;
556 
557  size_t mark = line.find_first_not_of(" \t\r\n");
558  if (mark != std::string::npos)
559  line.erase(0, mark);
560 
561  if (isEol(line[0]) && !current->text.empty())
562  return current;
563 
564  if (isEol(line[0]))
565  return nullptr;
566 
567  char *q = nullptr;
568  for (q=line.data(); !isEol(*q); q++);
569  *q='\0';
570  line.resize(strlen(line.c_str()));
571  if (!line.empty()) {
572  current->text.push_back(line);
573  /* printf(">%s<\n",line.data()); */
574  } else {
575  if (!current->text.empty())
576  return current;
577  return nullptr;
578  }
579  }
580 
581  return nullptr;
582 }
583 
585  std::string line;
586 
587  while (true) {
588  /* try to locate next subtitle_t */
589  if (!read_line_from_input(demuxstr, line))
590  return nullptr;
591  if (!(sscanf (line.c_str(), "-->> %" SCNd64, &(current->start)) <1))
592  break;
593  }
594 
595  while (read_line_from_input(demuxstr, line))
596  {
597  std::string out {};
598  sub_readtext(line.data(),out);
599  if (out.empty())
600  break;
601  current->text.push_back(out);
602  current->end = -1;
603  }
604  return (!current->text.empty())? current : nullptr;
605 }
606 
608  std::string line1;
609  std::string line2;
610  std::string directive; directive.resize(LINE_LEN);
611  char *p = nullptr;
612  char *q = nullptr;
613  unsigned a1=0, a2=0, a3=0, a4=0, b1=0, b2=0, b3=0, b4=0; // NOLINT(readability-isolate-declaration)
614  unsigned comment = 0;
615  static uint32_t s_jacoTimeRes = 30;
616  static uint32_t s_jacoShift = 0;
617 
618  while (current->text.empty()) {
619  if (!read_line_from_input(demuxstr, line1)) {
620  return nullptr;
621  }
622  // Support continuation lines
623  if (line1.size() >= 2) {
624  while ((line1[line1.size()-2] == '\\') && (line1[line1.size()-1] == '\n')) {
625  line1.resize(line1.size()-2);
626  if (!read_line_from_input(demuxstr, line2))
627  return nullptr;
628  size_t index = line2.find_first_not_of(" \t\r\n");
629  if (index != std::string::npos)
630  line2.erase(0, index);
631  line1 += line2;
632  }
633  }
634  line2.resize(0);
635  line2.resize(LINE_LEN);
636  if (sscanf
637  (line1.c_str(), "%u:%u:%u.%u %u:%u:%u.%u %" LINE_LEN_QUOT "[^\n\r]", &a1, &a2, &a3, &a4,
638  &b1, &b2, &b3, &b4, line2.data()) < 9) {
639  if (sscanf(line1.data(), "@%u @%u %" LINE_LEN_QUOT "[^\n\r]", &a4, &b4, line2.data()) < 3) {
640  if (line1[0] == '#') {
641  int hours = 0;
642  int minutes = 0;
643  int seconds = 0;
644  int delta = 0;
645  uint32_t units = s_jacoShift;
646  switch (toupper(line1[1])) {
647  case 'S':
648  if (isalpha(line1[2])) {
649  delta = 6;
650  } else {
651  delta = 2;
652  }
653  if (sscanf(&line1[delta], "%d", &hours)) {
654  int inverter = 1;
655  if (hours < 0) {
656  hours *= -1;
657  inverter = -1;
658  }
659  if (sscanf(&line1[delta], "%*d:%d", &minutes)) {
660  if (sscanf
661  (&line1[delta], "%*d:%*d:%d",
662  &seconds)) {
663  sscanf(&line1[delta], "%*d:%*d:%*d.%u",
664  &units);
665  } else {
666  hours = 0;
667  sscanf(&line1[delta], "%d:%d.%u",
668  &minutes, &seconds, &units);
669  minutes *= inverter;
670  }
671  } else {
672  hours = minutes = 0;
673  sscanf(&line1[delta], "%d.%u", &seconds,
674  &units);
675  seconds *= inverter;
676  }
677  s_jacoShift =
678  ((hours * 3600 + minutes * 60 +
679  seconds) * s_jacoTimeRes +
680  units) * inverter;
681  }
682  break;
683  case 'T':
684  if (isalpha(line1[2])) {
685  delta = 8;
686  } else {
687  delta = 2;
688  }
689  sscanf(&line1[delta], "%u", &s_jacoTimeRes);
690  break;
691  }
692  }
693  continue;
694  }
695  current->start =
696  (unsigned long) ((a4 + s_jacoShift) * 100.0 /
697  s_jacoTimeRes);
698  current->end =
699  (unsigned long) ((b4 + s_jacoShift) * 100.0 /
700  s_jacoTimeRes);
701  } else {
702  current->start =
703  (unsigned
704  long) (((a1 * 3600 + a2 * 60 + a3) * s_jacoTimeRes + a4 +
705  s_jacoShift) * 100.0 / s_jacoTimeRes);
706  current->end =
707  (unsigned
708  long) (((b1 * 3600 + b2 * 60 + b3) * s_jacoTimeRes + b4 +
709  s_jacoShift) * 100.0 / s_jacoTimeRes);
710  }
711  p = line2.data();
712  while ((*p == ' ') || (*p == '\t')) {
713  ++p;
714  }
715  if (isalpha(*p)||*p == '[') {
716  if (sscanf(p, "%" LINE_LEN_QUOT "s %" LINE_LEN_QUOT "[^\n\r]", directive.data(), line1.data()) < 2)
717  return (subtitle_t *)ERR;
718  directive.resize(strlen(directive.c_str()));
719  std::transform(directive.begin(), directive.end(), directive.begin(),
720  [](unsigned char c){ return std::toupper(c);});
721  if ( (directive.find("RDB") != std::string::npos)
722  || (directive.find("RDC") != std::string::npos)
723  || (directive.find("RLB") != std::string::npos)
724  || (directive.find("RLG") != std::string::npos)) {
725  continue;
726  }
727  /* no alignment */
728 #if 0
729  if (directive.find("JL") != std::string::npos) {
730  current->alignment = SUB_ALIGNMENT_HLEFT;
731  } else if (directive.find("JR") != std::string::npos) {
732  current->alignment = SUB_ALIGNMENT_HRIGHT;
733  } else {
734  current->alignment = SUB_ALIGNMENT_HCENTER;
735  }
736 #endif
737  line2 = line1;
738  p = line2.data();
739  }
740  for (q = line1.data(); (!isEol(*p)); ++p) {
741  switch (*p) {
742  case '{':
743  comment++;
744  break;
745  case '}':
746  if (comment) {
747  --comment;
748  /* the next line to get rid of a blank after the comment */
749  if ((*(p + 1)) == ' ')
750  p++;
751  }
752  break;
753  case '~':
754  if (!comment) {
755  *q = ' ';
756  ++q;
757  }
758  break;
759  case ' ':
760  case '\t':
761  if ((*(p + 1) == ' ') || (*(p + 1) == '\t'))
762  break;
763  if (!comment) {
764  *q = ' ';
765  ++q;
766  }
767  break;
768  case '\\':
769  if (*(p + 1) == 'n') {
770  *q = '\0';
771  q = line1.data();
772  current->text.push_back(line1);
773  ++p;
774  break;
775  }
776  if ((toupper(*(p + 1)) == 'C')
777  || (toupper(*(p + 1)) == 'F')) {
778  ++p,++p;
779  break;
780  }
781  if ((*(p + 1) == 'B') || (*(p + 1) == 'b') ||
782  /* actually this means "insert current date here" */
783  (*(p + 1) == 'D') ||
784  (*(p + 1) == 'I') || (*(p + 1) == 'i') ||
785  (*(p + 1) == 'N') ||
786  /* actually this means "insert current time here" */
787  (*(p + 1) == 'T') ||
788  (*(p + 1) == 'U') || (*(p + 1) == 'u')) {
789  ++p;
790  break;
791  }
792  if ((*(p + 1) == '\\') ||
793  (*(p + 1) == '~') || (*(p + 1) == '{')) {
794  ++p;
795  } else if (isEol(*(p + 1))) {
796  std::string tmpstr {};
797  if (!read_line_from_input(demuxstr, tmpstr))
798  return nullptr;
799  trail_space(tmpstr);
800  // The std::string addition can reallocate...
801  size_t offset = p - line2.data();
802  line2 += tmpstr;
803  p = line2.data() + offset;
804  break;
805  }
806  // Checked xine-lib-1.2.8. No fix there. Seems like it
807  // should be a break.
808  break;
809  default:
810  if (!comment) {
811  *q = *p;
812  ++q;
813  }
814  }
815  }
816  *q = '\0';
817  current->text.push_back(line1);
818  }
819  return current;
820 }
821 
823  std::string line;
824  int a1=0,a2=0,a3=0,a4=0; // NOLINT(readability-isolate-declaration)
825 
826  while (current->text.empty()) {
827  if (!read_line_from_input(demuxstr, line)) return nullptr;
828  if (line[0]!='{')
829  continue;
830  if (sscanf (line.data(), "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4) < 4)
831  continue;
832  current->start = a1*360000+a2*6000+a3*100+a4/10;
833  for (;;) {
834  if (!read_line_from_input(demuxstr, line)) break;
835  if (line[0]=='}') break;
836  size_t len = line.find_first_of("\n\r");
837  if (len == 0)
838  break;
839  current->text.push_back(line.substr(0, len));
840  }
841  }
842  return current;
843 }
844 
846  std::string line;
847  int h = 0;
848  int m = 0;
849  int s = 0;
850 
851  do {
852  if (!read_line_from_input (demuxstr, line)) return nullptr;
853  } while (sscanf (line.data(), "[%d:%d:%d]", &h, &m, &s) != 3);
854 
855  if (!read_line_from_input (demuxstr, line)) return nullptr;
856 
857  current->start = 360000 * h + 6000 * m + 100 * s;
858  current->end = -1;
859 
860  char *next=line.data();
861  std::string out {};
862  while ((next = sub_readtext (next, out))) {
863  if (next==ERR) return (subtitle_t *)ERR;
864  current->text.push_back(out);
865  }
866  current->text.push_back(out);
867 
868  return current;
869 }
870 
871 /* Code from subreader.c of MPlayer
872 ** Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
873 */
874 
876  std::string line;
877  std::string line2; line2.resize(LINE_LEN);
878 
879  do {
880  if (!read_line_from_input (demuxstr, line)) return nullptr;
881  } while ((sscanf (line.data(),
882  "[%" SCNd64 "][%" SCNd64 "]%" LINE_LEN_QUOT "[^\r\n]",
883  &(current->start), &(current->end), line2.data()) < 3));
884  current->start *= 10;
885  current->end *= 10;
886 
887  char *p=line2.data();
888  char *next=p;
889  std::string out {};
890  while ((next = sub_readtext (next, out))) {
891  if (next == ERR) {return (subtitle_t *)ERR;}
892  current->text.push_back(out);
893  }
894  current->text.push_back(out);
895 
896  return current;
897 }
898 
899 
900 static int sub_autodetect (demux_sputext_t *demuxstr) {
901 
902  std::string line;
903  int i = 0;
904  int j = 0;
905  char p = 0;
906 
907  while (j < 100) {
908  j++;
909  if (!read_line_from_input(demuxstr, line))
910  return FORMAT_UNKNOWN;
911 
912  std::transform(line.begin(), line.end(), line.begin(),
913  [](unsigned char c){ return std::tolower(c);});
914 
915  if ((sscanf (line.data(), "{%d}{}", &i)==1) ||
916  (sscanf (line.data(), "{%d}{%d}", &i, &i)==2)) {
917  demuxstr->uses_time=0;
918  return FORMAT_MICRODVD;
919  }
920 
921  if (sscanf (line.data(), "%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) {
922  demuxstr->uses_time=1;
923  return FORMAT_SUBRIP;
924  }
925 
926  if (sscanf (line.data(), "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
927  demuxstr->uses_time=1;
928  return FORMAT_SUBVIEWER;
929  }
930 
931  if (sscanf (line.data(), "%d:%d:%d,%d,%d:%d:%d,%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
932  demuxstr->uses_time=1;
933  return FORMAT_SUBVIEWER;
934  }
935 
936  if (line.find("<sami>") != std::string::npos) {
937  demuxstr->uses_time=1;
938  return FORMAT_SAMI;
939  }
940  // Sscanf stops looking at the format string once it populates the
941  // last argument, so it never validates the colon after the
942  // seconds. Add a final "the rest of the line" argument to get
943  // that validation, so that JACO subtitles can be distinguished
944  // from this format.
945  std::string line2; line2.resize(LINE_LEN);
946  if (sscanf (line.data(), "%d:%d:%d:%" LINE_LEN_QUOT "[^\n\r]",
947  &i, &i, &i, line2.data() )==4) {
948  demuxstr->uses_time=1;
949  return FORMAT_VPLAYER;
950  }
951  /*
952  * A RealText format is a markup language, starts with <window> tag,
953  * options (behaviour modifiers) are possible.
954  */
955  if (line.find("<window") != std::string::npos) {
956  demuxstr->uses_time=1;
957  return FORMAT_RT;
958  }
959  if ((line.find("dialogue: marked") != std::string::npos) ||
960  (line.find("dialogue: ") != std::string::npos)) {
961  demuxstr->uses_time=1;
962  return FORMAT_SSA;
963  }
964  if (sscanf (line.data(), "%d,%d,\"%c", &i, &i, (char *) &i) == 3) {
965  demuxstr->uses_time=0;
966  return FORMAT_PJS;
967  }
968  if (sscanf (line.data(), "format=%d", &i) == 1) {
969  demuxstr->uses_time=0;
970  return FORMAT_MPSUB;
971  }
972  if (sscanf (line.data(), "format=tim%c", &p)==1 && p=='e') {
973  demuxstr->uses_time=1;
974  return FORMAT_MPSUB;
975  }
976  if (line.find("-->>") != std::string::npos) {
977  demuxstr->uses_time=0;
978  return FORMAT_AQTITLE;
979  }
980  if (sscanf(line.data(), "@%d @%d", &i, &i) == 2 ||
981  sscanf(line.data(), "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8) {
982  demuxstr->uses_time = 1;
983  return FORMAT_JACOBSUB;
984  }
985  if (sscanf(line.data(), "{t %d:%d:%d:%d",&i, &i, &i, &i) == 4) {
986  demuxstr->uses_time = 1;
987  return FORMAT_SUBVIEWER2;
988  }
989  if (sscanf(line.data(), "[%d:%d:%d]", &i, &i, &i) == 3) {
990  demuxstr->uses_time = 1;
991  return FORMAT_SUBRIP09;
992  }
993 
994  if (sscanf (line.data(), "[%d][%d]", &i, &i) == 2) {
995  demuxstr->uses_time = 1;
996  return FORMAT_MPL2;
997  }
998  }
999  return FORMAT_UNKNOWN; /* too many bad lines */
1000 }
1001 
1002 // These functions all return either 1) nullptr, 2) (subtitle_t*)ERR,
1003 // or 3) a pointer to the dest parameter.
1005 const std::array<read_func_ptr, 14> read_func
1021 };
1022 
1023 bool sub_read_file (demux_sputext_t *demuxstr) {
1024 
1025  /* Rewind (sub_autodetect() needs to read input from the beginning) */
1026  demuxstr->rbuffer_cur = 0;
1027  demuxstr->buf.clear();
1028  demuxstr->buf.reserve(SUB_BUFSIZE);
1029 
1030  demuxstr->format=sub_autodetect (demuxstr);
1031  if (demuxstr->format==FORMAT_UNKNOWN) {
1032  return false;
1033  }
1034 
1035  /*printf("Detected subtitle file format: %d\n", demuxstr->format);*/
1036 
1037  /* Rewind */
1038  demuxstr->rbuffer_cur = 0;
1039  demuxstr->buf.clear();
1040 
1041  demuxstr->num=0;
1042  int timeout = MAX_TIMEOUT;
1043 
1044  if (demuxstr->uses_time) timeout *= 100;
1045  else timeout *= 10;
1046 
1047  while(true) {
1048  subtitle_t dummy {};
1049  subtitle_t *sub = read_func[demuxstr->format] (demuxstr, &dummy);
1050  if (!sub) {
1051  break; /* EOF */
1052  }
1053 
1054  if (sub==ERR)
1055  ++demuxstr->errs;
1056  else {
1057  demuxstr->subtitles.push_back(*sub);
1058  if (demuxstr->num > 0 && demuxstr->subtitles[demuxstr->num-1].end == -1) {
1059  /* end time not defined in the subtitle */
1060  if (timeout > sub->start - demuxstr->subtitles[demuxstr->num-1].start) {
1061  demuxstr->subtitles[demuxstr->num-1].end = sub->start;
1062  } else {
1063  demuxstr->subtitles[demuxstr->num-1].end = demuxstr->subtitles[demuxstr->num-1].start + timeout;
1064  }
1065  }
1066  ++demuxstr->num; /* Error vs. Valid */
1067  }
1068  }
1069  /* timeout of last subtitle */
1070  if (demuxstr->num > 0 && demuxstr->subtitles[demuxstr->num-1].end == -1)
1071  {
1072  demuxstr->subtitles[demuxstr->num-1].end = demuxstr->subtitles[demuxstr->num-1].start + timeout;
1073  }
1074 
1075 #if DEBUG_XINE_DEMUX_SPUTEXT
1076  {
1077  char buffer[1024];
1078 
1079  sprintf(buffer, "Read %i subtitles", demuxstr->num);
1080 
1081  if(demuxstr->errs)
1082  sprintf(buffer + strlen(buffer), ", %i bad line(s).\n", demuxstr->errs);
1083  else
1084  strcat(buffer, "\n");
1085 
1086  printf("%s", buffer);
1087  }
1088 #endif
1089 
1090  return true;
1091 }
sub_read_line_jacobsub
static subtitle_t * sub_read_line_jacobsub(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:607
sub_read_file
bool sub_read_file(demux_sputext_t *demuxstr)
Definition: xine_demux_sputext.cpp:1023
demux_sputext_t::rbuffer_cur
off_t rbuffer_cur
Definition: xine_demux_sputext.h:41
build_compdb.dest
dest
Definition: build_compdb.py:9
sub_read_line_mpsub
static subtitle_t * sub_read_line_mpsub(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:538
hardwareprofile.smolt.timeout
float timeout
Definition: smolt.py:103
demux_sputext_t::errs
int errs
Definition: xine_demux_sputext.h:51
demux_sputext_t::uses_time
int uses_time
Definition: xine_demux_sputext.h:50
isEol
static bool isEol(char p)
Definition: xine_demux_sputext.cpp:64
demux_sputext_t
Definition: xine_demux_sputext.h:37
sub_autodetect
static int sub_autodetect(demux_sputext_t *demuxstr)
Definition: xine_demux_sputext.cpp:900
FORMAT_RT
#define FORMAT_RT
Definition: xine_demux_sputext.h:19
FORMAT_VPLAYER
#define FORMAT_VPLAYER
Definition: xine_demux_sputext.h:18
sub_read_line_aqt
static subtitle_t * sub_read_line_aqt(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:584
sub_read_line_sami
static subtitle_t * sub_read_line_sami(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:114
sub_read_line_ssa
static subtitle_t * sub_read_line_ssa(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:435
read_line_from_input
static char * read_line_from_input(demux_sputext_t *demuxstr, std::string &line)
Definition: xine_demux_sputext.cpp:81
demux_sputext_t::rbuffer_text
char * rbuffer_text
Definition: xine_demux_sputext.h:39
MAX_TIMEOUT
#define MAX_TIMEOUT
Definition: xine_demux_sputext.h:9
MythDate::current
QDateTime current(bool stripped)
Returns current Date and Time in UTC.
Definition: mythdate.cpp:10
tmp
static guint32 * tmp
Definition: goom_core.cpp:31
demux_sputext_t::num
int num
Definition: xine_demux_sputext.h:53
trail_space
static void trail_space(std::string &str)
Definition: xine_demux_sputext.cpp:68
read_func_ptr
subtitle_t *(*)(demux_sputext_t *demuxstr, subtitle_t *dest) read_func_ptr
Definition: xine_demux_sputext.cpp:1004
sub_read_line_mpl2
static subtitle_t * sub_read_line_mpl2(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:875
FORMAT_SUBVIEWER
#define FORMAT_SUBVIEWER
Definition: xine_demux_sputext.h:16
sub_read_line_microdvd
static subtitle_t * sub_read_line_microdvd(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:220
mythlogging.h
FORMAT_SSA
#define FORMAT_SSA
Definition: xine_demux_sputext.h:20
demux_sputext_t::buf
std::string buf
Definition: xine_demux_sputext.h:45
hardwareprofile.config.p
p
Definition: config.py:33
mark
Definition: lang.cpp:21
sub_read_line_subrip
static subtitle_t * sub_read_line_subrip(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:276
demux_sputext_t::mpsub_position
float mpsub_position
Definition: xine_demux_sputext.h:48
p2
static guint32 * p2
Definition: goom_core.cpp:31
SUB_BUFSIZE
#define SUB_BUFSIZE
Definition: xine_demux_sputext.h:8
LINE_LEN
#define LINE_LEN
Definition: xine_demux_sputext.cpp:57
FORMAT_MPL2
#define FORMAT_MPL2
Definition: xine_demux_sputext.h:27
FORMAT_JACOBSUB
#define FORMAT_JACOBSUB
Definition: xine_demux_sputext.h:24
FORMAT_AQTITLE
#define FORMAT_AQTITLE
Definition: xine_demux_sputext.h:23
FORMAT_UNKNOWN
#define FORMAT_UNKNOWN
Definition: xine_demux_sputext.h:13
subtitle_t
Definition: xine_demux_sputext.h:29
hardwareprofile.smolt.long
long
Definition: smolt.py:76
FORMAT_SUBRIP
#define FORMAT_SUBRIP
Definition: xine_demux_sputext.h:15
sub_read_line_subviewer2
static subtitle_t * sub_read_line_subviewer2(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:822
xine_demux_sputext.h
LINE_LEN_QUOT
#define LINE_LEN_QUOT
Definition: xine_demux_sputext.cpp:58
FORMAT_PJS
#define FORMAT_PJS
Definition: xine_demux_sputext.h:21
sub_readtext
static char * sub_readtext(char *source, std::string &dest)
Extract the next token from a string.
Definition: xine_demux_sputext.cpp:200
off_t
#define off_t
Definition: mythiowrapper.cpp:238
FORMAT_SAMI
#define FORMAT_SAMI
Definition: xine_demux_sputext.h:17
read_func
const std::array< read_func_ptr, 14 > read_func
Definition: xine_demux_sputext.cpp:1006
sub_read_line_pjs
static subtitle_t * sub_read_line_pjs(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:509
demux_sputext_t::next_line
std::string next_line
Definition: xine_demux_sputext.h:56
sub_read_line_subviewer
static subtitle_t * sub_read_line_subviewer(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:243
sub_read_line_vplayer
static subtitle_t * sub_read_line_vplayer(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:346
sub_read_line_rt
static subtitle_t * sub_read_line_rt(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:392
subtitle_t::start
int64_t start
Starting time in msec or starting frame.
Definition: xine_demux_sputext.h:31
FORMAT_MPSUB
#define FORMAT_MPSUB
Definition: xine_demux_sputext.h:22
demux_sputext_t::format
int format
Definition: xine_demux_sputext.h:55
FORMAT_SUBRIP09
#define FORMAT_SUBRIP09
Definition: xine_demux_sputext.h:26
demux_sputext_t::subtitles
std::vector< subtitle_t > subtitles
Definition: xine_demux_sputext.h:52
sub_read_line_subrip09
static subtitle_t * sub_read_line_subrip09(demux_sputext_t *demuxstr, subtitle_t *current)
Definition: xine_demux_sputext.cpp:845
ERR
#define ERR
Definition: xine_demux_sputext.cpp:56
FORMAT_SUBVIEWER2
#define FORMAT_SUBVIEWER2
Definition: xine_demux_sputext.h:25
demux_sputext_t::rbuffer_len
off_t rbuffer_len
Definition: xine_demux_sputext.h:40
FORMAT_MICRODVD
#define FORMAT_MICRODVD
Definition: xine_demux_sputext.h:14