MythTV  master
xine_demux_sputext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2000-2003 the xine project
3  *
4  * This file is part of xine, a free video player.
5  *
6  * xine is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * xine is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  * code based on old libsputext/xine_decoder.c
21  *
22  * code based on mplayer module:
23  *
24  * Subtitle reader with format autodetection
25  *
26  * Written by laaz
27  * Some code cleanup & realloc() by A'rpi/ESP-team
28  * dunnowhat sub format by szabi
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include <cctype>
36 #include <cstdio>
37 #include <cstdlib>
38 #include <cstring>
39 #include <fcntl.h>
40 #include <sys/stat.h>
41 #include <sys/types.h>
42 #include <unistd.h>
44 
45 #define LOG_MODULE "demux_sputext"
46 #define LOG_VERBOSE
47 /*
48 #define LOG
49 */
50 
51 #define ERR ((void *)-1)
52 #define LINE_LEN 1000
53 #define LINE_LEN_QUOT "1000"
54 
55 /*
56  * Demuxer code start
57  */
58 
59 #define FORMAT_UNKNOWN (-1)
60 #define FORMAT_MICRODVD 0
61 #define FORMAT_SUBRIP 1
62 #define FORMAT_SUBVIEWER 2
63 #define FORMAT_SAMI 3
64 #define FORMAT_VPLAYER 4
65 #define FORMAT_RT 5
66 #define FORMAT_SSA 6 /* Sub Station Alpha */
67 #define FORMAT_PJS 7
68 #define FORMAT_MPSUB 8
69 #define FORMAT_AQTITLE 9
70 #define FORMAT_JACOBSUB 10
71 #define FORMAT_SUBVIEWER2 11
72 #define FORMAT_SUBRIP09 12
73 #define FORMAT_MPL2 13 /*Mplayer sub 2 ?*/
74 
/* Report whether p ends a line: carriage return, line feed or the NUL
 * string terminator. */
static bool eol(char p) {
    switch (p) {
    case '\0':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
78 
/*
 * Strip leading and trailing whitespace from the NUL-terminated string s,
 * in place.  A string made only of whitespace becomes the empty string.
 */
static inline void trail_space(char *s) {
    /* The <cctype> classifiers require a value representable as unsigned
     * char; passing a plain (possibly negative) char is undefined for
     * non-ASCII bytes, which subtitle text routinely contains. */
    size_t lead = 0;
    while (isspace((unsigned char)s[lead]))
        lead++;

    size_t len = strlen(s + lead);
    if (lead > 0) {
        /* One overlapping move (terminator included) instead of shifting
         * the whole string once per leading whitespace character. */
        memmove(s, s + lead, len + 1);
    }

    /* After the move s[0] is either NUL or non-whitespace, so this loop
     * always stops before running off the front of the string. */
    while (len > 0 && isspace((unsigned char)s[len - 1]))
        s[--len] = '\0';
}
97 
98 /*
99  * Reimplementation of fgets() using the input->read() method.
100  */
101 static char *read_line_from_input(demux_sputext_t *demuxstr, char *line, off_t len) {
102  off_t nread = 0;
103 
104  // Since our RemoteFile code sleeps 200ms whenever we get back less data
105  // than requested, but this code just keeps trying to read until it gets
106  // an error back, we check for empty reads so that we can stop reading
107  // when there is no more data to read
108  if (demuxstr->emptyReads == 0 && (len - demuxstr->buflen) > 512) {
109  nread = len - demuxstr->buflen;
110  if (nread > demuxstr->rbuffer_len - demuxstr->rbuffer_cur)
111  nread = demuxstr->rbuffer_len - demuxstr->rbuffer_cur;
112  if (nread < 0) {
113  printf("read failed.\n");
114  return nullptr;
115  }
116  memcpy(&demuxstr->buf[demuxstr->buflen],
117  &demuxstr->rbuffer_text[demuxstr->rbuffer_cur],
118  nread);
119  demuxstr->rbuffer_cur += nread;
120  }
121 
122  if (!nread)
123  demuxstr->emptyReads++;
124 
125  demuxstr->buflen += nread;
126  demuxstr->buf[demuxstr->buflen] = '\0';
127 
128  char *s = strchr(demuxstr->buf, '\n');
129 
130  if (line && (s || demuxstr->buflen)) {
131 
132  int linelen = s ? (s - demuxstr->buf) + 1 : demuxstr->buflen;
133 
134  memcpy(line, demuxstr->buf, linelen);
135  line[linelen] = '\0';
136 
137  memmove(demuxstr->buf, &demuxstr->buf[linelen], SUB_BUFSIZE - linelen);
138  demuxstr->buflen -= linelen;
139 
140  return line;
141  }
142 
143  return nullptr;
144 }
145 
146 
148 
149  static char s_line[LINE_LEN + 1];
150  static char *s_s = nullptr;
151  char text[LINE_LEN + 1];
152 
153  char *p = nullptr;
154  current->lines = current->start = 0;
155  current->end = -1;
156  int state = 0;
157 
158  /* read the first line */
159  if (!s_s)
160  if (!(s_s = read_line_from_input(demuxstr, s_line, LINE_LEN))) return nullptr;
161 
162  do {
163  switch (state) {
164 
165  case 0: /* find "START=" */
166  s_s = strstr (s_s, "Start=");
167  if (s_s) {
168  current->start = strtol (s_s + 6, &s_s, 0) / 10;
169  state = 1; continue;
170  }
171  break;
172 
173  case 1: /* find "<P" */
174  if ((s_s = strstr (s_s, "<P"))) { s_s += 2; state = 2; continue; }
175  break;
176 
177  case 2: /* find ">" */
178  if ((s_s = strchr (s_s, '>'))) { s_s++; state = 3; p = text; continue; }
179  break;
180 
181  case 3: /* get all text until '<' appears */
182  if (*s_s == '\0') { break; }
183  else if (*s_s == '<') { state = 4; }
184  else if (strncasecmp (s_s, "&nbsp;", 6) == 0) { *p++ = ' '; s_s += 6; }
185  else if (*s_s == '\r') { s_s++; }
186  else if (strncasecmp (s_s, "<br>", 4) == 0 || *s_s == '\n') {
187  *p = '\0'; p = text; trail_space (text);
188  if (text[0] != '\0')
189  current->text[current->lines++] = strdup (text);
190  if (*s_s == '\n') s_s++; else s_s += 4;
191  }
192  else *p++ = *s_s++;
193  continue;
194 
195  case 4: /* get current->end or skip <TAG> */
196  char *q = strstr (s_s, "Start=");
197  if (q) {
198  current->end = strtol (q + 6, &q, 0) / 10 - 1;
199  *p = '\0'; trail_space (text);
200  if (text[0] != '\0')
201  current->text[current->lines++] = strdup (text);
202  if (current->lines > 0) { state = 99; break; }
203  state = 0; continue;
204  }
205  s_s = strchr (s_s, '>');
206  if (s_s) { s_s++; state = 3; continue; }
207  break;
208  }
209 
210  /* read next line */
211  if (state != 99 && !(s_s = read_line_from_input (demuxstr, s_line, LINE_LEN)))
212  return nullptr;
213 
214  } while (state != 99);
215 
216  return current;
217 }
218 
219 
220 
233 static char *sub_readtext(char *source, char **dest) {
234  int len=0;
235  char *p=source;
236 
237  while ( !eol(*p) && *p!= '|' ) {
238  p++,len++;
239  }
240 
241  if (!dest)
242  return (char*)ERR;
243 
244  *dest= (char *)malloc (len+1);
245  if (!(*dest))
246  return (char*)ERR;
247 
248  strncpy(*dest, source, len);
249  (*dest)[len]=0;
250 
251  while (*p=='\r' || *p=='\n' || *p=='|')
252  p++;
253 
254  if (*p) return p; /* not-last text field */
255  return (char*)nullptr; /* last text field */
256 }
257 
259 
260  char line[LINE_LEN + 1];
261  char line2[LINE_LEN + 1];
262 
263  memset (current, 0, sizeof(subtitle_t));
264 
265  current->end=-1;
266  do {
267  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
268  } while ((sscanf (line, "{%ld}{}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), line2) !=2) &&
269  (sscanf (line, "{%ld}{%ld}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), &(current->end),line2) !=3)
270  );
271 
272  char *p=line2;
273  char *next=p;
274  int i=0;
275  while ((next =sub_readtext (next, &(current->text[i])))) {
276  if (next==ERR) return (subtitle_t *)ERR;
277  i++;
278  if (i>=SUB_MAX_TEXT) {
279  printf ("Too many lines in a subtitle\n");
280  current->lines=i;
281  return current;
282  }
283  }
284  current->lines= ++i;
285 
286  return current;
287 }
288 
290 
291  char line[LINE_LEN + 1];
292  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
293 
294  memset (current, 0, sizeof(subtitle_t));
295 
296  while (true) {
297  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
298  if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) {
299  if (sscanf (line, "%d:%d:%d,%d,%d:%d:%d,%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
300  continue;
301  }
302  current->start = a1*360000+a2*6000+a3*100+a4;
303  current->end = b1*360000+b2*6000+b3*100+b4;
304 
305  if (!read_line_from_input(demuxstr, line, LINE_LEN))
306  return nullptr;
307 
308  char *p=line;
309  for (current->lines=1; current->lines <= SUB_MAX_TEXT; current->lines++) {
310  char *q=nullptr;
311  int len = 0;
312  for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' &&
313  (strncasecmp(p,"[br]",4) != 0); p++,len++);
314  current->text[current->lines-1]=(char *)malloc (len+1);
315  if (!current->text[current->lines-1]) return (subtitle_t *)ERR;
316  strncpy (current->text[current->lines-1], q, len);
317  current->text[current->lines-1][len]='\0';
318  if (!*p || *p=='\r' || *p=='\n') break;
319  if (*p=='[') while (*p++!=']');
320  if (*p=='|') p++;
321  }
322  if (current->lines > SUB_MAX_TEXT) current->lines = SUB_MAX_TEXT;
323  break;
324  }
325  return current;
326 }
327 
329  char line[LINE_LEN + 1];
330  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
331  int i = 0;
332 
333  memset(current,0,sizeof(subtitle_t));
334  do {
335  if(!read_line_from_input(demuxstr,line,LINE_LEN))
336  return nullptr;
337  i = sscanf(line,"%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4);
338  } while(i < 8);
339  current->start = a1*360000+a2*6000+a3*100+a4/10;
340  current->end = b1*360000+b2*6000+b3*100+b4/10;
341  i=0;
342  int end_sub=0;
343  do {
344  char *p = nullptr; /* pointer to the curently read char */
345  char temp_line[SUB_BUFSIZE]; /* subtitle line that will be transfered to current->text[i] */
346  int temp_index = 0; /* ... and its index wich 'points' to the first EMPTY place -> last read char is at temp_index-1 if temp_index>0 */
347  temp_line[SUB_BUFSIZE-1]='\0'; /* just in case... */
348  if(!read_line_from_input(demuxstr,line,LINE_LEN)) {
349  if(i)
350  break; /* if something was read, transmit it */
351  return nullptr; /* if not, repport EOF */
352  }
353  for(temp_index=0,p=line;*p!='\0' && !end_sub && temp_index<SUB_BUFSIZE && i<SUB_MAX_TEXT;p++) {
354  switch(*p) {
355  case '\\':
356  if(*(p+1)=='N' || *(p+1)=='n') {
357  temp_line[temp_index++]='\0'; /* end of curent line */
358  p++;
359  } else
360  temp_line[temp_index++]=*p;
361  break;
362  case '{':
363 #if 0 /* italic not implemented in renderer, ignore them for now */
364  if(!strncmp(p,"{\\i1}",5) && temp_index+3<SUB_BUFSIZE) {
365  temp_line[temp_index++]='<';
366  temp_line[temp_index++]='i';
367  temp_line[temp_index++]='>';
368 #else
369  if(strncmp(p,"{\\i1}",5) == 0) { // NOLINT(bugprone-branch-clone)
370 #endif
371  p+=4;
372  }
373 #if 0 /* italic not implemented in renderer, ignore them for now */
374  else if(!strncmp(p,"{\\i0}",5) && temp_index+4<SUB_BUFSIZE) {
375  temp_line[temp_index++]='<';
376  temp_line[temp_index++]='/';
377  temp_line[temp_index++]='i';
378  temp_line[temp_index++]='>';
379 #else
380  else if(strncmp(p,"{\\i0}",5) == 0) {
381 #endif
382  p+=4;
383  }
384  else
385  temp_line[temp_index++]=*p;
386  break;
387  case '\r': /* just ignore '\r's */
388  break;
389  case '\n':
390  temp_line[temp_index++]='\0';
391  break;
392  default:
393  temp_line[temp_index++]=*p;
394  break;
395  }
396  if(temp_index>0) {
397  if(temp_index==SUB_BUFSIZE)
398  printf("Too many characters in a subtitle line\n");
399  if(temp_line[temp_index-1]=='\0' || temp_index==SUB_BUFSIZE) {
400  if(temp_index>1) { /* more than 1 char (including '\0') -> that is a valid one */
401  current->text[i]=(char *)malloc(temp_index);
402  if(!current->text[i])
403  return (subtitle_t *)ERR;
404  strncpy(current->text[i],temp_line,temp_index); /* temp_index<=SUB_BUFSIZE is always true here */
405  i++;
406  temp_index=0;
407  } else
408  end_sub=1;
409  }
410  }
411  }
412  } while(i<SUB_MAX_TEXT && (end_sub == 0));
413  if(i>=SUB_MAX_TEXT)
414  printf("Too many lines in a subtitle\n");
415  current->lines=i;
416  return current;
417 }
418 
420  char line[LINE_LEN + 1];
421  int a1=0,a2=0,a3=0,b1=0,b2=0,b3=0; // NOLINT(readability-isolate-declaration)
422 
423  memset (current, 0, sizeof(subtitle_t));
424 
425  while (!current->text[0]) {
426  if( demuxstr->next_line[0] == '\0' ) { /* if the buffer is empty.... */
427  if( !read_line_from_input(demuxstr, line, LINE_LEN) ) return nullptr;
428  } else {
429  /* ... get the current line from buffer. */
430  strncpy( line, demuxstr->next_line, LINE_LEN);
431  line[LINE_LEN] = '\0'; /* I'm scared. This makes me feel better. */
432  demuxstr->next_line[0] = '\0'; /* mark the buffer as empty. */
433  }
434  /* Initialize buffer with next line */
435  if( ! read_line_from_input( demuxstr, demuxstr->next_line, LINE_LEN) ) {
436  demuxstr->next_line[0] = '\0';
437  return nullptr;
438  }
439  if( (sscanf( line, "%d:%d:%d:", &a1, &a2, &a3) < 3) ||
440  (sscanf( demuxstr->next_line, "%d:%d:%d:", &b1, &b2, &b3) < 3) )
441  continue;
442  current->start = a1*360000+a2*6000+a3*100;
443  current->end = b1*360000+b2*6000+b3*100;
444  if ((current->end - current->start) > LINE_LEN)
445  current->end = current->start + LINE_LEN; /* not too long though. */
446  /* now it is time to copy the string over */
447  char *p=line;
448  /* finds the body of the subtitle_t */
449  for (int i=0; i<3; i++){
450  char *p2=strchr( p, ':');
451  if( p2 == nullptr ) break;
452  p=p2+1;
453  }
454 
455  char *next=p;
456  int i=0;
457  while( (next = sub_readtext( next, &(current->text[i]))) ) {
458  if (next==ERR)
459  return (subtitle_t *)ERR;
460  i++;
461  if (i>=SUB_MAX_TEXT) {
462  printf("Too many lines in a subtitle\n");
463  current->lines=i;
464  return current;
465  }
466  }
467  current->lines=++i;
468  }
469  return current;
470 }
471 
473  /*
474  * TODO: This format uses quite rich (sub/super)set of xhtml
475  * I couldn't check it since DTD is not included.
476  * WARNING: full XML parses can be required for proper parsing
477  */
478  char line[LINE_LEN + 1];
479  int a1=0,a2=0,a3=0,a4=0,b1=0,b2=0,b3=0,b4=0; // NOLINT(readability-isolate-declaration)
480  int plen = 0;
481 
482  memset (current, 0, sizeof(subtitle_t));
483 
484  while (!current->text[0]) {
485  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
486  /*
487  * TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
488  * to describe the same moment in time. Maybe there are even more formats in use.
489  */
490  if (sscanf (line, R"(<Time Begin="%d:%d:%d.%d" End="%d:%d:%d.%d")",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
491 
492  plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
493  if (
494  (sscanf (line, R"(<%*[tT]ime %*[bB]egin="%d:%d" %*[Ee]nd="%d:%d"%*[^<]<clear/>%n)",&a2,&a3,&b2,&b3,&plen) < 4) &&
495  (sscanf (line, R"(<%*[tT]ime %*[bB]egin="%d:%d" %*[Ee]nd="%d:%d.%d"%*[^<]<clear/>%n)",&a2,&a3,&b2,&b3,&b4,&plen) < 5) &&
496  /* (sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen) < 5) && */
497  (sscanf (line, R"(<%*[tT]ime %*[bB]egin="%d:%d.%d" %*[Ee]nd="%d:%d.%d"%*[^<]<clear/>%n)",&a2,&a3,&a4,&b2,&b3,&b4,&plen) < 6) &&
498  (sscanf (line, R"(<%*[tT]ime %*[bB]egin="%d:%d:%d.%d" %*[Ee]nd="%d:%d:%d.%d"%*[^<]<clear/>%n)",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen) < 8)
499  )
500  continue;
501  current->start = a1*360000+a2*6000+a3*100+a4/10;
502  current->end = b1*360000+b2*6000+b3*100+b4/10;
503  /* TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml? */
504  char *next = strstr(line,"<clear/>")+8;
505  int i=0;
506  while ((next =sub_readtext (next, &(current->text[i])))) {
507  if (next==ERR)
508  return (subtitle_t *)ERR;
509  i++;
510  if (i>=SUB_MAX_TEXT) {
511  printf("Too many lines in a subtitle\n");
512  current->lines=i;
513  return current;
514  }
515  }
516  current->lines=i+1;
517  }
518  return current;
519 }
520 
522  int comma = 0;
523  static int s_maxComma = 32; /* let's use 32 for the case that the */
524  /* amount of commas increase with newer SSA versions */
525 
526  int hour1 = 0;
527  int min1 = 0;
528  int sec1 = 0;
529  int hunsec1 = 0;
530  int hour2 = 0;
531  int min2 = 0;
532  int sec2 = 0;
533  int hunsec2 = 0;
534  int nothing = 0;
535  char line[LINE_LEN + 1];
536  char line3[LINE_LEN + 1];
537  char *tmp = nullptr;
538 
539  do {
540  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
541  } while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d,"
542  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
543  &hour1, &min1, &sec1, &hunsec1,
544  &hour2, &min2, &sec2, &hunsec2,
545  line3) < 9
546  &&
547  sscanf (line, "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d,"
548  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
549  &hour1, &min1, &sec1, &hunsec1,
550  &hour2, &min2, &sec2, &hunsec2,
551  line3) < 9 );
552 
553  char *line2=strchr(line3, ',');
554  if (!line2)
555  return nullptr;
556 
557  for (comma = 4; comma < s_maxComma; comma ++)
558  {
559  tmp = line2;
560  if(!(tmp=strchr(++tmp, ','))) break;
561  if(*(++tmp) == ' ') break;
562  /* a space after a comma means we're already in a sentence */
563  line2 = tmp;
564  }
565 
566  if(comma < s_maxComma)s_maxComma = comma;
567  /* eliminate the trailing comma */
568  if(*line2 == ',') line2++;
569 
570  current->lines=0;
571  int num=0;
572  current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
573  current->end = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;
574 
575  while (((tmp=strstr(line2, "\\n")) != nullptr) || ((tmp=strstr(line2, "\\N")) != nullptr) ){
576  current->text[num]=(char *)malloc(tmp-line2+1);
577  strncpy (current->text[num], line2, tmp-line2);
578  current->text[num][tmp-line2]='\0';
579  line2=tmp+2;
580  num++;
581  current->lines++;
582  if (current->lines >= SUB_MAX_TEXT) return current;
583  }
584 
585  current->text[num]=strdup(line2);
586  current->lines++;
587 
588  return current;
589 }
590 
591 /* Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
592  * From MPlayer subreader.c :
593  *
594  * PJS subtitles reader.
595  * That's the "Phoenix Japanimation Society" format.
596  * I found some of them in http://www.scriptsclub.org/ (used for anime).
597  * The time is in tenths of second.
598  *
599  * by set, based on code by szabi (dunnowhat sub format ;-)
600  */
601 
603  char line[LINE_LEN + 1];
604  char text[LINE_LEN + 1];
605  char *s = nullptr;
606  char *d = nullptr;
607 
608  memset (current, 0, sizeof(subtitle_t));
609 
610  if (!read_line_from_input(demuxstr, line, LINE_LEN))
611  return nullptr;
612  for (s = line; *s && isspace(*s); s++);
613  if (*s == 0)
614  return nullptr;
615  if (sscanf (line, "%ld,%ld,", &(current->start),
616  &(current->end)) <2)
617  return (subtitle_t *)ERR;
618  /* the files I have are in tenths of second */
619  current->start *= 10;
620  current->end *= 10;
621 
622  /* walk to the beginning of the string */
623  for (; *s; s++) if (*s==',') break;
624  if (*s) {
625  for (s++; *s; s++) if (*s==',') break;
626  if (*s) s++;
627  }
628  if (*s!='"') {
629  return (subtitle_t *)ERR;
630  }
631  /* copy the string to the text buffer */
632  for (s++, d=text; *s && *s!='"'; s++, d++)
633  *d=*s;
634  *d=0;
635  current->text[0] = strdup(text);
636  current->lines = 1;
637 
638  return current;
639 }
640 
642  char line[LINE_LEN + 1];
643  float a = NAN;
644  float b = NAN;
645  int num=0;
646 
647  do {
648  if (!read_line_from_input(demuxstr, line, LINE_LEN))
649  return nullptr;
650  } while (sscanf (line, "%f %f", &a, &b) !=2);
651 
652  demuxstr->mpsub_position += (a*100.0F);
653  current->start = (int) demuxstr->mpsub_position;
654  demuxstr->mpsub_position += (b*100.0F);
655  current->end = (int) demuxstr->mpsub_position;
656 
657  while (num < SUB_MAX_TEXT) {
658  if (!read_line_from_input(demuxstr, line, LINE_LEN))
659  return nullptr;
660 
661  char *p=line;
662  while (isspace(*p))
663  p++;
664 
665  if (eol(*p) && num > 0)
666  return current;
667 
668  if (eol(*p))
669  return nullptr;
670 
671  char *q = nullptr;
672  for (q=p; !eol(*q); q++);
673  *q='\0';
674  if (strlen(p)) {
675  current->text[num]=strdup(p);
676  printf(">%s<\n",p);
677  current->lines = ++num;
678  } else {
679  if (num)
680  return current;
681  return nullptr;
682  }
683  }
684 
685  return nullptr;
686 }
687 
689  char line[LINE_LEN + 1];
690 
691  memset (current, 0, sizeof(subtitle_t));
692 
693  while (true) {
694  /* try to locate next subtitle_t */
695  if (!read_line_from_input(demuxstr, line, LINE_LEN))
696  return nullptr;
697  if (!(sscanf (line, "-->> %ld", &(current->start)) <1))
698  break;
699  }
700 
701  if (!read_line_from_input(demuxstr, line, LINE_LEN))
702  return nullptr;
703 
704  sub_readtext((char *) &line,&current->text[0]);
705  current->lines = 1;
706  current->end = -1;
707 
708  if (!read_line_from_input(demuxstr, line, LINE_LEN))
709  return current;;
710 
711  sub_readtext((char *) &line,&current->text[1]);
712  current->lines = 2;
713 
714  if ((current->text[0][0]==0) && (current->text[1][0]==0)) {
715  return nullptr;
716  }
717 
718  return current;
719 }
720 
722  char line1[LINE_LEN+1];
723  char line2[LINE_LEN+1];
724  char directive[LINE_LEN+1];
725  char *p = nullptr;
726  char *q = nullptr;
727  unsigned a1=0, a2=0, a3=0, a4=0, b1=0, b2=0, b3=0, b4=0; // NOLINT(readability-isolate-declaration)
728  unsigned comment = 0;
729  static unsigned s_jacoTimeRes = 30;
730  static int s_jacoShift = 0;
731 
732  memset(current, 0, sizeof(subtitle_t));
733  memset(line1, 0, LINE_LEN+1);
734  memset(line2, 0, LINE_LEN+1);
735  memset(directive, 0, LINE_LEN+1);
736  while (!current->text[0]) {
737  if (!read_line_from_input(demuxstr, line1, LINE_LEN)) {
738  return nullptr;
739  }
740  if (sscanf
741  (line1, "%u:%u:%u.%u %u:%u:%u.%u %" LINE_LEN_QUOT "[^\n\r]", &a1, &a2, &a3, &a4,
742  &b1, &b2, &b3, &b4, line2) < 9) {
743  if (sscanf(line1, "@%u @%u %" LINE_LEN_QUOT "[^\n\r]", &a4, &b4, line2) < 3) {
744  if (line1[0] == '#') {
745  int hours = 0;
746  int minutes = 0;
747  int seconds = 0;
748  int delta = 0;
749  unsigned units = s_jacoShift;
750  int inverter = 1;
751  switch (toupper(line1[1])) {
752  case 'S':
753  if (isalpha(line1[2])) {
754  delta = 6;
755  } else {
756  delta = 2;
757  }
758  if (sscanf(&line1[delta], "%d", &hours)) {
759  if (hours < 0) {
760  hours *= -1;
761  inverter = -1;
762  }
763  if (sscanf(&line1[delta], "%*d:%d", &minutes)) {
764  if (sscanf
765  (&line1[delta], "%*d:%*d:%d",
766  &seconds)) {
767  sscanf(&line1[delta], "%*d:%*d:%*d.%u",
768  &units);
769  } else {
770  hours = 0;
771  sscanf(&line1[delta], "%d:%d.%u",
772  &minutes, &seconds, &units);
773  minutes *= inverter;
774  }
775  } else {
776  hours = minutes = 0;
777  sscanf(&line1[delta], "%d.%u", &seconds,
778  &units);
779  seconds *= inverter;
780  }
781  s_jacoShift =
782  ((hours * 3600 + minutes * 60 +
783  seconds) * s_jacoTimeRes +
784  units) * inverter;
785  }
786  break;
787  case 'T':
788  if (isalpha(line1[2])) {
789  delta = 8;
790  } else {
791  delta = 2;
792  }
793  sscanf(&line1[delta], "%u", &s_jacoTimeRes);
794  break;
795  }
796  }
797  continue;
798  }
799  current->start =
800  (unsigned long) ((a4 + s_jacoShift) * 100.0 /
801  s_jacoTimeRes);
802  current->end =
803  (unsigned long) ((b4 + s_jacoShift) * 100.0 /
804  s_jacoTimeRes);
805  } else {
806  current->start =
807  (unsigned
808  long) (((a1 * 3600 + a2 * 60 + a3) * s_jacoTimeRes + a4 +
809  s_jacoShift) * 100.0 / s_jacoTimeRes);
810  current->end =
811  (unsigned
812  long) (((b1 * 3600 + b2 * 60 + b3) * s_jacoTimeRes + b4 +
813  s_jacoShift) * 100.0 / s_jacoTimeRes);
814  }
815  current->lines = 0;
816  p = line2;
817  while ((*p == ' ') || (*p == '\t')) {
818  ++p;
819  }
820  if (isalpha(*p)||*p == '[') {
821  if (sscanf(p, "%" LINE_LEN_QUOT "s %" LINE_LEN_QUOT "[^\n\r]", directive, line1) < 2)
822  return (subtitle_t *)ERR;
823  int jLength = strlen(directive);
824  for (int cont = 0; cont < jLength; ++cont) {
825  if (isalpha(*(directive + cont)))
826  *(directive + cont) = toupper(*(directive + cont));
827  }
828  if ((strstr(directive, "RDB") != nullptr)
829  || (strstr(directive, "RDC") != nullptr)
830  || (strstr(directive, "RLB") != nullptr)
831  || (strstr(directive, "RLG") != nullptr)) {
832  continue;
833  }
834  /* no alignment */
835 #if 0
836  if (strstr(directive, "JL") != nullptr) {
837  current->alignment = SUB_ALIGNMENT_HLEFT;
838  } else if (strstr(directive, "JR") != nullptr) {
839  current->alignment = SUB_ALIGNMENT_HRIGHT;
840  } else {
841  current->alignment = SUB_ALIGNMENT_HCENTER;
842  }
843 #endif
844  strcpy(line2, line1);
845  p = line2;
846  }
847  for (q = line1; (!eol(*p)) && (current->lines < SUB_MAX_TEXT); ++p) {
848  switch (*p) {
849  case '{':
850  comment++;
851  break;
852  case '}':
853  if (comment) {
854  --comment;
855  /* the next line to get rid of a blank after the comment */
856  if ((*(p + 1)) == ' ')
857  p++;
858  }
859  break;
860  case '~':
861  if (!comment) {
862  *q = ' ';
863  ++q;
864  }
865  break;
866  case ' ':
867  case '\t':
868  if ((*(p + 1) == ' ') || (*(p + 1) == '\t'))
869  break;
870  if (!comment) {
871  *q = ' ';
872  ++q;
873  }
874  break;
875  case '\\':
876  if (*(p + 1) == 'n') {
877  *q = '\0';
878  q = line1;
879  current->text[current->lines++] = strdup(line1);
880  ++p;
881  break;
882  }
883  if ((toupper(*(p + 1)) == 'C')
884  || (toupper(*(p + 1)) == 'F')) {
885  ++p,++p;
886  break;
887  }
888  if ((*(p + 1) == 'B') || (*(p + 1) == 'b') ||
889  /* actually this means "insert current date here" */
890  (*(p + 1) == 'D') ||
891  (*(p + 1) == 'I') || (*(p + 1) == 'i') ||
892  (*(p + 1) == 'N') ||
893  /* actually this means "insert current time here" */
894  (*(p + 1) == 'T') ||
895  (*(p + 1) == 'U') || (*(p + 1) == 'u')) {
896  ++p;
897  break;
898  }
899  if ((*(p + 1) == '\\') ||
900  (*(p + 1) == '~') || (*(p + 1) == '{')) {
901  ++p;
902  } else if (eol(*(p + 1))) {
903  if (!read_line_from_input(demuxstr, directive, LINE_LEN))
904  return nullptr;
905  trail_space(directive);
906  strncat(line2, directive,
907  ((LINE_LEN > 511) ? LINE_LEN-1 : 511)
908  - strlen(line2));
909  break;
910  }
911  // Checked xine-lib-1.2.8. No fix there. Seems like it
912  // should be a break.
913  break;
914  default:
915  if (!comment) {
916  *q = *p;
917  ++q;
918  }
919  }
920  }
921  *q = '\0';
922  if (current->lines < SUB_MAX_TEXT)
923  current->text[current->lines] = strdup(line1);
924  else
925  printf ("Too many lines in a subtitle\n");
926  }
927  current->lines++;
928  return current;
929 }
930 
932  char line[LINE_LEN+1];
933  int a1=0,a2=0,a3=0,a4=0; // NOLINT(readability-isolate-declaration)
934  char *p=nullptr;
935  int i = 0;
936 
937  while (!current->text[0]) {
938  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
939  if (line[0]!='{')
940  continue;
941  if (sscanf (line, "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4) < 4)
942  continue;
943  current->start = a1*360000+a2*6000+a3*100+a4/10;
944  for (i=0; i<SUB_MAX_TEXT;) {
945  if (!read_line_from_input(demuxstr, line, LINE_LEN)) break;
946  if (line[0]=='}') break;
947  int len=0;
948  for (p=line; *p!='\n' && *p!='\r' && *p; ++p,++len);
949  if (len) {
950  current->text[i]=(char *)malloc (len+1);
951  if (!current->text[i]) return (subtitle_t *)ERR;
952  strncpy (current->text[i], line, len); current->text[i][len]='\0';
953  ++i;
954  } else {
955  break;
956  }
957  }
958  current->lines=i;
959  }
960  return current;
961 }
962 
964  char line[LINE_LEN + 1];
965  int h = 0;
966  int m = 0;
967  int s = 0;
968 
969  memset (current, 0, sizeof(subtitle_t));
970 
971  do {
972  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
973  } while (sscanf (line, "[%d:%d:%d]", &h, &m, &s) != 3);
974 
975  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
976 
977  current->start = 360000 * h + 6000 * m + 100 * s;
978  current->end = -1;
979 
980  char *next=line;
981  int i=0;
982  while ((next = sub_readtext (next, &(current->text[i])))) {
983  if (next==ERR) return (subtitle_t *)ERR;
984  i++;
985  if (i>=SUB_MAX_TEXT) {
986  printf("Too many lines in a subtitle\n");
987  current->lines=i;
988  return current;
989  }
990  }
991  current->lines= ++i;
992 
993  return current;
994 }
995 
996 /* Code from subreader.c of MPlayer
997 ** Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
998 */
999 
1001  char line[LINE_LEN+1];
1002  char line2[LINE_LEN+1];
1003 
1004  memset (current, 0, sizeof(subtitle_t));
1005  do {
1006  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
1007  } while ((sscanf (line,
1008  "[%ld][%ld]%" LINE_LEN_QUOT "[^\r\n]",
1009  &(current->start), &(current->end), line2) < 3));
1010  current->start *= 10;
1011  current->end *= 10;
1012 
1013  char *p=line2;
1014  char *next=p;
1015  int i=0;
1016  while ((next = sub_readtext (next, &(current->text[i])))) {
1017  if (next == ERR) {return (subtitle_t *)ERR;}
1018  i++;
1019  if (i >= SUB_MAX_TEXT) {
1020  printf("Too many lines in a subtitle\n");
1021  current->lines = i;
1022  return current;
1023  }
1024  }
1025  current->lines= ++i;
1026 
1027  return current;
1028 }
1029 
1030 
1031 static int sub_autodetect (demux_sputext_t *demuxstr) {
1032 
1033  char line[LINE_LEN + 1];
1034  int i = 0;
1035  int j = 0;
1036  char p = 0;
1037 
1038  while (j < 100) {
1039  j++;
1040  if (!read_line_from_input(demuxstr, line, LINE_LEN))
1041  return FORMAT_UNKNOWN;
1042 
1043  if ((sscanf (line, "{%d}{}", &i)==1) ||
1044  (sscanf (line, "{%d}{%d}", &i, &i)==2)) {
1045  demuxstr->uses_time=0;
1046  return FORMAT_MICRODVD;
1047  }
1048 
1049  if (sscanf (line, "%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) {
1050  demuxstr->uses_time=1;
1051  return FORMAT_SUBRIP;
1052  }
1053 
1054  if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
1055  demuxstr->uses_time=1;
1056  return FORMAT_SUBVIEWER;
1057  }
1058 
1059  if (sscanf (line, "%d:%d:%d,%d,%d:%d:%d,%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
1060  demuxstr->uses_time=1;
1061  return FORMAT_SUBVIEWER;
1062  }
1063 
1064  if (strstr (line, "<SAMI>")) {
1065  demuxstr->uses_time=1;
1066  return FORMAT_SAMI;
1067  }
1068  if (sscanf (line, "%d:%d:%d:", &i, &i, &i )==3) {
1069  demuxstr->uses_time=1;
1070  return FORMAT_VPLAYER;
1071  }
1072  /*
1073  * A RealText format is a markup language, starts with <window> tag,
1074  * options (behaviour modifiers) are possible.
1075  */
1076  if ( strcasecmp(line, "<window") == 0 ) {
1077  demuxstr->uses_time=1;
1078  return FORMAT_RT;
1079  }
1080  if ((memcmp(line, "Dialogue: Marked", 16) == 0) || (memcmp(line, "Dialogue: ", 10) == 0)) {
1081  demuxstr->uses_time=1;
1082  return FORMAT_SSA;
1083  }
1084  if (sscanf (line, "%d,%d,\"%c", &i, &i, (char *) &i) == 3) {
1085  demuxstr->uses_time=0;
1086  return FORMAT_PJS;
1087  }
1088  if (sscanf (line, "FORMAT=%d", &i) == 1) {
1089  demuxstr->uses_time=0;
1090  return FORMAT_MPSUB;
1091  }
1092  if (sscanf (line, "FORMAT=TIM%c", &p)==1 && p=='E') {
1093  demuxstr->uses_time=1;
1094  return FORMAT_MPSUB;
1095  }
1096  if (strstr (line, "-->>")) {
1097  demuxstr->uses_time=0;
1098  return FORMAT_AQTITLE;
1099  }
1100  if (sscanf(line, "@%d @%d", &i, &i) == 2 ||
1101  sscanf(line, "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8) {
1102  demuxstr->uses_time = 1;
1103  return FORMAT_JACOBSUB;
1104  }
1105  if (sscanf(line, "{T %d:%d:%d:%d",&i, &i, &i, &i) == 4) {
1106  demuxstr->uses_time = 1;
1107  return FORMAT_SUBVIEWER2;
1108  }
1109  if (sscanf(line, "[%d:%d:%d]", &i, &i, &i) == 3) {
1110  demuxstr->uses_time = 1;
1111  return FORMAT_SUBRIP09;
1112  }
1113 
1114  if (sscanf (line, "[%d][%d]", &i, &i) == 2) {
1115  demuxstr->uses_time = 1;
1116  return FORMAT_MPL2;
1117  }
1118  }
1119  return FORMAT_UNKNOWN; /* too many bad lines */
1120 }
1121 
1123 
1124  // These functions all return either 1) nullptr, 2) (subtitle_t*)ERR,
1125  // or 3) a pointer to the dest parameter.
1126  subtitle_t * (*func[])(demux_sputext_t *demuxstr,subtitle_t *dest)=
1127  {
1142  };
1143 
1144  /* Rewind (sub_autodetect() needs to read input from the beginning) */
1145  demuxstr->rbuffer_cur = 0;
1146  demuxstr->buflen = 0;
1147  demuxstr->emptyReads = 0;
1148 
1149  demuxstr->format=sub_autodetect (demuxstr);
1150  if (demuxstr->format==FORMAT_UNKNOWN) {
1151  return nullptr;
1152  }
1153 
1154  /*printf("Detected subtitle file format: %d\n", demuxstr->format);*/
1155 
1156  /* Rewind */
1157  demuxstr->rbuffer_cur = 0;
1158  demuxstr->buflen = 0;
1159  demuxstr->emptyReads = 0;
1160 
1161  demuxstr->num=0;
1162  int n_max=32;
1163  auto *first = (subtitle_t *) malloc(n_max*sizeof(subtitle_t));
1164  if(!first) return nullptr;
1165  memset(first, 0, n_max*sizeof(subtitle_t));
1166  int timeout = MAX_TIMEOUT;
1167 
1168  if (demuxstr->uses_time) timeout *= 100;
1169  else timeout *= 10;
1170 
1171  while(true) {
1172  if(demuxstr->num>=n_max){
1173  int old_size = n_max*sizeof(subtitle_t);
1174  n_max+=16;
1175  auto *new_first=(subtitle_t *)realloc(first,n_max*sizeof(subtitle_t));
1176  if (new_first == nullptr) {
1177  // clang-tidy-11 says this is fine. ct-9 produces a weird
1178  // warning here. NOLINTNEXTLINE(clang-analyzer-unix.Malloc)
1179  free(first);
1180  return nullptr;
1181  }
1182  // Clear only the new space at the end of the array.
1183  memset((char*)new_first + old_size, 0, 16*sizeof(subtitle_t));
1184  first = new_first;
1185  }
1186 
1187  subtitle_t *sub = func[demuxstr->format] (demuxstr, &first[demuxstr->num]);
1188  if (!sub) {
1189  break; /* EOF */
1190  }
1191  demuxstr->emptyReads = 0;
1192 
1193  if (sub==ERR)
1194  ++demuxstr->errs;
1195  else {
1196  if (demuxstr->num > 0 && first[demuxstr->num-1].end == -1) {
1197  /* end time not defined in the subtitle */
1198  if (timeout > 0) {
1199  /* timeout */
1200  if (timeout > sub->start - first[demuxstr->num-1].start) {
1201  first[demuxstr->num-1].end = sub->start;
1202  } else
1203  first[demuxstr->num-1].end = first[demuxstr->num-1].start + timeout;
1204  } else {
1205  /* no timeout */
1206  first[demuxstr->num-1].end = sub->start;
1207  }
1208  }
1209  ++demuxstr->num; /* Error vs. Valid */
1210  }
1211  }
1212  /* timeout of last subtitle */
1213  if (demuxstr->num > 0 && first[demuxstr->num-1].end == -1)
1214  {
1215  if (timeout > 0) {
1216  first[demuxstr->num-1].end = first[demuxstr->num-1].start + timeout;
1217  }
1218  }
1219 
1220 #ifdef DEBUG_XINE_DEMUX_SPUTEXT
1221  {
1222  char buffer[1024];
1223 
1224  sprintf(buffer, "Read %i subtitles", demuxstr->num);
1225 
1226  if(demuxstr->errs)
1227  sprintf(buffer + strlen(buffer), ", %i bad line(s).\n", demuxstr->errs);
1228  else
1229  strcat(buffer, "\n");
1230 
1231  printf("%s", buffer);
1232  }
1233 #endif
1234 
1235  // No memory leak of 'sub' here. 'Sub' always points to an element in 'first'.
1236  // NOLINT(clang-analyzer-unix.Malloc)
1237  return first;
1238 }
#define MAX_TIMEOUT
static bool eol(char p)
#define LINE_LEN
#define SUB_MAX_TEXT
static subtitle_t * sub_read_line_subviewer2(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_AQTITLE
static guint32 * p2
Definition: goom_core.cpp:30
long end
Ending time in msec or starting frame.
static subtitle_t * sub_read_line_microdvd(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_MPSUB
subtitle_t * sub_read_file(demux_sputext_t *demuxstr)
#define FORMAT_SSA
#define SUB_BUFSIZE
#define FORMAT_SUBVIEWER2
static subtitle_t * sub_read_line_subrip(demux_sputext_t *demuxstr, subtitle_t *current)
static guint32 * tmp
Definition: goom_core.cpp:30
long long copy(QFile &dst, QFile &src, uint block_size)
Copies src file to dst file.
char buf[SUB_BUFSIZE]
#define off_t
static subtitle_t * sub_read_line_vplayer(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_sami(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_UNKNOWN
static subtitle_t * sub_read_line_jacobsub(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBRIP09
static const uint16_t * d
QDateTime current(bool stripped)
Returns current Date and Time in UTC.
Definition: mythdate.cpp:10
static subtitle_t * sub_read_line_subrip09(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBVIEWER
#define FORMAT_SAMI
static char * read_line_from_input(demux_sputext_t *demuxstr, char *line, off_t len)
#define FORMAT_VPLAYER
static int sub_autodetect(demux_sputext_t *demuxstr)
static subtitle_t * sub_read_line_rt(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_PJS
static subtitle_t * sub_read_line_mpl2(demux_sputext_t *demuxstr, subtitle_t *current)
PictureAttribute next(PictureAttributeSupported Supported, PictureAttribute Attribute)
static subtitle_t * sub_read_line_ssa(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_pjs(demux_sputext_t *demuxstr, subtitle_t *current)
static char * sub_readtext(char *source, char **dest)
Extract the next token from a string.
#define FORMAT_JACOBSUB
static subtitle_t * sub_read_line_subviewer(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_aqt(demux_sputext_t *demuxstr, subtitle_t *current)
static void trail_space(char *s)
static subtitle_t * sub_read_line_mpsub(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBRIP
#define ERR
long start
Starting time in msec or starting frame.
#define FORMAT_MPL2
#define FORMAT_RT
#define FORMAT_MICRODVD
char next_line[SUB_BUFSIZE]
#define LINE_LEN_QUOT