MythTV  master
xine_demux_sputext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2000-2003 the xine project
3  *
4  * This file is part of xine, a free video player.
5  *
6  * xine is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * xine is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  * code based on old libsputext/xine_decoder.c
21  *
22  * code based on mplayer module:
23  *
24  * Subtitle reader with format autodetection
25  *
26  * Written by laaz
27  * Some code cleanup & realloc() by A'rpi/ESP-team
28  * dunnowhat sub format by szabi
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include <cctype>
36 #include <cstdio>
37 #include <cstdlib>
38 #include <cstring>
39 #include <fcntl.h>
40 #include <sys/stat.h>
41 #include <sys/types.h>
42 #include <unistd.h>
43 #include "xine_demux_sputext.h"
44 
45 #define LOG_MODULE "demux_sputext"
46 #define LOG_VERBOSE
47 /*
48 #define LOG
49 */
50 
51 #define ERR ((void *)-1)
52 #define LINE_LEN 1000
53 #define LINE_LEN_QUOT "1000"
54 
55 /*
56  * Demuxer code start
57  */
58 
59 #define FORMAT_UNKNOWN (-1)
60 #define FORMAT_MICRODVD 0
61 #define FORMAT_SUBRIP 1
62 #define FORMAT_SUBVIEWER 2
63 #define FORMAT_SAMI 3
64 #define FORMAT_VPLAYER 4
65 #define FORMAT_RT 5
66 #define FORMAT_SSA 6 /* Sub Station Alpha */
67 #define FORMAT_PJS 7
68 #define FORMAT_MPSUB 8
69 #define FORMAT_AQTITLE 9
70 #define FORMAT_JACOBSUB 10
71 #define FORMAT_SUBVIEWER2 11
72 #define FORMAT_SUBRIP09 12
73 #define FORMAT_MPL2 13 /*Mplayer sub 2 ?*/
74 
/*
 * Tell whether a character terminates a line of subtitle text.
 *
 * Returns true for carriage return, line feed and the string terminator,
 * false for every other character.
 */
static bool eol(char p) {
  switch (p) {
    case '\0':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
78 
/*
 * Strip leading and trailing whitespace from a string, in place.
 *
 * s must point to a writable, NUL-terminated string. On return the first
 * non-whitespace character (if any) has been moved to s[0] and the string
 * is terminated directly after its last non-whitespace character. A string
 * made only of whitespace becomes the empty string.
 */
static inline void trail_space(char *s) {
  /* The <ctype.h> classifiers require an unsigned char value (or EOF);
   * passing a plain char that happens to be negative (any byte >= 0x80
   * where char is signed) is undefined behaviour, hence the casts. */
  char *first = s;
  while (isspace((unsigned char)*first))
    first++;

  size_t len = strlen(first);
  while (len > 0 && isspace((unsigned char)first[len - 1]))
    len--;

  /* A single overlap-safe move replaces shifting the whole string left
   * once per leading whitespace character. */
  if (first != s)
    memmove(s, first, len);
  s[len] = '\0';
}
92 
93 /*
94  * Reimplementation of fgets() using the input->read() method.
95  */
96 static char *read_line_from_input(demux_sputext_t *demuxstr, char *line, off_t len) {
97  off_t nread = 0;
98  char *s;
99 
100  // Since our RemoteFile code sleeps 200ms whenever we get back less data
101  // than requested, but this code just keeps trying to read until it gets
102  // an error back, we check for empty reads so that we can stop reading
103  // when there is no more data to read
104  if (demuxstr->emptyReads == 0 && (len - demuxstr->buflen) > 512) {
105  nread = len - demuxstr->buflen;
106  if (nread > demuxstr->rbuffer_len - demuxstr->rbuffer_cur)
107  nread = demuxstr->rbuffer_len - demuxstr->rbuffer_cur;
108  if (nread < 0) {
109  printf("read failed.\n");
110  return nullptr;
111  }
112  memcpy(&demuxstr->buf[demuxstr->buflen],
113  &demuxstr->rbuffer_text[demuxstr->rbuffer_cur],
114  nread);
115  demuxstr->rbuffer_cur += nread;
116  }
117 
118  if (!nread)
119  demuxstr->emptyReads++;
120 
121  demuxstr->buflen += nread;
122  demuxstr->buf[demuxstr->buflen] = '\0';
123 
124  s = strchr(demuxstr->buf, '\n');
125 
126  if (line && (s || demuxstr->buflen)) {
127 
128  int linelen = s ? (s - demuxstr->buf) + 1 : demuxstr->buflen;
129 
130  memcpy(line, demuxstr->buf, linelen);
131  line[linelen] = '\0';
132 
133  memmove(demuxstr->buf, &demuxstr->buf[linelen], SUB_BUFSIZE - linelen);
134  demuxstr->buflen -= linelen;
135 
136  return line;
137  }
138 
139  return nullptr;
140 }
141 
142 
144 
145  static char s_line[LINE_LEN + 1];
146  static char *s_s = nullptr;
147  char text[LINE_LEN + 1];
148 
149  char *p = nullptr;
150  current->lines = current->start = 0;
151  current->end = -1;
152  int state = 0;
153 
154  /* read the first line */
155  if (!s_s)
156  if (!(s_s = read_line_from_input(demuxstr, s_line, LINE_LEN))) return nullptr;
157 
158  do {
159  switch (state) {
160 
161  case 0: /* find "START=" */
162  s_s = strstr (s_s, "Start=");
163  if (s_s) {
164  current->start = strtol (s_s + 6, &s_s, 0) / 10;
165  state = 1; continue;
166  }
167  break;
168 
169  case 1: /* find "<P" */
170  if ((s_s = strstr (s_s, "<P"))) { s_s += 2; state = 2; continue; }
171  break;
172 
173  case 2: /* find ">" */
174  if ((s_s = strchr (s_s, '>'))) { s_s++; state = 3; p = text; continue; }
175  break;
176 
177  case 3: /* get all text until '<' appears */
178  if (*s_s == '\0') { break; }
179  else if (*s_s == '<') { state = 4; }
180  else if (strncasecmp (s_s, "&nbsp;", 6) == 0) { *p++ = ' '; s_s += 6; }
181  else if (*s_s == '\r') { s_s++; }
182  else if (strncasecmp (s_s, "<br>", 4) == 0 || *s_s == '\n') {
183  *p = '\0'; p = text; trail_space (text);
184  if (text[0] != '\0')
185  current->text[current->lines++] = strdup (text);
186  if (*s_s == '\n') s_s++; else s_s += 4;
187  }
188  else *p++ = *s_s++;
189  continue;
190 
191  case 4: /* get current->end or skip <TAG> */
192  char *q = strstr (s_s, "Start=");
193  if (q) {
194  current->end = strtol (q + 6, &q, 0) / 10 - 1;
195  *p = '\0'; trail_space (text);
196  if (text[0] != '\0')
197  current->text[current->lines++] = strdup (text);
198  if (current->lines > 0) { state = 99; break; }
199  state = 0; continue;
200  }
201  s_s = strchr (s_s, '>');
202  if (s_s) { s_s++; state = 3; continue; }
203  break;
204  }
205 
206  /* read next line */
207  if (state != 99 && !(s_s = read_line_from_input (demuxstr, s_line, LINE_LEN)))
208  return nullptr;
209 
210  } while (state != 99);
211 
212  return current;
213 }
214 
215 
216 
229 static char *sub_readtext(char *source, char **dest) {
230  int len=0;
231  char *p=source;
232 
233  while ( !eol(*p) && *p!= '|' ) {
234  p++,len++;
235  }
236 
237  if (!dest)
238  return (char*)ERR;
239 
240  *dest= (char *)malloc (len+1);
241  if (!(*dest))
242  return (char*)ERR;
243 
244  strncpy(*dest, source, len);
245  (*dest)[len]=0;
246 
247  while (*p=='\r' || *p=='\n' || *p=='|')
248  p++;
249 
250  if (*p) return p; /* not-last text field */
251  return (char*)nullptr; /* last text field */
252 }
253 
255 
256  char line[LINE_LEN + 1];
257  char line2[LINE_LEN + 1];
258 
259  memset (current, 0, sizeof(subtitle_t));
260 
261  current->end=-1;
262  do {
263  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
264  } while ((sscanf (line, "{%ld}{}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), line2) !=2) &&
265  (sscanf (line, "{%ld}{%ld}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), &(current->end),line2) !=3)
266  );
267 
268  char *p=line2;
269  char *next=p;
270  int i=0;
271  while ((next =sub_readtext (next, &(current->text[i])))) {
272  if (next==ERR) return (subtitle_t *)ERR;
273  i++;
274  if (i>=SUB_MAX_TEXT) {
275  printf ("Too many lines in a subtitle\n");
276  current->lines=i;
277  return current;
278  }
279  }
280  current->lines= ++i;
281 
282  return current;
283 }
284 
286 
287  char line[LINE_LEN + 1];
288  int a1,a2,a3,a4,b1,b2,b3,b4; // NOLINT(readability-isolate-declaration)
289 
290  memset (current, 0, sizeof(subtitle_t));
291 
292  while (true) {
293  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
294  if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) {
295  if (sscanf (line, "%d:%d:%d,%d,%d:%d:%d,%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
296  continue;
297  }
298  current->start = a1*360000+a2*6000+a3*100+a4;
299  current->end = b1*360000+b2*6000+b3*100+b4;
300 
301  if (!read_line_from_input(demuxstr, line, LINE_LEN))
302  return nullptr;
303 
304  char *p=line;
305  for (current->lines=1; current->lines <= SUB_MAX_TEXT; current->lines++) {
306  char *q=nullptr;
307  int len;
308  for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' &&
309  (strncasecmp(p,"[br]",4) != 0); p++,len++);
310  current->text[current->lines-1]=(char *)malloc (len+1);
311  if (!current->text[current->lines-1]) return (subtitle_t *)ERR;
312  strncpy (current->text[current->lines-1], q, len);
313  current->text[current->lines-1][len]='\0';
314  if (!*p || *p=='\r' || *p=='\n') break;
315  if (*p=='[') while (*p++!=']');
316  if (*p=='|') p++;
317  }
318  if (current->lines > SUB_MAX_TEXT) current->lines = SUB_MAX_TEXT;
319  break;
320  }
321  return current;
322 }
323 
325  char line[LINE_LEN + 1];
326  int a1,a2,a3,a4,b1,b2,b3,b4; // NOLINT(readability-isolate-declaration)
327  int i;
328 
329  memset(current,0,sizeof(subtitle_t));
330  do {
331  if(!read_line_from_input(demuxstr,line,LINE_LEN))
332  return nullptr;
333  i = sscanf(line,"%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4);
334  } while(i < 8);
335  current->start = a1*360000+a2*6000+a3*100+a4/10;
336  current->end = b1*360000+b2*6000+b3*100+b4/10;
337  i=0;
338  int end_sub=0;
339  do {
340  char *p; /* pointer to the curently read char */
341  char temp_line[SUB_BUFSIZE]; /* subtitle line that will be transfered to current->text[i] */
342  int temp_index; /* ... and its index wich 'points' to the first EMPTY place -> last read char is at temp_index-1 if temp_index>0 */
343  temp_line[SUB_BUFSIZE-1]='\0'; /* just in case... */
344  if(!read_line_from_input(demuxstr,line,LINE_LEN)) {
345  if(i)
346  break; /* if something was read, transmit it */
347  return nullptr; /* if not, repport EOF */
348  }
349  for(temp_index=0,p=line;*p!='\0' && !end_sub && temp_index<SUB_BUFSIZE && i<SUB_MAX_TEXT;p++) {
350  switch(*p) {
351  case '\\':
352  if(*(p+1)=='N' || *(p+1)=='n') {
353  temp_line[temp_index++]='\0'; /* end of curent line */
354  p++;
355  } else
356  temp_line[temp_index++]=*p;
357  break;
358  case '{':
359 #if 0 /* italic not implemented in renderer, ignore them for now */
360  if(!strncmp(p,"{\\i1}",5) && temp_index+3<SUB_BUFSIZE) {
361  temp_line[temp_index++]='<';
362  temp_line[temp_index++]='i';
363  temp_line[temp_index++]='>';
364 #else
365  if(strncmp(p,"{\\i1}",5) == 0) { // NOLINT(bugprone-branch-clone)
366 #endif
367  p+=4;
368  }
369 #if 0 /* italic not implemented in renderer, ignore them for now */
370  else if(!strncmp(p,"{\\i0}",5) && temp_index+4<SUB_BUFSIZE) {
371  temp_line[temp_index++]='<';
372  temp_line[temp_index++]='/';
373  temp_line[temp_index++]='i';
374  temp_line[temp_index++]='>';
375 #else
376  else if(strncmp(p,"{\\i0}",5) == 0) {
377 #endif
378  p+=4;
379  }
380  else
381  temp_line[temp_index++]=*p;
382  break;
383  case '\r': /* just ignore '\r's */
384  break;
385  case '\n':
386  temp_line[temp_index++]='\0';
387  break;
388  default:
389  temp_line[temp_index++]=*p;
390  break;
391  }
392  if(temp_index>0) {
393  if(temp_index==SUB_BUFSIZE)
394  printf("Too many characters in a subtitle line\n");
395  if(temp_line[temp_index-1]=='\0' || temp_index==SUB_BUFSIZE) {
396  if(temp_index>1) { /* more than 1 char (including '\0') -> that is a valid one */
397  current->text[i]=(char *)malloc(temp_index);
398  if(!current->text[i])
399  return (subtitle_t *)ERR;
400  strncpy(current->text[i],temp_line,temp_index); /* temp_index<=SUB_BUFSIZE is always true here */
401  i++;
402  temp_index=0;
403  } else
404  end_sub=1;
405  }
406  }
407  }
408  } while(i<SUB_MAX_TEXT && !end_sub);
409  if(i>=SUB_MAX_TEXT)
410  printf("Too many lines in a subtitle\n");
411  current->lines=i;
412  return current;
413 }
414 
416  char line[LINE_LEN + 1];
417  int a1,a2,a3,b1,b2,b3; // NOLINT(readability-isolate-declaration)
418 
419  memset (current, 0, sizeof(subtitle_t));
420 
421  while (!current->text[0]) {
422  if( demuxstr->next_line[0] == '\0' ) { /* if the buffer is empty.... */
423  if( !read_line_from_input(demuxstr, line, LINE_LEN) ) return nullptr;
424  } else {
425  /* ... get the current line from buffer. */
426  strncpy( line, demuxstr->next_line, LINE_LEN);
427  line[LINE_LEN] = '\0'; /* I'm scared. This makes me feel better. */
428  demuxstr->next_line[0] = '\0'; /* mark the buffer as empty. */
429  }
430  /* Initialize buffer with next line */
431  if( ! read_line_from_input( demuxstr, demuxstr->next_line, LINE_LEN) ) {
432  demuxstr->next_line[0] = '\0';
433  return nullptr;
434  }
435  if( (sscanf( line, "%d:%d:%d:", &a1, &a2, &a3) < 3) ||
436  (sscanf( demuxstr->next_line, "%d:%d:%d:", &b1, &b2, &b3) < 3) )
437  continue;
438  current->start = a1*360000+a2*6000+a3*100;
439  current->end = b1*360000+b2*6000+b3*100;
440  if ((current->end - current->start) > LINE_LEN)
441  current->end = current->start + LINE_LEN; /* not too long though. */
442  /* teraz czas na wkopiowanie stringu */
443  char *p=line;
444  /* finds the body of the subtitle_t */
445  for (int i=0; i<3; i++){
446  char *p2=strchr( p, ':');
447  if( p2 == nullptr ) break;
448  p=p2+1;
449  }
450 
451  char *next=p;
452  int i=0;
453  while( (next = sub_readtext( next, &(current->text[i]))) ) {
454  if (next==ERR)
455  return (subtitle_t *)ERR;
456  i++;
457  if (i>=SUB_MAX_TEXT) {
458  printf("Too many lines in a subtitle\n");
459  current->lines=i;
460  return current;
461  }
462  }
463  current->lines=++i;
464  }
465  return current;
466 }
467 
469  /*
470  * TODO: This format uses quite rich (sub/super)set of xhtml
471  * I couldn't check it since DTD is not included.
472  * WARNING: full XML parses can be required for proper parsing
473  */
474  char line[LINE_LEN + 1];
475  int a1,a2,a3,a4,b1,b2,b3,b4; // NOLINT(readability-isolate-declaration)
476  int plen;
477 
478  memset (current, 0, sizeof(subtitle_t));
479 
480  while (!current->text[0]) {
481  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
482  /*
483  * TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
484  * to describe the same moment in time. Maybe there are even more formats in use.
485  */
486  if (sscanf (line, "<Time Begin=\"%d:%d:%d.%d\" End=\"%d:%d:%d.%d\"",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
487 
488  plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
489  if (
490  (sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&plen) < 4) &&
491  (sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&b4,&plen) < 5) &&
492  /* (sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen) < 5) && */
493  (sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&b4,&plen) < 6) &&
494  (sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\" %*[Ee]nd=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen) < 8)
495  )
496  continue;
497  current->start = a1*360000+a2*6000+a3*100+a4/10;
498  current->end = b1*360000+b2*6000+b3*100+b4/10;
499  /* TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml? */
500  char *next = strstr(line,"<clear/>")+8;
501  int i=0;
502  while ((next =sub_readtext (next, &(current->text[i])))) {
503  if (next==ERR)
504  return (subtitle_t *)ERR;
505  i++;
506  if (i>=SUB_MAX_TEXT) {
507  printf("Too many lines in a subtitle\n");
508  current->lines=i;
509  return current;
510  }
511  }
512  current->lines=i+1;
513  }
514  return current;
515 }
516 
518  int comma;
519  static int s_maxComma = 32; /* let's use 32 for the case that the */
520  /* amount of commas increase with newer SSA versions */
521 
522  int hour1;
523  int min1;
524  int sec1;
525  int hunsec1;
526  int hour2;
527  int min2;
528  int sec2;
529  int hunsec2;
530  int nothing;
531  char line[LINE_LEN + 1];
532  char line3[LINE_LEN + 1];
533  char *tmp;
534 
535  do {
536  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
537  } while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d,"
538  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
539  &hour1, &min1, &sec1, &hunsec1,
540  &hour2, &min2, &sec2, &hunsec2,
541  line3) < 9
542  &&
543  sscanf (line, "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d,"
544  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
545  &hour1, &min1, &sec1, &hunsec1,
546  &hour2, &min2, &sec2, &hunsec2,
547  line3) < 9 );
548 
549  char *line2=strchr(line3, ',');
550  if (!line2)
551  return nullptr;
552 
553  for (comma = 4; comma < s_maxComma; comma ++)
554  {
555  tmp = line2;
556  if(!(tmp=strchr(++tmp, ','))) break;
557  if(*(++tmp) == ' ') break;
558  /* a space after a comma means we're already in a sentence */
559  line2 = tmp;
560  }
561 
562  if(comma < s_maxComma)s_maxComma = comma;
563  /* eliminate the trailing comma */
564  if(*line2 == ',') line2++;
565 
566  current->lines=0;
567  int num=0;
568  current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
569  current->end = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;
570 
571  while (((tmp=strstr(line2, "\\n")) != nullptr) || ((tmp=strstr(line2, "\\N")) != nullptr) ){
572  current->text[num]=(char *)malloc(tmp-line2+1);
573  strncpy (current->text[num], line2, tmp-line2);
574  current->text[num][tmp-line2]='\0';
575  line2=tmp+2;
576  num++;
577  current->lines++;
578  if (current->lines >= SUB_MAX_TEXT) return current;
579  }
580 
581  current->text[num]=strdup(line2);
582  current->lines++;
583 
584  return current;
585 }
586 
587 /* Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
588  * From MPlayer subreader.c :
589  *
590  * PJS subtitles reader.
591  * That's the "Phoenix Japanimation Society" format.
592  * I found some of them in http://www.scriptsclub.org/ (used for anime).
593  * The time is in tenths of second.
594  *
595  * by set, based on code by szabi (dunnowhat sub format ;-)
596  */
597 
599  char line[LINE_LEN + 1];
600  char text[LINE_LEN + 1];
601  char *s;
602  char *d;
603 
604  memset (current, 0, sizeof(subtitle_t));
605 
606  if (!read_line_from_input(demuxstr, line, LINE_LEN))
607  return nullptr;
608  for (s = line; *s && isspace(*s); s++);
609  if (*s == 0)
610  return nullptr;
611  if (sscanf (line, "%ld,%ld,", &(current->start),
612  &(current->end)) <2)
613  return (subtitle_t *)ERR;
614  /* the files I have are in tenths of second */
615  current->start *= 10;
616  current->end *= 10;
617 
618  /* walk to the beggining of the string */
619  for (; *s; s++) if (*s==',') break;
620  if (*s) {
621  for (s++; *s; s++) if (*s==',') break;
622  if (*s) s++;
623  }
624  if (*s!='"') {
625  return (subtitle_t *)ERR;
626  }
627  /* copy the string to the text buffer */
628  for (s++, d=text; *s && *s!='"'; s++, d++)
629  *d=*s;
630  *d=0;
631  current->text[0] = strdup(text);
632  current->lines = 1;
633 
634  return current;
635 }
636 
638  char line[LINE_LEN + 1];
639  float a;
640  float b;
641  int num=0;
642 
643  do {
644  if (!read_line_from_input(demuxstr, line, LINE_LEN))
645  return nullptr;
646  } while (sscanf (line, "%f %f", &a, &b) !=2);
647 
648  demuxstr->mpsub_position += (a*100.0F);
649  current->start = (int) demuxstr->mpsub_position;
650  demuxstr->mpsub_position += (b*100.0F);
651  current->end = (int) demuxstr->mpsub_position;
652 
653  while (num < SUB_MAX_TEXT) {
654  if (!read_line_from_input(demuxstr, line, LINE_LEN))
655  return nullptr;
656 
657  char *p=line;
658  while (isspace(*p))
659  p++;
660 
661  if (eol(*p) && num > 0)
662  return current;
663 
664  if (eol(*p))
665  return nullptr;
666 
667  char *q;
668  for (q=p; !eol(*q); q++);
669  *q='\0';
670  if (strlen(p)) {
671  current->text[num]=strdup(p);
672  printf(">%s<\n",p);
673  current->lines = ++num;
674  } else {
675  if (num)
676  return current;
677  return nullptr;
678  }
679  }
680 
681  return nullptr;
682 }
683 
685  char line[LINE_LEN + 1];
686 
687  memset (current, 0, sizeof(subtitle_t));
688 
689  while (true) {
690  /* try to locate next subtitle_t */
691  if (!read_line_from_input(demuxstr, line, LINE_LEN))
692  return nullptr;
693  if (!(sscanf (line, "-->> %ld", &(current->start)) <1))
694  break;
695  }
696 
697  if (!read_line_from_input(demuxstr, line, LINE_LEN))
698  return nullptr;
699 
700  sub_readtext((char *) &line,&current->text[0]);
701  current->lines = 1;
702  current->end = -1;
703 
704  if (!read_line_from_input(demuxstr, line, LINE_LEN))
705  return current;;
706 
707  sub_readtext((char *) &line,&current->text[1]);
708  current->lines = 2;
709 
710  if ((current->text[0][0]==0) && (current->text[1][0]==0)) {
711  return nullptr;
712  }
713 
714  return current;
715 }
716 
718  char line1[LINE_LEN+1];
719  char line2[LINE_LEN+1];
720  char directive[LINE_LEN+1];
721  char *p;
722  char *q;
723  unsigned a1, a2, a3, a4, b1, b2, b3, b4; // NOLINT(readability-isolate-declaration)
724  unsigned comment = 0;
725  static unsigned s_jacoTimeRes = 30;
726  static int s_jacoShift = 0;
727 
728  memset(current, 0, sizeof(subtitle_t));
729  memset(line1, 0, LINE_LEN+1);
730  memset(line2, 0, LINE_LEN+1);
731  memset(directive, 0, LINE_LEN+1);
732  while (!current->text[0]) {
733  if (!read_line_from_input(demuxstr, line1, LINE_LEN)) {
734  return nullptr;
735  }
736  if (sscanf
737  (line1, "%u:%u:%u.%u %u:%u:%u.%u %" LINE_LEN_QUOT "[^\n\r]", &a1, &a2, &a3, &a4,
738  &b1, &b2, &b3, &b4, line2) < 9) {
739  if (sscanf(line1, "@%u @%u %" LINE_LEN_QUOT "[^\n\r]", &a4, &b4, line2) < 3) {
740  if (line1[0] == '#') {
741  int hours = 0;
742  int minutes = 0;
743  int seconds;
744  int delta;
745  unsigned units = s_jacoShift;
746  int inverter = 1;
747  switch (toupper(line1[1])) {
748  case 'S':
749  if (isalpha(line1[2])) {
750  delta = 6;
751  } else {
752  delta = 2;
753  }
754  if (sscanf(&line1[delta], "%d", &hours)) {
755  if (hours < 0) {
756  hours *= -1;
757  inverter = -1;
758  }
759  if (sscanf(&line1[delta], "%*d:%d", &minutes)) {
760  if (sscanf
761  (&line1[delta], "%*d:%*d:%d",
762  &seconds)) {
763  sscanf(&line1[delta], "%*d:%*d:%*d.%u",
764  &units);
765  } else {
766  hours = 0;
767  sscanf(&line1[delta], "%d:%d.%u",
768  &minutes, &seconds, &units);
769  minutes *= inverter;
770  }
771  } else {
772  hours = minutes = 0;
773  sscanf(&line1[delta], "%d.%u", &seconds,
774  &units);
775  seconds *= inverter;
776  }
777  s_jacoShift =
778  ((hours * 3600 + minutes * 60 +
779  seconds) * s_jacoTimeRes +
780  units) * inverter;
781  }
782  break;
783  case 'T':
784  if (isalpha(line1[2])) {
785  delta = 8;
786  } else {
787  delta = 2;
788  }
789  sscanf(&line1[delta], "%u", &s_jacoTimeRes);
790  break;
791  }
792  }
793  continue;
794  }
795  current->start =
796  (unsigned long) ((a4 + s_jacoShift) * 100.0 /
797  s_jacoTimeRes);
798  current->end =
799  (unsigned long) ((b4 + s_jacoShift) * 100.0 /
800  s_jacoTimeRes);
801  } else {
802  current->start =
803  (unsigned
804  long) (((a1 * 3600 + a2 * 60 + a3) * s_jacoTimeRes + a4 +
805  s_jacoShift) * 100.0 / s_jacoTimeRes);
806  current->end =
807  (unsigned
808  long) (((b1 * 3600 + b2 * 60 + b3) * s_jacoTimeRes + b4 +
809  s_jacoShift) * 100.0 / s_jacoTimeRes);
810  }
811  current->lines = 0;
812  p = line2;
813  while ((*p == ' ') || (*p == '\t')) {
814  ++p;
815  }
816  if (isalpha(*p)||*p == '[') {
817  if (sscanf(p, "%" LINE_LEN_QUOT "s %" LINE_LEN_QUOT "[^\n\r]", directive, line1) < 2)
818  return (subtitle_t *)ERR;
819  int jLength = strlen(directive);
820  for (int cont = 0; cont < jLength; ++cont) {
821  if (isalpha(*(directive + cont)))
822  *(directive + cont) = toupper(*(directive + cont));
823  }
824  if ((strstr(directive, "RDB") != nullptr)
825  || (strstr(directive, "RDC") != nullptr)
826  || (strstr(directive, "RLB") != nullptr)
827  || (strstr(directive, "RLG") != nullptr)) {
828  continue;
829  }
830  /* no alignment */
831 #if 0
832  if (strstr(directive, "JL") != nullptr) {
833  current->alignment = SUB_ALIGNMENT_HLEFT;
834  } else if (strstr(directive, "JR") != nullptr) {
835  current->alignment = SUB_ALIGNMENT_HRIGHT;
836  } else {
837  current->alignment = SUB_ALIGNMENT_HCENTER;
838  }
839 #endif
840  strcpy(line2, line1);
841  p = line2;
842  }
843  for (q = line1; (!eol(*p)) && (current->lines < SUB_MAX_TEXT); ++p) {
844  switch (*p) {
845  case '{':
846  comment++;
847  break;
848  case '}':
849  if (comment) {
850  --comment;
851  /* the next line to get rid of a blank after the comment */
852  if ((*(p + 1)) == ' ')
853  p++;
854  }
855  break;
856  case '~':
857  if (!comment) {
858  *q = ' ';
859  ++q;
860  }
861  break;
862  case ' ':
863  case '\t':
864  if ((*(p + 1) == ' ') || (*(p + 1) == '\t'))
865  break;
866  if (!comment) {
867  *q = ' ';
868  ++q;
869  }
870  break;
871  case '\\':
872  if (*(p + 1) == 'n') {
873  *q = '\0';
874  q = line1;
875  current->text[current->lines++] = strdup(line1);
876  ++p;
877  break;
878  }
879  if ((toupper(*(p + 1)) == 'C')
880  || (toupper(*(p + 1)) == 'F')) {
881  ++p,++p;
882  break;
883  }
884  if ((*(p + 1) == 'B') || (*(p + 1) == 'b') ||
885  /* actually this means "insert current date here" */
886  (*(p + 1) == 'D') ||
887  (*(p + 1) == 'I') || (*(p + 1) == 'i') ||
888  (*(p + 1) == 'N') ||
889  /* actually this means "insert current time here" */
890  (*(p + 1) == 'T') ||
891  (*(p + 1) == 'U') || (*(p + 1) == 'u')) {
892  ++p;
893  break;
894  }
895  if ((*(p + 1) == '\\') ||
896  (*(p + 1) == '~') || (*(p + 1) == '{')) {
897  ++p;
898  } else if (eol(*(p + 1))) {
899  if (!read_line_from_input(demuxstr, directive, LINE_LEN))
900  return nullptr;
901  trail_space(directive);
902  strncat(line2, directive,
903  ((LINE_LEN > 511) ? LINE_LEN-1 : 511)
904  - strlen(line2));
905  break;
906  }
907  // Checked xine-lib-1.2.8. No fix there. Seems like it
908  // should be a break.
909  break;
910  default:
911  if (!comment) {
912  *q = *p;
913  ++q;
914  }
915  }
916  }
917  *q = '\0';
918  if (current->lines < SUB_MAX_TEXT)
919  current->text[current->lines] = strdup(line1);
920  else
921  printf ("Too many lines in a subtitle\n");
922  }
923  current->lines++;
924  return current;
925 }
926 
928  char line[LINE_LEN+1];
929  int a1,a2,a3,a4; // NOLINT(readability-isolate-declaration)
930  char *p=nullptr;
931  int i;
932 
933  while (!current->text[0]) {
934  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
935  if (line[0]!='{')
936  continue;
937  if (sscanf (line, "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4) < 4)
938  continue;
939  current->start = a1*360000+a2*6000+a3*100+a4/10;
940  for (i=0; i<SUB_MAX_TEXT;) {
941  if (!read_line_from_input(demuxstr, line, LINE_LEN)) break;
942  if (line[0]=='}') break;
943  int len=0;
944  for (p=line; *p!='\n' && *p!='\r' && *p; ++p,++len);
945  if (len) {
946  current->text[i]=(char *)malloc (len+1);
947  if (!current->text[i]) return (subtitle_t *)ERR;
948  strncpy (current->text[i], line, len); current->text[i][len]='\0';
949  ++i;
950  } else {
951  break;
952  }
953  }
954  current->lines=i;
955  }
956  return current;
957 }
958 
960  char line[LINE_LEN + 1];
961  int h;
962  int m;
963  int s;
964 
965  memset (current, 0, sizeof(subtitle_t));
966 
967  do {
968  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
969  } while (sscanf (line, "[%d:%d:%d]", &h, &m, &s) != 3);
970 
971  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
972 
973  current->start = 360000 * h + 6000 * m + 100 * s;
974  current->end = -1;
975 
976  char *next=line;
977  int i=0;
978  while ((next = sub_readtext (next, &(current->text[i])))) {
979  if (next==ERR) return (subtitle_t *)ERR;
980  i++;
981  if (i>=SUB_MAX_TEXT) {
982  printf("Too many lines in a subtitle\n");
983  current->lines=i;
984  return current;
985  }
986  }
987  current->lines= ++i;
988 
989  return current;
990 }
991 
992 /* Code from subreader.c of MPlayer
993 ** Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
994 */
995 
997  char line[LINE_LEN+1];
998  char line2[LINE_LEN+1];
999 
1000  memset (current, 0, sizeof(subtitle_t));
1001  do {
1002  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
1003  } while ((sscanf (line,
1004  "[%ld][%ld]%" LINE_LEN_QUOT "[^\r\n]",
1005  &(current->start), &(current->end), line2) < 3));
1006  current->start *= 10;
1007  current->end *= 10;
1008 
1009  char *p=line2;
1010  char *next=p;
1011  int i=0;
1012  while ((next = sub_readtext (next, &(current->text[i])))) {
1013  if (next == ERR) {return (subtitle_t *)ERR;}
1014  i++;
1015  if (i >= SUB_MAX_TEXT) {
1016  printf("Too many lines in a subtitle\n");
1017  current->lines = i;
1018  return current;
1019  }
1020  }
1021  current->lines= ++i;
1022 
1023  return current;
1024 }
1025 
1026 
1027 static int sub_autodetect (demux_sputext_t *demuxstr) {
1028 
1029  char line[LINE_LEN + 1];
1030  int i;
1031  int j=0;
1032  char p;
1033 
1034  while (j < 100) {
1035  j++;
1036  if (!read_line_from_input(demuxstr, line, LINE_LEN))
1037  return FORMAT_UNKNOWN;
1038 
1039  if ((sscanf (line, "{%d}{}", &i)==1) ||
1040  (sscanf (line, "{%d}{%d}", &i, &i)==2)) {
1041  demuxstr->uses_time=0;
1042  return FORMAT_MICRODVD;
1043  }
1044 
1045  if (sscanf (line, "%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) {
1046  demuxstr->uses_time=1;
1047  return FORMAT_SUBRIP;
1048  }
1049 
1050  if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
1051  demuxstr->uses_time=1;
1052  return FORMAT_SUBVIEWER;
1053  }
1054 
1055  if (sscanf (line, "%d:%d:%d,%d,%d:%d:%d,%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
1056  demuxstr->uses_time=1;
1057  return FORMAT_SUBVIEWER;
1058  }
1059 
1060  if (strstr (line, "<SAMI>")) {
1061  demuxstr->uses_time=1;
1062  return FORMAT_SAMI;
1063  }
1064  if (sscanf (line, "%d:%d:%d:", &i, &i, &i )==3) {
1065  demuxstr->uses_time=1;
1066  return FORMAT_VPLAYER;
1067  }
1068  /*
1069  * A RealText format is a markup language, starts with <window> tag,
1070  * options (behaviour modifiers) are possible.
1071  */
1072  if ( strcasecmp(line, "<window") == 0 ) {
1073  demuxstr->uses_time=1;
1074  return FORMAT_RT;
1075  }
1076  if ((memcmp(line, "Dialogue: Marked", 16) == 0) || (memcmp(line, "Dialogue: ", 10) == 0)) {
1077  demuxstr->uses_time=1;
1078  return FORMAT_SSA;
1079  }
1080  if (sscanf (line, "%d,%d,\"%c", &i, &i, (char *) &i) == 3) {
1081  demuxstr->uses_time=0;
1082  return FORMAT_PJS;
1083  }
1084  if (sscanf (line, "FORMAT=%d", &i) == 1) {
1085  demuxstr->uses_time=0;
1086  return FORMAT_MPSUB;
1087  }
1088  if (sscanf (line, "FORMAT=TIM%c", &p)==1 && p=='E') {
1089  demuxstr->uses_time=1;
1090  return FORMAT_MPSUB;
1091  }
1092  if (strstr (line, "-->>")) {
1093  demuxstr->uses_time=0;
1094  return FORMAT_AQTITLE;
1095  }
1096  if (sscanf(line, "@%d @%d", &i, &i) == 2 ||
1097  sscanf(line, "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8) {
1098  demuxstr->uses_time = 1;
1099  return FORMAT_JACOBSUB;
1100  }
1101  if (sscanf(line, "{T %d:%d:%d:%d",&i, &i, &i, &i) == 4) {
1102  demuxstr->uses_time = 1;
1103  return FORMAT_SUBVIEWER2;
1104  }
1105  if (sscanf(line, "[%d:%d:%d]", &i, &i, &i) == 3) {
1106  demuxstr->uses_time = 1;
1107  return FORMAT_SUBRIP09;
1108  }
1109 
1110  if (sscanf (line, "[%d][%d]", &i, &i) == 2) {
1111  demuxstr->uses_time = 1;
1112  return FORMAT_MPL2;
1113  }
1114  }
1115  return FORMAT_UNKNOWN; /* too many bad lines */
1116 }
1117 
1119 
1120  int n_max;
1121  int timeout;
1122  subtitle_t *first;
1123  // These functions all return either 1) nullptr, 2) (subtitle_t*)ERR,
1124  // or 3) a pointer to the dest parameter.
1125  subtitle_t * (*func[])(demux_sputext_t *demuxstr,subtitle_t *dest)=
1126  {
1141  };
1142 
1143  /* Rewind (sub_autodetect() needs to read input from the beginning) */
1144  demuxstr->rbuffer_cur = 0;
1145  demuxstr->buflen = 0;
1146  demuxstr->emptyReads = 0;
1147 
1148  demuxstr->format=sub_autodetect (demuxstr);
1149  if (demuxstr->format==FORMAT_UNKNOWN) {
1150  return nullptr;
1151  }
1152 
1153  /*printf("Detected subtitle file format: %d\n", demuxstr->format);*/
1154 
1155  /* Rewind */
1156  demuxstr->rbuffer_cur = 0;
1157  demuxstr->buflen = 0;
1158  demuxstr->emptyReads = 0;
1159 
1160  demuxstr->num=0;n_max=32;
1161  first = (subtitle_t *) malloc(n_max*sizeof(subtitle_t));
1162  if(!first) return nullptr;
1163  timeout = MAX_TIMEOUT;
1164 
1165  if (demuxstr->uses_time) timeout *= 100;
1166  else timeout *= 10;
1167 
1168  while(true) {
1169  if(demuxstr->num>=n_max){
1170  n_max+=16;
1171  auto *new_first=(subtitle_t *)realloc(first,n_max*sizeof(subtitle_t));
1172  if (new_first == nullptr) {
1173  free(first);
1174  return nullptr;
1175  }
1176  first = new_first;
1177  }
1178 
1179  subtitle_t *sub = func[demuxstr->format] (demuxstr, &first[demuxstr->num]);
1180  if (!sub) {
1181  break; /* EOF */
1182  }
1183  demuxstr->emptyReads = 0;
1184 
1185  if (sub==ERR)
1186  ++demuxstr->errs;
1187  else {
1188  if (demuxstr->num > 0 && first[demuxstr->num-1].end == -1) {
1189  /* end time not defined in the subtitle */
1190  if (timeout > 0) {
1191  /* timeout */
1192  if (timeout > sub->start - first[demuxstr->num-1].start) {
1193  first[demuxstr->num-1].end = sub->start;
1194  } else
1195  first[demuxstr->num-1].end = first[demuxstr->num-1].start + timeout;
1196  } else {
1197  /* no timeout */
1198  first[demuxstr->num-1].end = sub->start;
1199  }
1200  }
1201  ++demuxstr->num; /* Error vs. Valid */
1202  }
1203  }
1204  /* timeout of last subtitle */
1205  if (demuxstr->num > 0 && first[demuxstr->num-1].end == -1)
1206  if (timeout > 0) {
1207  first[demuxstr->num-1].end = first[demuxstr->num-1].start + timeout;
1208  }
1209 
1210 #ifdef DEBUG_XINE_DEMUX_SPUTEXT
1211  {
1212  char buffer[1024];
1213 
1214  sprintf(buffer, "Read %i subtitles", demuxstr->num);
1215 
1216  if(demuxstr->errs)
1217  sprintf(buffer + strlen(buffer), ", %i bad line(s).\n", demuxstr->errs);
1218  else
1219  strcat(buffer, "\n");
1220 
1221  printf("%s", buffer);
1222  }
1223 #endif
1224 
1225  // No memory leak of 'sub' here. 'Sub' always points to an element in 'first'.
1226  // NOLINT(clang-analyzer-unix.Malloc)
1227  return first;
1228 }
#define MAX_TIMEOUT
static bool eol(char p)
#define LINE_LEN
#define SUB_MAX_TEXT
static subtitle_t * sub_read_line_subviewer2(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_AQTITLE
long end
Ending time in msec or starting frame.
static subtitle_t * sub_read_line_microdvd(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_MPSUB
subtitle_t * sub_read_file(demux_sputext_t *demuxstr)
#define FORMAT_SSA
#define SUB_BUFSIZE
#define FORMAT_SUBVIEWER2
static subtitle_t * sub_read_line_subrip(demux_sputext_t *demuxstr, subtitle_t *current)
long long copy(QFile &dst, QFile &src, uint block_size)
Copies src file to dst file.
static guint32 * tmp
Definition: goom_core.c:35
char buf[SUB_BUFSIZE]
#define off_t
static subtitle_t * sub_read_line_vplayer(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_sami(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_UNKNOWN
static subtitle_t * sub_read_line_jacobsub(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBRIP09
static const uint16_t * d
QDateTime current(bool stripped)
Returns current Date and Time in UTC.
Definition: mythdate.cpp:10
static subtitle_t * sub_read_line_subrip09(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBVIEWER
#define FORMAT_SAMI
static char * read_line_from_input(demux_sputext_t *demuxstr, char *line, off_t len)
#define FORMAT_VPLAYER
static int sub_autodetect(demux_sputext_t *demuxstr)
static subtitle_t * sub_read_line_rt(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_PJS
PictureAttribute next(PictureAttributeSupported supported, PictureAttribute attribute)
static subtitle_t * sub_read_line_mpl2(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_ssa(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_pjs(demux_sputext_t *demuxstr, subtitle_t *current)
static char * sub_readtext(char *source, char **dest)
Extract the next token from a string.
#define FORMAT_JACOBSUB
static guint32 * p2
Definition: goom_core.c:35
static subtitle_t * sub_read_line_subviewer(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_aqt(demux_sputext_t *demuxstr, subtitle_t *current)
static void trail_space(char *s)
static subtitle_t * sub_read_line_mpsub(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBRIP
#define ERR
long start
Starting time in msec or starting frame.
#define FORMAT_MPL2
#define FORMAT_RT
#define FORMAT_MICRODVD
char next_line[SUB_BUFSIZE]
#define LINE_LEN_QUOT