MythTV  master
xine_demux_sputext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2000-2003 the xine project
3  *
4  * This file is part of xine, a free video player.
5  *
6  * xine is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * xine is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  * code based on old libsputext/xine_decoder.c
21  *
22  * code based on mplayer module:
23  *
24  * Subtitle reader with format autodetection
25  *
26  * Written by laaz
27  * Some code cleanup & realloc() by A'rpi/ESP-team
28  * dunnowhat sub format by szabi
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include <cctype>
36 #include <cstdio>
37 #include <cstdlib>
38 #include <cstring>
39 #include <fcntl.h>
40 #include <sys/stat.h>
41 #include <sys/types.h>
42 #include <unistd.h>
43 #include "xine_demux_sputext.h"
44 
45 #define LOG_MODULE "demux_sputext"
46 #define LOG_VERBOSE
47 /*
48 #define LOG
49 */
50 
51 #define ERR ((void *)-1)
52 #define LINE_LEN 1000
53 #define LINE_LEN_QUOT "1000"
54 
55 /*
56  * Demuxer code start
57  */
58 
59 #define FORMAT_UNKNOWN (-1)
60 #define FORMAT_MICRODVD 0
61 #define FORMAT_SUBRIP 1
62 #define FORMAT_SUBVIEWER 2
63 #define FORMAT_SAMI 3
64 #define FORMAT_VPLAYER 4
65 #define FORMAT_RT 5
66 #define FORMAT_SSA 6 /* Sub Station Alpha */
67 #define FORMAT_PJS 7
68 #define FORMAT_MPSUB 8
69 #define FORMAT_AQTITLE 9
70 #define FORMAT_JACOBSUB 10
71 #define FORMAT_SUBVIEWER2 11
72 #define FORMAT_SUBRIP09 12
73 #define FORMAT_MPL2 13 /*Mplayer sub 2 ?*/
74 
/*
 * Tell whether p ends a line of subtitle text.
 *
 * Returns true for carriage return, line feed, or the terminating NUL
 * byte; false for every other character.
 */
static bool eol(char p) {
    switch (p) {
    case '\r':
    case '\n':
    case '\0':
        return true;
    default:
        return false;
    }
}
78 
/*
 * Strip leading and trailing whitespace from s, in place.
 *
 * s must be a writable NUL-terminated string. Leading whitespace is
 * removed by shifting the remaining text (including its terminator) to
 * the start of the buffer; trailing whitespace is overwritten with NUL
 * bytes. A string consisting only of whitespace becomes empty.
 */
static inline void trail_space(char *s) {
    char *first = s;
    size_t len;

    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char (non-ASCII subtitle text) is undefined. */
    while (isspace((unsigned char)*first))
        first++;

    len = strlen(first);

    /* One overlapping move instead of shifting the string once per space. */
    if (first != s)
        memmove(s, first, len + 1);

    while (len > 0 && isspace((unsigned char)s[len - 1]))
        s[--len] = '\0';
}
92 
93 /*
94  * Reimplementation of fgets() using the input->read() method.
95  */
96 static char *read_line_from_input(demux_sputext_t *demuxstr, char *line, off_t len) {
97  off_t nread = 0;
98  char *s;
99 
100  // Since our RemoteFile code sleeps 200ms whenever we get back less data
101  // than requested, but this code just keeps trying to read until it gets
102  // an error back, we check for empty reads so that we can stop reading
103  // when there is no more data to read
104  if (demuxstr->emptyReads == 0 && (len - demuxstr->buflen) > 512) {
105  nread = len - demuxstr->buflen;
106  if (nread > demuxstr->rbuffer_len - demuxstr->rbuffer_cur)
107  nread = demuxstr->rbuffer_len - demuxstr->rbuffer_cur;
108  if (nread < 0) {
109  printf("read failed.\n");
110  return nullptr;
111  }
112  memcpy(&demuxstr->buf[demuxstr->buflen],
113  &demuxstr->rbuffer_text[demuxstr->rbuffer_cur],
114  nread);
115  demuxstr->rbuffer_cur += nread;
116  }
117 
118  if (!nread)
119  demuxstr->emptyReads++;
120 
121  demuxstr->buflen += nread;
122  demuxstr->buf[demuxstr->buflen] = '\0';
123 
124  s = strchr(demuxstr->buf, '\n');
125 
126  if (line && (s || demuxstr->buflen)) {
127 
128  int linelen = s ? (s - demuxstr->buf) + 1 : demuxstr->buflen;
129 
130  memcpy(line, demuxstr->buf, linelen);
131  line[linelen] = '\0';
132 
133  memmove(demuxstr->buf, &demuxstr->buf[linelen], SUB_BUFSIZE - linelen);
134  demuxstr->buflen -= linelen;
135 
136  return line;
137  }
138 
139  return nullptr;
140 }
141 
142 
144 
145  static char line[LINE_LEN + 1];
146  static char *s = nullptr;
147  char text[LINE_LEN + 1], *p, *q;
148  int state;
149 
150  p = nullptr;
151  current->lines = current->start = 0;
152  current->end = -1;
153  state = 0;
154 
155  /* read the first line */
156  if (!s)
157  if (!(s = read_line_from_input(demuxstr, line, LINE_LEN))) return nullptr;
158 
159  do {
160  switch (state) {
161 
162  case 0: /* find "START=" */
163  s = strstr (s, "Start=");
164  if (s) {
165  current->start = strtol (s + 6, &s, 0) / 10;
166  state = 1; continue;
167  }
168  break;
169 
170  case 1: /* find "<P" */
171  if ((s = strstr (s, "<P"))) { s += 2; state = 2; continue; }
172  break;
173 
174  case 2: /* find ">" */
175  if ((s = strchr (s, '>'))) { s++; state = 3; p = text; continue; }
176  break;
177 
178  case 3: /* get all text until '<' appears */
179  if (*s == '\0') { break; }
180  else if (*s == '<') { state = 4; }
181  else if (strncasecmp (s, "&nbsp;", 6) == 0) { *p++ = ' '; s += 6; }
182  else if (*s == '\r') { s++; }
183  else if (strncasecmp (s, "<br>", 4) == 0 || *s == '\n') {
184  *p = '\0'; p = text; trail_space (text);
185  if (text[0] != '\0')
186  current->text[current->lines++] = strdup (text);
187  if (*s == '\n') s++; else s += 4;
188  }
189  else *p++ = *s++;
190  continue;
191 
192  case 4: /* get current->end or skip <TAG> */
193  q = strstr (s, "Start=");
194  if (q) {
195  current->end = strtol (q + 6, &q, 0) / 10 - 1;
196  *p = '\0'; trail_space (text);
197  if (text[0] != '\0')
198  current->text[current->lines++] = strdup (text);
199  if (current->lines > 0) { state = 99; break; }
200  state = 0; continue;
201  }
202  s = strchr (s, '>');
203  if (s) { s++; state = 3; continue; }
204  break;
205  }
206 
207  /* read next line */
208  if (state != 99 && !(s = read_line_from_input (demuxstr, line, LINE_LEN)))
209  return nullptr;
210 
211  } while (state != 99);
212 
213  return current;
214 }
215 
216 
217 static char *sub_readtext(char *source, char **dest) {
218  int len=0;
219  char *p=source;
220 
221  while ( !eol(*p) && *p!= '|' ) {
222  p++,len++;
223  }
224 
225  if (!dest)
226  return (char*)ERR;
227 
228  *dest= (char *)malloc (len+1);
229  if (!(*dest))
230  return (char*)ERR;
231 
232  strncpy(*dest, source, len);
233  (*dest)[len]=0;
234 
235  while (*p=='\r' || *p=='\n' || *p=='|')
236  p++;
237 
238  if (*p) return p; /* not-last text field */
239  return (char*)nullptr; /* last text field */
240 }
241 
243 
244  char line[LINE_LEN + 1];
245  char line2[LINE_LEN + 1];
246  char *p, *next;
247  int i;
248 
249  memset (current, 0, sizeof(subtitle_t));
250 
251  current->end=-1;
252  do {
253  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
254  } while ((sscanf (line, "{%ld}{}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), line2) !=2) &&
255  (sscanf (line, "{%ld}{%ld}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), &(current->end),line2) !=3)
256  );
257 
258  p=line2;
259 
260  next=p, i=0;
261  while ((next =sub_readtext (next, &(current->text[i])))) {
262  if (current->text[i]==ERR) return (subtitle_t *)ERR;
263  i++;
264  if (i>=SUB_MAX_TEXT) {
265  printf ("Too many lines in a subtitle\n");
266  current->lines=i;
267  return current;
268  }
269  }
270  current->lines= ++i;
271 
272  return current;
273 }
274 
276 
277  char line[LINE_LEN + 1];
278  int a1,a2,a3,a4,b1,b2,b3,b4;
279  char *p=nullptr, *q=nullptr;
280  int len;
281 
282  memset (current, 0, sizeof(subtitle_t));
283 
284  while (true) {
285  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
286  if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) {
287  if (sscanf (line, "%d:%d:%d,%d,%d:%d:%d,%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
288  continue;
289  }
290  current->start = a1*360000+a2*6000+a3*100+a4;
291  current->end = b1*360000+b2*6000+b3*100+b4;
292 
293  if (!read_line_from_input(demuxstr, line, LINE_LEN))
294  return nullptr;
295 
296  p=q=line;
297  for (current->lines=1; current->lines <= SUB_MAX_TEXT; current->lines++) {
298  for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' &&
299  (strncasecmp(p,"[br]",4) != 0); p++,len++);
300  current->text[current->lines-1]=(char *)malloc (len+1);
301  if (!current->text[current->lines-1]) return (subtitle_t *)ERR;
302  strncpy (current->text[current->lines-1], q, len);
303  current->text[current->lines-1][len]='\0';
304  if (!*p || *p=='\r' || *p=='\n') break;
305  if (*p=='[') while (*p++!=']');
306  if (*p=='|') p++;
307  }
308  if (current->lines > SUB_MAX_TEXT) current->lines = SUB_MAX_TEXT;
309  break;
310  }
311  return current;
312 }
313 
315  char line[LINE_LEN + 1];
316  int a1,a2,a3,a4,b1,b2,b3,b4;
317  int i,end_sub;
318 
319  memset(current,0,sizeof(subtitle_t));
320  do {
321  if(!read_line_from_input(demuxstr,line,LINE_LEN))
322  return nullptr;
323  i = sscanf(line,"%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4);
324  } while(i < 8);
325  current->start = a1*360000+a2*6000+a3*100+a4/10;
326  current->end = b1*360000+b2*6000+b3*100+b4/10;
327  i=0;
328  end_sub=0;
329  do {
330  char *p; /* pointer to the curently read char */
331  char temp_line[SUB_BUFSIZE]; /* subtitle line that will be transfered to current->text[i] */
332  int temp_index; /* ... and its index wich 'points' to the first EMPTY place -> last read char is at temp_index-1 if temp_index>0 */
333  temp_line[SUB_BUFSIZE-1]='\0'; /* just in case... */
334  if(!read_line_from_input(demuxstr,line,LINE_LEN)) {
335  if(i)
336  break; /* if something was read, transmit it */
337  return nullptr; /* if not, repport EOF */
338  }
339  for(temp_index=0,p=line;*p!='\0' && !end_sub && temp_index<SUB_BUFSIZE && i<SUB_MAX_TEXT;p++) {
340  switch(*p) {
341  case '\\':
342  if(*(p+1)=='N' || *(p+1)=='n') {
343  temp_line[temp_index++]='\0'; /* end of curent line */
344  p++;
345  } else
346  temp_line[temp_index++]=*p;
347  break;
348  case '{':
349 #if 0 /* italic not implemented in renderer, ignore them for now */
350  if(!strncmp(p,"{\\i1}",5) && temp_index+3<SUB_BUFSIZE) {
351  temp_line[temp_index++]='<';
352  temp_line[temp_index++]='i';
353  temp_line[temp_index++]='>';
354 #else
355  if(strncmp(p,"{\\i1}",5) == 0) {
356 #endif
357  p+=4;
358  }
359 #if 0 /* italic not implemented in renderer, ignore them for now */
360  else if(!strncmp(p,"{\\i0}",5) && temp_index+4<SUB_BUFSIZE) {
361  temp_line[temp_index++]='<';
362  temp_line[temp_index++]='/';
363  temp_line[temp_index++]='i';
364  temp_line[temp_index++]='>';
365 #else
366  else if(strncmp(p,"{\\i0}",5) == 0) {
367 #endif
368  p+=4;
369  }
370  else
371  temp_line[temp_index++]=*p;
372  break;
373  case '\r': /* just ignore '\r's */
374  break;
375  case '\n':
376  temp_line[temp_index++]='\0';
377  break;
378  default:
379  temp_line[temp_index++]=*p;
380  break;
381  }
382  if(temp_index>0) {
383  if(temp_index==SUB_BUFSIZE)
384  printf("Too many characters in a subtitle line\n");
385  if(temp_line[temp_index-1]=='\0' || temp_index==SUB_BUFSIZE) {
386  if(temp_index>1) { /* more than 1 char (including '\0') -> that is a valid one */
387  current->text[i]=(char *)malloc(temp_index);
388  if(!current->text[i])
389  return (subtitle_t *)ERR;
390  strncpy(current->text[i],temp_line,temp_index); /* temp_index<=SUB_BUFSIZE is always true here */
391  i++;
392  temp_index=0;
393  } else
394  end_sub=1;
395  }
396  }
397  }
398  } while(i<SUB_MAX_TEXT && !end_sub);
399  if(i>=SUB_MAX_TEXT)
400  printf("Too many lines in a subtitle\n");
401  current->lines=i;
402  return current;
403 }
404 
406  char line[LINE_LEN + 1];
407  int a1,a2,a3,b1,b2,b3;
408  char *p=nullptr, *next, *p2;
409  int i;
410 
411  memset (current, 0, sizeof(subtitle_t));
412 
413  while (!current->text[0]) {
414  if( demuxstr->next_line[0] == '\0' ) { /* if the buffer is empty.... */
415  if( !read_line_from_input(demuxstr, line, LINE_LEN) ) return nullptr;
416  } else {
417  /* ... get the current line from buffer. */
418  strncpy( line, demuxstr->next_line, LINE_LEN);
419  line[LINE_LEN] = '\0'; /* I'm scared. This makes me feel better. */
420  demuxstr->next_line[0] = '\0'; /* mark the buffer as empty. */
421  }
422  /* Initialize buffer with next line */
423  if( ! read_line_from_input( demuxstr, demuxstr->next_line, LINE_LEN) ) {
424  demuxstr->next_line[0] = '\0';
425  return nullptr;
426  }
427  if( (sscanf( line, "%d:%d:%d:", &a1, &a2, &a3) < 3) ||
428  (sscanf( demuxstr->next_line, "%d:%d:%d:", &b1, &b2, &b3) < 3) )
429  continue;
430  current->start = a1*360000+a2*6000+a3*100;
431  current->end = b1*360000+b2*6000+b3*100;
432  if ((current->end - current->start) > LINE_LEN)
433  current->end = current->start + LINE_LEN; /* not too long though. */
434  /* now it is time to copy the string in */
435  p=line;
436  /* finds the body of the subtitle_t */
437  for (i=0; i<3; i++){
438  p2=strchr( p, ':');
439  if( p2 == nullptr ) break;
440  p=p2+1;
441  }
442 
443  next=p;
444  i=0;
445  while( (next = sub_readtext( next, &(current->text[i]))) ) {
446  if (current->text[i]==ERR)
447  return (subtitle_t *)ERR;
448  i++;
449  if (i>=SUB_MAX_TEXT) {
450  printf("Too many lines in a subtitle\n");
451  current->lines=i;
452  return current;
453  }
454  }
455  current->lines=++i;
456  }
457  return current;
458 }
459 
461  /*
462  * TODO: This format uses quite rich (sub/super)set of xhtml
463  * I couldn't check it since DTD is not included.
464  * WARNING: full XML parses can be required for proper parsing
465  */
466  char line[LINE_LEN + 1];
467  int a1,a2,a3,a4,b1,b2,b3,b4;
468  char *p=nullptr,*next=nullptr;
469  int i,plen;
470 
471  memset (current, 0, sizeof(subtitle_t));
472 
473  while (!current->text[0]) {
474  int len;
475  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
476  /*
477  * TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
478  * to describe the same moment in time. Maybe there are even more formats in use.
479  */
480  if ((len=sscanf (line, "<Time Begin=\"%d:%d:%d.%d\" End=\"%d:%d:%d.%d\"",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8)
481 
482  plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
483  if (
484  ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&plen)) < 4) &&
485  ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&b4,&plen)) < 5) &&
486  /* ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen)) < 5) && */
487  ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&b4,&plen)) < 6) &&
488  ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\" %*[Ee]nd=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen)) < 8)
489  )
490  continue;
491  current->start = a1*360000+a2*6000+a3*100+a4/10;
492  current->end = b1*360000+b2*6000+b3*100+b4/10;
493  p=line; p+=plen;i=0;
494  /* TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml? */
495  next = strstr(line,"<clear/>")+8;i=0;
496  while ((next =sub_readtext (next, &(current->text[i])))) {
497  if (current->text[i]==ERR)
498  return (subtitle_t *)ERR;
499  i++;
500  if (i>=SUB_MAX_TEXT) {
501  printf("Too many lines in a subtitle\n");
502  current->lines=i;
503  return current;
504  }
505  }
506  current->lines=i+1;
507  }
508  return current;
509 }
510 
512  int comma;
513  static int max_comma = 32; /* let's use 32 for the case that the */
514  /* amount of commas increase with newer SSA versions */
515 
516  int hour1, min1, sec1, hunsec1, hour2, min2, sec2, hunsec2, nothing;
517  int num;
518  char line[LINE_LEN + 1], line3[LINE_LEN + 1], *line2;
519  char *tmp;
520 
521  do {
522  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
523  } while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d,"
524  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
525  &hour1, &min1, &sec1, &hunsec1,
526  &hour2, &min2, &sec2, &hunsec2,
527  line3) < 9
528  &&
529  sscanf (line, "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d,"
530  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
531  &hour1, &min1, &sec1, &hunsec1,
532  &hour2, &min2, &sec2, &hunsec2,
533  line3) < 9 );
534 
535  line2=strchr(line3, ',');
536  if (!line2)
537  return nullptr;
538 
539  for (comma = 4; comma < max_comma; comma ++)
540  {
541  tmp = line2;
542  if(!(tmp=strchr(++tmp, ','))) break;
543  if(*(++tmp) == ' ') break;
544  /* a space after a comma means we're already in a sentence */
545  line2 = tmp;
546  }
547 
548  if(comma < max_comma)max_comma = comma;
549  /* eliminate the trailing comma */
550  if(*line2 == ',') line2++;
551 
552  current->lines=0;num=0;
553  current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
554  current->end = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;
555 
556  while (((tmp=strstr(line2, "\\n")) != nullptr) || ((tmp=strstr(line2, "\\N")) != nullptr) ){
557  current->text[num]=(char *)malloc(tmp-line2+1);
558  strncpy (current->text[num], line2, tmp-line2);
559  current->text[num][tmp-line2]='\0';
560  line2=tmp+2;
561  num++;
562  current->lines++;
563  if (current->lines >= SUB_MAX_TEXT) return current;
564  }
565 
566  current->text[num]=strdup(line2);
567  current->lines++;
568 
569  return current;
570 }
571 
572 /* Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
573  * From MPlayer subreader.c :
574  *
575  * PJS subtitles reader.
576  * That's the "Phoenix Japanimation Society" format.
577  * I found some of them in http://www.scriptsclub.org/ (used for anime).
578  * The time is in tenths of second.
579  *
580  * by set, based on code by szabi (dunnowhat sub format ;-)
581  */
582 
584  char line[LINE_LEN + 1];
585  char text[LINE_LEN + 1];
586  char *s, *d;
587 
588  memset (current, 0, sizeof(subtitle_t));
589 
590  if (!read_line_from_input(demuxstr, line, LINE_LEN))
591  return nullptr;
592  for (s = line; *s && isspace(*s); s++);
593  if (*s == 0)
594  return nullptr;
595  if (sscanf (line, "%ld,%ld,", &(current->start),
596  &(current->end)) <2)
597  return (subtitle_t *)ERR;
598  /* the files I have are in tenths of second */
599  current->start *= 10;
600  current->end *= 10;
601 
602  /* walk to the beginning of the string */
603  for (; *s; s++) if (*s==',') break;
604  if (*s) {
605  for (s++; *s; s++) if (*s==',') break;
606  if (*s) s++;
607  }
608  if (*s!='"') {
609  return (subtitle_t *)ERR;
610  }
611  /* copy the string to the text buffer */
612  for (s++, d=text; *s && *s!='"'; s++, d++)
613  *d=*s;
614  *d=0;
615  current->text[0] = strdup(text);
616  current->lines = 1;
617 
618  return current;
619 }
620 
622  char line[LINE_LEN + 1];
623  float a,b;
624  int num=0;
625  char *p, *q;
626 
627  do {
628  if (!read_line_from_input(demuxstr, line, LINE_LEN))
629  return nullptr;
630  } while (sscanf (line, "%f %f", &a, &b) !=2);
631 
632  demuxstr->mpsub_position += (a*100.0F);
633  current->start = (int) demuxstr->mpsub_position;
634  demuxstr->mpsub_position += (b*100.0F);
635  current->end = (int) demuxstr->mpsub_position;
636 
637  while (num < SUB_MAX_TEXT) {
638  if (!read_line_from_input(demuxstr, line, LINE_LEN))
639  return nullptr;
640 
641  p=line;
642  while (isspace(*p))
643  p++;
644 
645  if (eol(*p) && num > 0)
646  return current;
647 
648  if (eol(*p))
649  return nullptr;
650 
651  for (q=p; !eol(*q); q++);
652  *q='\0';
653  if (strlen(p)) {
654  current->text[num]=strdup(p);
655  printf(">%s<\n",p);
656  current->lines = ++num;
657  } else {
658  if (num)
659  return current;
660  return nullptr;
661  }
662  }
663 
664  return nullptr;
665 }
666 
668  char line[LINE_LEN + 1];
669 
670  memset (current, 0, sizeof(subtitle_t));
671 
672  while (true) {
673  /* try to locate next subtitle_t */
674  if (!read_line_from_input(demuxstr, line, LINE_LEN))
675  return nullptr;
676  if (!(sscanf (line, "-->> %ld", &(current->start)) <1))
677  break;
678  }
679 
680  if (!read_line_from_input(demuxstr, line, LINE_LEN))
681  return nullptr;
682 
683  sub_readtext((char *) &line,&current->text[0]);
684  current->lines = 1;
685  current->end = -1;
686 
687  if (!read_line_from_input(demuxstr, line, LINE_LEN))
688  return current;;
689 
690  sub_readtext((char *) &line,&current->text[1]);
691  current->lines = 2;
692 
693  if ((current->text[0][0]==0) && (current->text[1][0]==0)) {
694  return nullptr;
695  }
696 
697  return current;
698 }
699 
701  char line1[LINE_LEN+1], line2[LINE_LEN+1], directive[LINE_LEN+1], *p, *q;
702  unsigned a1, a2, a3, a4, b1, b2, b3, b4, comment = 0;
703  static unsigned jacoTimeres = 30;
704  static int jacoShift = 0;
705 
706  memset(current, 0, sizeof(subtitle_t));
707  memset(line1, 0, LINE_LEN+1);
708  memset(line2, 0, LINE_LEN+1);
709  memset(directive, 0, LINE_LEN+1);
710  while (!current->text[0]) {
711  if (!read_line_from_input(demuxstr, line1, LINE_LEN)) {
712  return nullptr;
713  }
714  if (sscanf
715  (line1, "%u:%u:%u.%u %u:%u:%u.%u %" LINE_LEN_QUOT "[^\n\r]", &a1, &a2, &a3, &a4,
716  &b1, &b2, &b3, &b4, line2) < 9) {
717  if (sscanf(line1, "@%u @%u %" LINE_LEN_QUOT "[^\n\r]", &a4, &b4, line2) < 3) {
718  if (line1[0] == '#') {
719  int hours = 0, minutes = 0, seconds, delta;
720  unsigned units = jacoShift;
721  int inverter = 1;
722  switch (toupper(line1[1])) {
723  case 'S':
724  if (isalpha(line1[2])) {
725  delta = 6;
726  } else {
727  delta = 2;
728  }
729  if (sscanf(&line1[delta], "%d", &hours)) {
730  if (hours < 0) {
731  hours *= -1;
732  inverter = -1;
733  }
734  if (sscanf(&line1[delta], "%*d:%d", &minutes)) {
735  if (sscanf
736  (&line1[delta], "%*d:%*d:%d",
737  &seconds)) {
738  sscanf(&line1[delta], "%*d:%*d:%*d.%u",
739  &units);
740  } else {
741  hours = 0;
742  sscanf(&line1[delta], "%d:%d.%u",
743  &minutes, &seconds, &units);
744  minutes *= inverter;
745  }
746  } else {
747  hours = minutes = 0;
748  sscanf(&line1[delta], "%d.%u", &seconds,
749  &units);
750  seconds *= inverter;
751  }
752  jacoShift =
753  ((hours * 3600 + minutes * 60 +
754  seconds) * jacoTimeres +
755  units) * inverter;
756  }
757  break;
758  case 'T':
759  if (isalpha(line1[2])) {
760  delta = 8;
761  } else {
762  delta = 2;
763  }
764  sscanf(&line1[delta], "%u", &jacoTimeres);
765  break;
766  }
767  }
768  continue;
769  }
770  current->start =
771  (unsigned long) ((a4 + jacoShift) * 100.0 /
772  jacoTimeres);
773  current->end =
774  (unsigned long) ((b4 + jacoShift) * 100.0 /
775  jacoTimeres);
776  } else {
777  current->start =
778  (unsigned
779  long) (((a1 * 3600 + a2 * 60 + a3) * jacoTimeres + a4 +
780  jacoShift) * 100.0 / jacoTimeres);
781  current->end =
782  (unsigned
783  long) (((b1 * 3600 + b2 * 60 + b3) * jacoTimeres + b4 +
784  jacoShift) * 100.0 / jacoTimeres);
785  }
786  current->lines = 0;
787  p = line2;
788  while ((*p == ' ') || (*p == '\t')) {
789  ++p;
790  }
791  if (isalpha(*p)||*p == '[') {
792  int cont, jLength;
793 
794  if (sscanf(p, "%" LINE_LEN_QUOT "s %" LINE_LEN_QUOT "[^\n\r]", directive, line1) < 2)
795  return (subtitle_t *)ERR;
796  jLength = strlen(directive);
797  for (cont = 0; cont < jLength; ++cont) {
798  if (isalpha(*(directive + cont)))
799  *(directive + cont) = toupper(*(directive + cont));
800  }
801  if ((strstr(directive, "RDB") != nullptr)
802  || (strstr(directive, "RDC") != nullptr)
803  || (strstr(directive, "RLB") != nullptr)
804  || (strstr(directive, "RLG") != nullptr)) {
805  continue;
806  }
807  /* no alignment */
808 #if 0
809  if (strstr(directive, "JL") != nullptr) {
810  current->alignment = SUB_ALIGNMENT_HLEFT;
811  } else if (strstr(directive, "JR") != nullptr) {
812  current->alignment = SUB_ALIGNMENT_HRIGHT;
813  } else {
814  current->alignment = SUB_ALIGNMENT_HCENTER;
815  }
816 #endif
817  strcpy(line2, line1);
818  p = line2;
819  }
820  for (q = line1; (!eol(*p)) && (current->lines < SUB_MAX_TEXT); ++p) {
821  switch (*p) {
822  case '{':
823  comment++;
824  break;
825  case '}':
826  if (comment) {
827  --comment;
828  /* the next line to get rid of a blank after the comment */
829  if ((*(p + 1)) == ' ')
830  p++;
831  }
832  break;
833  case '~':
834  if (!comment) {
835  *q = ' ';
836  ++q;
837  }
838  break;
839  case ' ':
840  case '\t':
841  if ((*(p + 1) == ' ') || (*(p + 1) == '\t'))
842  break;
843  if (!comment) {
844  *q = ' ';
845  ++q;
846  }
847  break;
848  case '\\':
849  if (*(p + 1) == 'n') {
850  *q = '\0';
851  q = line1;
852  current->text[current->lines++] = strdup(line1);
853  ++p;
854  break;
855  }
856  if ((toupper(*(p + 1)) == 'C')
857  || (toupper(*(p + 1)) == 'F')) {
858  ++p,++p;
859  break;
860  }
861  if ((*(p + 1) == 'B') || (*(p + 1) == 'b') ||
862  /* actually this means "insert current date here" */
863  (*(p + 1) == 'D') ||
864  (*(p + 1) == 'I') || (*(p + 1) == 'i') ||
865  (*(p + 1) == 'N') ||
866  /* actually this means "insert current time here" */
867  (*(p + 1) == 'T') ||
868  (*(p + 1) == 'U') || (*(p + 1) == 'u')) {
869  ++p;
870  break;
871  }
872  if ((*(p + 1) == '\\') ||
873  (*(p + 1) == '~') || (*(p + 1) == '{')) {
874  ++p;
875  } else if (eol(*(p + 1))) {
876  if (!read_line_from_input(demuxstr, directive, LINE_LEN))
877  return nullptr;
878  trail_space(directive);
879  strncat(line2, directive,
880  ((LINE_LEN > 511) ? LINE_LEN-1 : 511)
881  - strlen(line2));
882  break;
883  }
884  // Checked xine-lib-1.2.8. No fix there. Seems like it
885  // should be a break.
886  break;
887  default:
888  if (!comment) {
889  *q = *p;
890  ++q;
891  }
892  }
893  }
894  *q = '\0';
895  if (current->lines < SUB_MAX_TEXT)
896  current->text[current->lines] = strdup(line1);
897  else
898  printf ("Too many lines in a subtitle\n");
899  }
900  current->lines++;
901  return current;
902 }
903 
905  char line[LINE_LEN+1];
906  int a1,a2,a3,a4;
907  char *p=nullptr;
908  int i,len;
909 
910  while (!current->text[0]) {
911  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
912  if (line[0]!='{')
913  continue;
914  if ((len=sscanf (line, "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4)) < 4)
915  continue;
916  current->start = a1*360000+a2*6000+a3*100+a4/10;
917  for (i=0; i<SUB_MAX_TEXT;) {
918  if (!read_line_from_input(demuxstr, line, LINE_LEN)) break;
919  if (line[0]=='}') break;
920  len=0;
921  for (p=line; *p!='\n' && *p!='\r' && *p; ++p,++len);
922  if (len) {
923  current->text[i]=(char *)malloc (len+1);
924  if (!current->text[i]) return (subtitle_t *)ERR;
925  strncpy (current->text[i], line, len); current->text[i][len]='\0';
926  ++i;
927  } else {
928  break;
929  }
930  }
931  current->lines=i;
932  }
933  return current;
934 }
935 
937  char line[LINE_LEN + 1];
938  char *next;
939  int h, m, s;
940  int i;
941 
942  memset (current, 0, sizeof(subtitle_t));
943 
944  do {
945  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
946  } while (sscanf (line, "[%d:%d:%d]", &h, &m, &s) != 3);
947 
948  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
949 
950  current->start = 360000 * h + 6000 * m + 100 * s;
951  current->end = -1;
952 
953  next=line;
954  i=0;
955  while ((next = sub_readtext (next, &(current->text[i])))) {
956  if (current->text[i]==ERR) return (subtitle_t *)ERR;
957  i++;
958  if (i>=SUB_MAX_TEXT) {
959  printf("Too many lines in a subtitle\n");
960  current->lines=i;
961  return current;
962  }
963  }
964  current->lines= ++i;
965 
966  return current;
967 }
968 
969 /* Code from subreader.c of MPlayer
970 ** Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
971 */
972 
974  char line[LINE_LEN+1];
975  char line2[LINE_LEN+1];
976  char *p, *next;
977  int i;
978 
979  memset (current, 0, sizeof(subtitle_t));
980  do {
981  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
982  } while ((sscanf (line,
983  "[%ld][%ld]%" LINE_LEN_QUOT "[^\r\n]",
984  &(current->start), &(current->end), line2) < 3));
985  current->start *= 10;
986  current->end *= 10;
987  p=line2;
988 
989  next=p, i=0;
990  while ((next = sub_readtext (next, &(current->text[i])))) {
991  if (current->text[i] == ERR) {return (subtitle_t *)ERR;}
992  i++;
993  if (i >= SUB_MAX_TEXT) {
994  printf("Too many lines in a subtitle\n");
995  current->lines = i;
996  return current;
997  }
998  }
999  current->lines= ++i;
1000 
1001  return current;
1002 }
1003 
1004 
1005 static int sub_autodetect (demux_sputext_t *demuxstr) {
1006 
1007  char line[LINE_LEN + 1];
1008  int i, j=0;
1009  char p;
1010 
1011  while (j < 100) {
1012  j++;
1013  if (!read_line_from_input(demuxstr, line, LINE_LEN))
1014  return FORMAT_UNKNOWN;
1015 
1016  if ((sscanf (line, "{%d}{}", &i)==1) ||
1017  (sscanf (line, "{%d}{%d}", &i, &i)==2)) {
1018  demuxstr->uses_time=0;
1019  return FORMAT_MICRODVD;
1020  }
1021 
1022  if (sscanf (line, "%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) {
1023  demuxstr->uses_time=1;
1024  return FORMAT_SUBRIP;
1025  }
1026 
1027  if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
1028  demuxstr->uses_time=1;
1029  return FORMAT_SUBVIEWER;
1030  }
1031 
1032  if (sscanf (line, "%d:%d:%d,%d,%d:%d:%d,%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
1033  demuxstr->uses_time=1;
1034  return FORMAT_SUBVIEWER;
1035  }
1036 
1037  if (strstr (line, "<SAMI>")) {
1038  demuxstr->uses_time=1;
1039  return FORMAT_SAMI;
1040  }
1041  if (sscanf (line, "%d:%d:%d:", &i, &i, &i )==3) {
1042  demuxstr->uses_time=1;
1043  return FORMAT_VPLAYER;
1044  }
1045  /*
1046  * A RealText format is a markup language, starts with <window> tag,
1047  * options (behaviour modifiers) are possible.
1048  */
1049  if ( strcasecmp(line, "<window") == 0 ) {
1050  demuxstr->uses_time=1;
1051  return FORMAT_RT;
1052  }
1053  if ((memcmp(line, "Dialogue: Marked", 16) == 0) || (memcmp(line, "Dialogue: ", 10) == 0)) {
1054  demuxstr->uses_time=1;
1055  return FORMAT_SSA;
1056  }
1057  if (sscanf (line, "%d,%d,\"%c", &i, &i, (char *) &i) == 3) {
1058  demuxstr->uses_time=0;
1059  return FORMAT_PJS;
1060  }
1061  if (sscanf (line, "FORMAT=%d", &i) == 1) {
1062  demuxstr->uses_time=0;
1063  return FORMAT_MPSUB;
1064  }
1065  if (sscanf (line, "FORMAT=TIM%c", &p)==1 && p=='E') {
1066  demuxstr->uses_time=1;
1067  return FORMAT_MPSUB;
1068  }
1069  if (strstr (line, "-->>")) {
1070  demuxstr->uses_time=0;
1071  return FORMAT_AQTITLE;
1072  }
1073  if (sscanf(line, "@%d @%d", &i, &i) == 2 ||
1074  sscanf(line, "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8) {
1075  demuxstr->uses_time = 1;
1076  return FORMAT_JACOBSUB;
1077  }
1078  if (sscanf(line, "{T %d:%d:%d:%d",&i, &i, &i, &i) == 4) {
1079  demuxstr->uses_time = 1;
1080  return FORMAT_SUBVIEWER2;
1081  }
1082  if (sscanf(line, "[%d:%d:%d]", &i, &i, &i) == 3) {
1083  demuxstr->uses_time = 1;
1084  return FORMAT_SUBRIP09;
1085  }
1086 
1087  if (sscanf (line, "[%d][%d]", &i, &i) == 2) {
1088  demuxstr->uses_time = 1;
1089  return FORMAT_MPL2;
1090  }
1091  }
1092  return FORMAT_UNKNOWN; /* too many bad lines */
1093 }
1094 
1096 
1097  int n_max;
1098  int timeout;
1099  subtitle_t *first;
1100  subtitle_t * (*func[])(demux_sputext_t *demuxstr,subtitle_t *dest)=
1101  {
1116  };
1117 
1118  /* Rewind (sub_autodetect() needs to read input from the beginning) */
1119  demuxstr->rbuffer_cur = 0;
1120  demuxstr->buflen = 0;
1121  demuxstr->emptyReads = 0;
1122 
1123  demuxstr->format=sub_autodetect (demuxstr);
1124  if (demuxstr->format==FORMAT_UNKNOWN) {
1125  return nullptr;
1126  }
1127 
1128  /*printf("Detected subtitle file format: %d\n", demuxstr->format);*/
1129 
1130  /* Rewind */
1131  demuxstr->rbuffer_cur = 0;
1132  demuxstr->buflen = 0;
1133  demuxstr->emptyReads = 0;
1134 
1135  demuxstr->num=0;n_max=32;
1136  first = (subtitle_t *) malloc(n_max*sizeof(subtitle_t));
1137  if(!first) return nullptr;
1138  timeout = MAX_TIMEOUT;
1139 
1140  if (demuxstr->uses_time) timeout *= 100;
1141  else timeout *= 10;
1142 
1143  while(true) {
1144  subtitle_t *sub;
1145 
1146  if(demuxstr->num>=n_max){
1147  n_max+=16;
1148  subtitle_t *new_first=(subtitle_t *)realloc(first,n_max*sizeof(subtitle_t));
1149  if (new_first == nullptr) {
1150  free(first);
1151  return nullptr;
1152  }
1153  first = new_first;
1154  }
1155 
1156  sub = func[demuxstr->format] (demuxstr, &first[demuxstr->num]);
1157 
1158  if (!sub) {
1159  break; /* EOF */
1160  }
1161  demuxstr->emptyReads = 0;
1162 
1163  if (sub==ERR)
1164  ++demuxstr->errs;
1165  else {
1166  if (demuxstr->num > 0 && first[demuxstr->num-1].end == -1) {
1167  /* end time not defined in the subtitle */
1168  if (timeout > 0) {
1169  /* timeout */
1170  if (timeout > sub->start - first[demuxstr->num-1].start) {
1171  first[demuxstr->num-1].end = sub->start;
1172  } else
1173  first[demuxstr->num-1].end = first[demuxstr->num-1].start + timeout;
1174  } else {
1175  /* no timeout */
1176  first[demuxstr->num-1].end = sub->start;
1177  }
1178  }
1179  ++demuxstr->num; /* Error vs. Valid */
1180  }
1181  }
1182  /* timeout of last subtitle */
1183  if (demuxstr->num > 0 && first[demuxstr->num-1].end == -1)
1184  if (timeout > 0) {
1185  first[demuxstr->num-1].end = first[demuxstr->num-1].start + timeout;
1186  }
1187 
1188 #ifdef DEBUG_XINE_DEMUX_SPUTEXT
1189  {
1190  char buffer[1024];
1191 
1192  sprintf(buffer, "Read %i subtitles", demuxstr->num);
1193 
1194  if(demuxstr->errs)
1195  sprintf(buffer + strlen(buffer), ", %i bad line(s).\n", demuxstr->errs);
1196  else
1197  strcat(buffer, "\n");
1198 
1199  printf("%s", buffer);
1200  }
1201 #endif
1202 
1203  return first;
1204 }
#define MAX_TIMEOUT
static bool eol(char p)
#define LINE_LEN
#define SUB_MAX_TEXT
static subtitle_t * sub_read_line_subviewer2(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_AQTITLE
long end
Ending time in msec or ending frame.
static subtitle_t * sub_read_line_microdvd(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_MPSUB
subtitle_t * sub_read_file(demux_sputext_t *demuxstr)
#define FORMAT_SSA
#define SUB_BUFSIZE
#define FORMAT_SUBVIEWER2
static subtitle_t * sub_read_line_subrip(demux_sputext_t *demuxstr, subtitle_t *current)
long long copy(QFile &dst, QFile &src, uint block_size)
Copies src file to dst file.
static guint32 * tmp
Definition: goom_core.c:35
char buf[SUB_BUFSIZE]
#define off_t
unsigned char b
Definition: ParseText.cpp:329
static subtitle_t * sub_read_line_vplayer(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_sami(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_UNKNOWN
static subtitle_t * sub_read_line_jacobsub(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBRIP09
static const uint16_t * d
QDateTime current(bool stripped)
Returns current Date and Time in UTC.
Definition: mythdate.cpp:10
static subtitle_t * sub_read_line_subrip09(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBVIEWER
#define FORMAT_SAMI
static char * read_line_from_input(demux_sputext_t *demuxstr, char *line, off_t len)
#define FORMAT_VPLAYER
static int sub_autodetect(demux_sputext_t *demuxstr)
static subtitle_t * sub_read_line_rt(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_PJS
PictureAttribute next(PictureAttributeSupported supported, PictureAttribute attribute)
static subtitle_t * sub_read_line_mpl2(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_ssa(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_pjs(demux_sputext_t *demuxstr, subtitle_t *current)
static char * sub_readtext(char *source, char **dest)
#define FORMAT_JACOBSUB
static guint32 * p2
Definition: goom_core.c:35
static subtitle_t * sub_read_line_subviewer(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_aqt(demux_sputext_t *demuxstr, subtitle_t *current)
static void trail_space(char *s)
static subtitle_t * sub_read_line_mpsub(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBRIP
#define ERR
long start
Starting time in msec or starting frame.
#define FORMAT_MPL2
#define FORMAT_RT
#define FORMAT_MICRODVD
char next_line[SUB_BUFSIZE]
#define LINE_LEN_QUOT