Ticket #9836: external-subs-utf.diff

File external-subs-utf.diff, 9.1 KB (added by markk, 9 years ago)
  • mythtv/libs/libmythtv/textsubtitleparser.cpp

    diff --git a/mythtv/libs/libmythtv/textsubtitleparser.cpp b/mythtv/libs/libmythtv/textsubtitleparser.cpp
    index 3dcbfab..5005e1d 100644
    a b using std::lower_bound; 
    1717#include <QTextCodec>
    1818
    1919// MythTV
     20#include "mythlogging.h"
    2021#include "mythcorecontext.h"
    2122#include "ringbuffer.h"
    2223#include "textsubtitleparser.h"
    void TextSubtitles::Clear(void) 
    118119
    119120bool TextSubtitleParser::LoadSubtitles(QString fileName, TextSubtitles &target)
    120121{
     122    // Open text file
     123    QScopedPointer<RingBuffer> rb(RingBuffer::Create(fileName, 0, false));
     124
     125    if (!rb.data())
     126        return false;
     127
     128    // ensure we are at the start of the file
     129    if (rb.data()->Seek(0, SEEK_SET) == -1)
     130    {
     131        LOG(VB_GENERAL, LOG_ERR, "Failed to seek to beginning of sub file");
     132        return false;
     133    }
     134
     135    // read a test chunk
     136    QScopedPointer<char> readbuf(new char[2048]);
     137    int testcount = rb.data()->Read(readbuf.data(), 2048);
     138    if (testcount < 1)
     139    {
     140        LOG(VB_GENERAL, LOG_ERR, "Failed to read from sub file.");
     141        return false;
     142    }
     143
     144    // try to determine the text codec
     145    QByteArray test(readbuf.data(), testcount);
     146    QTextCodec *textCodec = QTextCodec::codecForUtfText(test, NULL);
     147    if (!textCodec)
     148    {
     149        LOG(VB_GENERAL, LOG_WARNING, "Failed to autodetect a UTF encoding.");
     150        QString codec = gCoreContext->GetSetting("SubtitleCodec", "");
     151        if (!codec.isEmpty())
     152            textCodec = QTextCodec::codecForName(codec.toLatin1());
     153        if (!textCodec)
     154            textCodec = QTextCodec::codecForName("utf-8");
     155        if (!textCodec)
     156        {
     157            LOG(VB_GENERAL, LOG_ERR,
     158                QString("Failed to find codec for subtitle file '%1'")
     159                .arg(fileName));
     160            return false;
     161        }
     162    }
     163
     164    LOG(VB_GENERAL, LOG_INFO, QString("Opened subtitle file '%1' with codec '%2'")
     165        .arg(fileName).arg(textCodec->name().constData()));
     166
     167    // load the entire subtitle file, converting to unicode as we go
     168    QScopedPointer<QTextDecoder> dec(textCodec->makeDecoder());
     169    QString data = QString();
     170    rb.data()->Seek(0, SEEK_SET);
     171    int expecting = rb.data()->GetRealFileSize();
     172    int size = 0;
     173    while (1)
     174    {
     175        int res = rb.data()->Read(readbuf.data(), 2048);
     176        if (res < 0)
     177        {
     178            LOG(VB_GENERAL, LOG_ERR, "Failed to read from subtitle file.");
     179            return false;
     180        }
     181
     182        size += res;
     183        if (res == 0 && (size >= expecting))
     184            break;
     185        else
     186            data += dec->toUnicode((const char*)readbuf.data(), res);
     187    }
     188
     189    if (data.isEmpty())
     190    {
     191        LOG(VB_GENERAL, LOG_WARNING,
     192            QString("Data loaded from subtitle file '%1' is empty.")
     193            .arg(fileName));
     194        return false;
     195    }
     196
     197    LOG(VB_GENERAL, LOG_INFO,
     198        QString("Loaded %1 bytes (expected %2) from file '%3'")
     199        .arg(size).arg(expecting).arg(fileName));
     200
     201    // convert back to utf-8 for parsing
     202    QByteArray ba = data.toUtf8();
    121203    demux_sputext_t sub_data;
    122     sub_data.rbuffer = RingBuffer::Create(fileName, 0, false);
    123 
    124     if (!sub_data.rbuffer)
    125         return false;
     204    sub_data.buffer = (char*)ba.data();
     205    sub_data.buffersize = data.size();
    126206
    127207    subtitle_t *loaded_subs = sub_read_file(&sub_data);
    128208    if (!loaded_subs)
    129209    {
    130         delete sub_data.rbuffer;
     210        LOG(VB_GENERAL, LOG_ERR, QString("Failed to read subtitles from '%1'")
     211            .arg(fileName));
    131212        return false;
    132213    }
    133214
     215    LOG(VB_GENERAL, LOG_INFO, QString("Found %1 subtitles in file '%2'")
     216        .arg(sub_data.num).arg(fileName));
    134217    target.SetFrameBasedTiming(!sub_data.uses_time);
    135218
    136     QTextCodec *textCodec = NULL;
    137     QString codec = gCoreContext->GetSetting("SubtitleCodec", "");
    138     if (!codec.isEmpty())
    139         textCodec = QTextCodec::codecForName(codec.toLatin1());
    140     if (!textCodec)
     219    // convert the subtitles to our own format, free the original structures
     220    // and convert back to unicode
    141221        textCodec = QTextCodec::codecForName("utf-8");
    142     if (!textCodec)
    143     {
    144         delete sub_data.rbuffer;
    145         return false;
    146     }
     222    if (textCodec)
     223        dec.reset(textCodec->makeDecoder());
    147224
    148     QTextDecoder *dec = textCodec->makeDecoder();
    149 
    150     // convert the subtitles to our own format and free the original structures
    151225    for (int sub_i = 0; sub_i < sub_data.num; ++sub_i)
    152226    {
    153227        const subtitle_t *sub = &loaded_subs[sub_i];
    bool TextSubtitleParser::LoadSubtitles(QString fileName, TextSubtitles &target) 
    162236        for (int line = 0; line < sub->lines; ++line)
    163237        {
    164238            const char *subLine = sub->text[line];
    165             QString str = dec->toUnicode(subLine, strlen(subLine));
     239            QString str;
     240            if (textCodec)
     241                str = dec->toUnicode(subLine, strlen(subLine));
     242            else
     243                str = QString(subLine);
    166244            newsub.textLines.push_back(str);
    167 
    168245            free(sub->text[line]);
    169246        }
    170247        target.AddSubtitle(newsub);
    171248    }
    172249
    173     delete dec;
    174     // textCodec object is managed by Qt, do not delete...
    175 
    176250    free(loaded_subs);
    177     delete sub_data.rbuffer;
    178 
    179251    return true;
    180252}
  • mythtv/libs/libmythtv/xine_demux_sputext.cpp

    diff --git a/mythtv/libs/libmythtv/xine_demux_sputext.cpp b/mythtv/libs/libmythtv/xine_demux_sputext.cpp
    index 56220d1..7767f51 100644
    a b  
    4242#include <fcntl.h>
    4343#include <ctype.h>
    4444#include "xine_demux_sputext.h"
     45#include "mythlogging.h"
    4546
    4647#define LOG_MODULE "demux_sputext"
    4748#define LOG_VERBOSE
    static inline void trail_space(char *s) { 
    9596 * Reimplementation of fgets() using the input->read() method.
    9697 */
    9798static char *read_line_from_input(demux_sputext_t *demuxstr, char *line, off_t len) {
    98   off_t nread = 0;
    9999  char *s;
     100  char *readpos;
    100101  int linelen;
    101102
    102   // Since our RemoteFile code sleeps 200ms whenever we get back less data
    103   // than requested, but this code just keeps trying to read until it gets
    104   // an error back, we check for empty reads so that we can stop reading
    105   // when there is no more data to read
    106   if (demuxstr->emptyReads == 0 && (len - demuxstr->buflen) > 512) {
    107     nread = demuxstr->rbuffer->Read(
    108         &demuxstr->buf[demuxstr->buflen], len - demuxstr->buflen);
    109     if (nread < 0) {
    110       printf("read failed.\n");
     103  if (demuxstr->bufferposition >= demuxstr->buffersize)
    111104      return NULL;
    112     }
    113   }
    114105
    115   if (!nread)
    116     demuxstr->emptyReads++;
     106  readpos = demuxstr->buffer + demuxstr->bufferposition;
     107  s = strchr(readpos, '\n');
    117108
    118   demuxstr->buflen += nread;
    119   demuxstr->buf[demuxstr->buflen] = '\0';
    120 
    121   s = strchr(demuxstr->buf, '\n');
    122 
    123   if (line && (s || demuxstr->buflen)) {
    124 
    125     linelen = s ? (s - demuxstr->buf) + 1 : demuxstr->buflen;
    126 
    127     memcpy(line, demuxstr->buf, linelen);
     109  if (line && s) {
     110    linelen = s - readpos + 1;
     111    memcpy(line, readpos, linelen);
    128112    line[linelen] = '\0';
    129 
    130     memmove(demuxstr->buf, &demuxstr->buf[linelen], SUB_BUFSIZE - linelen);
    131     demuxstr->buflen -= linelen;
    132 
     113    demuxstr->bufferposition += linelen;
    133114    return line;
    134115  }
    135116
    subtitle_t *sub_read_file (demux_sputext_t *demuxstr) { 
    11071088    sub_read_line_mpl2,
    11081089  };
    11091090
    1110   /* Rewind (sub_autodetect() needs to read input from the beginning) */
    1111   if(demuxstr->rbuffer->Seek(0, SEEK_SET) == -1) {
    1112     printf("seek failed.\n");
    1113     return NULL;
    1114   }
    1115   demuxstr->buflen = 0;
    1116   demuxstr->emptyReads = 0;
    1117 
     1091  demuxstr->bufferposition = 0;
    11181092  demuxstr->format=sub_autodetect (demuxstr);
    11191093  if (demuxstr->format==FORMAT_UNKNOWN) {
    11201094    return NULL;
    subtitle_t *sub_read_file (demux_sputext_t *demuxstr) { 
    11221096
    11231097  /*printf("Detected subtitle file format: %d\n", demuxstr->format);*/
    11241098
    1125   /* Rewind */
    1126   if(demuxstr->rbuffer->Seek(0, SEEK_SET) == -1) {
    1127     printf("seek failed.\n");
    1128     return NULL;
    1129   }
    1130   demuxstr->buflen = 0;
    1131   demuxstr->emptyReads = 0;
     1099  demuxstr->bufferposition = 0;
    11321100
    11331101  demuxstr->num=0;n_max=32;
    11341102  first = (subtitle_t *) malloc(n_max*sizeof(subtitle_t));
    subtitle_t *sub_read_file (demux_sputext_t *demuxstr) { 
    11481116
    11491117    sub = func[demuxstr->format] (demuxstr, &first[demuxstr->num]);
    11501118
    1151     if (!sub) {
     1119    if (!sub)
    11521120      break;   /* EOF */
    1153     } else {
    1154       demuxstr->emptyReads = 0;
    1155     }
    11561121
    11571122    if (sub==ERR)
    11581123      ++demuxstr->errs;
  • mythtv/libs/libmythtv/xine_demux_sputext.h

    diff --git a/mythtv/libs/libmythtv/xine_demux_sputext.h b/mythtv/libs/libmythtv/xine_demux_sputext.h
    index 82a4263..16849c2 100644
    a b typedef struct { 
    2121
    2222typedef struct {
    2323
    24   RingBuffer*        rbuffer;
    25 
    26   int                status;
    27 
    28   char               buf[SUB_BUFSIZE];
    29   off_t              buflen;
    30   off_t              emptyReads;
     24  char*              buffer;
     25  int                buffersize;
     26  int                bufferposition;
    3127
    3228  float              mpsub_position;
    3329