Ticket #4327: eitfixup.patch

File eitfixup.patch, 17.4 KB (added by anonymous, 13 years ago)
  • libs/libmythtv/eitfixup.h

     
    1414/// EIT Fix Up Functions
    1515class EITFixUp
    1616{
     17  protected:
     18     // max length of subtitle field in db.
     19     static const uint SUBTITLE_MAX_LEN = 128;
     20     // max number of words up to a period, question mark
     21     static const uint kDotToTitle = 6;
     22
    1723  public:
    1824    enum FixUpType
    1925    {
     
    5460
    5561  private:
    5662    void FixBellExpressVu(DBEvent &event) const; // Canada DVB-S
     63    void SetUKSubtitle(DBEvent &event) const;
    5764    void FixUK(DBEvent &event) const;            // UK DVB-T
    5865    void FixPBS(DBEvent &event) const;           // USA ATSC
    5966    void FixComHem(DBEvent &event, bool parse_subtitle) const; // Sweden DVB-C
     
    7582    const QRegExp m_ukSubtitle;
    7683    const QRegExp m_ukThen;
    7784    const QRegExp m_ukNew;
     85    const QRegExp m_ukNew1;
    7886    const QRegExp m_ukT4;
    7987    const QRegExp m_ukEQ;
    8088    const QRegExp m_ukEPQ;
     89    const QRegExp m_ukColonHyphen;
    8190    const QRegExp m_ukPStart;
    8291    const QRegExp m_ukPEnd;
    8392    const QRegExp m_ukSeries1;
    8493    const QRegExp m_ukSeries2;
     94    const QRegExp m_ukSeries3;
    8595    const QRegExp m_ukCC;
    8696    const QRegExp m_ukYear;
    8797    const QRegExp m_uk24ep;
    8898    const QRegExp m_ukStarring;
     99    const QRegExp m_ukBBC7rpt;
     100    const QRegExp m_ukCBBC;
     101    const QRegExp m_ukCBeebies;
     102    const QRegExp m_ukStarring1;
     103    const QRegExp m_ukDoubleDotEnd;
     104    const QRegExp m_ukDoubleDotStart;
    89105    const QRegExp m_comHemCountry;
    90106    const QRegExp m_comHemDirector;
    91107    const QRegExp m_comHemActor;
  • libs/libmythtv/eitfixup.cpp

     
    1717      m_bellPPVDescriptionEventId("\\([0-9]{5}\\)"),
    1818      m_ukSubtitle("\\[.*S\\]"),
    1919      m_ukThen("\\s*(Then|Followed by) 60 Seconds\\.", false),
    20       m_ukNew("\\s*(Brand New|New) Series\\s*[:\\.\\-]"),
     20      m_ukNew("\\s*(Brand New|New)\\s*Series\\s*[:\\.\\-]",false),
     21      m_ukNew1("^New\\."),
    2122      m_ukT4("^[tT]4:"),
    22       m_ukEQ("[\\!\\?]"),
     23      m_ukEQ("[:\\!\\?]"),
    2324      m_ukEPQ("[:\\!\\.\\?]"),
     25      m_ukColonHyphen("[:-]"),
    2426      m_ukPStart("^\\.+"),
    2527      m_ukPEnd("\\.+$"),
    2628      m_ukSeries1("^\\s*(\\d{1,2})/(\\d{1,2})\\."),
    2729      m_ukSeries2("\\((Part|Pt)\\s+(\\d{1,2})\\s+of\\s+(\\d{1,2})\\)", false),
     30      m_ukSeries3("\\s*Episode\\s+(\\d{1,2})\\s+of\\s+(\\d{1,2})\\.", false),
    2831      m_ukCC("\\[(AD)(,(S)){,1}(,SL){,1}\\]|\\[(S)(,AD){,1}(,SL){,1}\\]|"
    2932             "\\[(SL)(,AD){,1}(,(S)){,1}\\]"),
    3033      m_ukYear("[\\[\\(]([\\d]{4})[\\)\\]]"),
    3134      m_uk24ep("^\\d{1,2}:00[ap]m to \\d{1,2}:00[ap]m: "),
    3235      m_ukStarring("(?:Western\\s)?[Ss]tarring ([\\w\\s\\-']+)[Aa]nd\\s([\\w\\s\\-']+)[\\.|,](?:\\s)*(\\d{4})?(?:\\.\\s)?"),
     36      m_ukBBC7rpt("\\[Rptd?[^]]+\\d{1,2}\\.\\d{1,2}[ap]m\\]\\."),
     37      m_ukCBBC("^CBBC."),
     38      m_ukCBeebies("^CBeebies."),
     39      m_ukStarring1("star(ring)"),
     40      m_ukDoubleDotEnd("\\.\\.+$"),
     41      m_ukDoubleDotStart("^\\.\\.+"),
    3342      m_comHemCountry("^(\\(.+\\))?\\s?([^ ]+)\\s([^\\.0-9]+)"
    3443                      "(?:\\sfrån\\s([0-9]{4}))(?:\\smed\\s([^\\.]+))?\\.?"),
    3544      m_comHemDirector("[Rr]egi"),
     
    283292
    284293}
    285294
     295void EITFixUp::SetUKSubtitle(DBEvent &event) const
     296{
     297    QStringList strList1;
     298    int position1;
     299    int position2;
     300    if ((position1 = event.description.find(m_ukEQ)) != -1)
     301    {
     302        position2 = event.description.find(":",position1+1);
     303        if (position2 != -1)
     304        {
     305             strList1 = QStringList::split(" ",
     306                            event.description.mid(position1+1,
     307                            position2-position1+1));
     308             if (strList1.count() < kDotToTitle)
     309             {
     310                 event.subtitle = event.description.left(position2);
     311                 event.description = event.description.mid(position2 + 1);
     312             }
     313             else
     314             {
     315                 event.subtitle = event.description.left(position1);
     316                 event.description = event.description.mid(position1 + 1);
     317             }
     318        }
     319        else
     320        {
     321            if ((uint)position1 < SUBTITLE_MAX_LEN)
     322            {
     323                event.subtitle = event.description.left(position1);
     324                event.description = event.description.mid(position1+1);
     325            }
     326        }
     327    }
     328}
     329
    286330/** \fn EITFixUp::FixUK(DBEvent&) const
    287331 *  \brief Use this in the United Kingdom to standardize DVB-T guide.
    288332 */
    289333void EITFixUp::FixUK(DBEvent &event) const
    290334{
    291     const uint SUBTITLE_PCT = 30; //% of description to allow subtitle up to
    292     const uint SUBTITLE_MAX_LEN = 128; // max length of subtitle field in db.
    293     int position = event.description.find("New Series");
    294     if (position != -1)
    295     {
    296         // Do something here
    297     }
     335    int position1;
     336    int position2;
     337    QString strFull;
    298338
    299     position = event.description.find(m_ukSubtitle);
    300     if (position != -1)
     339    position1 = event.description.find(m_ukSubtitle);
     340    if (position1 != -1)
    301341    {
    302342        event.flags |= DBEvent::kSubtitled;
    303         event.description.replace(m_ukSubtitle, "");
     343        event.description.remove(m_ukSubtitle);
    304344    }
    305345
    306346    // BBC three case (could add another record here ?)
    307     event.description = event.description.replace(m_ukThen, "");
    308     event.description = event.description.replace(m_ukNew, "");
    309     event.title  = event.title.replace(m_ukT4, "");
     347    event.description = event.description.remove(m_ukThen);
     348    event.description = event.description.remove(m_ukNew);
    310349
    311     // First join up event data, that's spread across title/desc
    312     // at this point there is no subtitle.
    313     if (event.title.endsWith("...") ||
    314         event.description.startsWith("..") ||
    315         event.description.isEmpty())
    316     {
    317         // try and make the subtitle
    318         QString Full = event.title.replace(m_ukPEnd, "") + " " +
    319             event.description.replace(m_ukPStart, "");
     350    event.description = event.description.remove(m_ukNew1);
     351    event.title  = event.title.remove(m_ukT4);
    320352
    321         if ((position = Full.find(m_ukEPQ)) != -1)
    322         {
    323             if (Full[position] == '!' || Full[position] == '?' ||
    324                 Full[position] == '.')
    325                 position++;
    326             event.title = Full.left(position);
    327             event.description = Full.mid(position + 1);
    328         }
    329         if ((position = event.title.find(m_ukYear)) != -1)
    330         {
    331             // Looks like they are using the airdate as a delimiter
    332             event.description = event.title.mid(position);
    333             event.title = event.title.left(position);
    334         }
    335     }
     353    // BBC 7 [Rpt of ...] case.
     354    event.description = event.description.remove(m_ukBBC7rpt);
    336355
    337     // This is trying to catch the case where the subtitle is in the main title
    338     // but avoid cases where it isn't a subtitle e.g cd:uk
    339     if (((position = event.title.find(":")) != -1) &&
    340         (event.description.find(":") == -1) &&
    341         (event.title[position + 1].upper() == event.title[position + 1]))
    342     {
    343         event.subtitle = event.title.mid(position + 1);
    344         event.title = event.title.left(position);
    345     }
    346 
    347     // Special case for episodes of 24.
    348356    QRegExp tmp24ep = m_uk24ep;
    349     if ((position = tmp24ep.search(event.description)) != -1)
     357    QStringList strList1;
     358    if (!event.title.startsWith("CSI:") && !event.title.startsWith("CD:"))
    350359    {
    351         // -2 from the length cause we don't want ": " on the end
    352         event.subtitle = event.description.mid(position, tmp24ep.cap(0).length() - 2);
    353         event.description = event.description.replace(tmp24ep.cap(0),"");
    354     }
    355     else if ((position = event.description.find(":")) != -1)
    356     {
    357         // if the subtitle is less than 50% of the description use it.
    358         if (((uint)position < SUBTITLE_MAX_LEN) &&
    359             ((position*100)/event.description.length() < SUBTITLE_PCT))
     360        if (((position1=event.title.find(m_ukDoubleDotEnd))!=-1) &&
     361            ((position2=event.description.find(m_ukDoubleDotStart))!=-1))
    360362        {
    361             event.subtitle = event.description.left(position);
    362             event.description = event.description.mid(position + 1);
     363            strFull = event.title.remove(m_ukDoubleDotEnd)+" "+
     364                  event.description.remove(m_ukDoubleDotStart);
     365            if ((position1 = strFull.find(m_ukEPQ)) != -1)
     366            {
     367                 if (strFull[position1] == '!' || strFull[position1] == '?' ||
     368                     strFull[position1] == '.')
     369                     position1++;
     370                 event.title = strFull.left(position1);
     371                 event.description = strFull.mid(position1 + 1);
     372                 SetUKSubtitle(event);
     373            }
     374            else if ((position1 = strFull.find(m_ukYear)) != -1)
     375            {
     376                // Looks like they are using the airdate as a delimiter
     377                if ((uint)position1 < SUBTITLE_MAX_LEN)
     378                {
     379                    event.description = event.title.mid(position1);
     380                    event.title = event.title.left(position1);
     381                }
     382            }
    363383        }
    364     }
    365     else if (!(((position = event.description.find(m_ukYear)) != -1) && (position < 3))
    366             && ((position = event.description.find(m_ukEPQ)) != -1))
    367     {
    368         // only move stuff into the subtitle if the airdate isn't at
    369         // the beginning of the description
    370         if (((uint)position < SUBTITLE_MAX_LEN) &&
    371             ((position*100)/event.description.length() < SUBTITLE_PCT))
     384        else if ((position1 = tmp24ep.search(event.description)) != -1)
    372385        {
    373             event.subtitle = event.description.left(position + 1);
    374             event.description = event.description.mid(position + 2);
     386            // Special case for episodes of 24.
     387            // -2 from the length because we don't want ": " on the end
     388            event.subtitle = event.description.mid(position1,
     389                                tmp24ep.cap(0).length() - 2);
     390            event.description = event.description.remove(tmp24ep.cap(0));
    375391        }
     392        else if (((position1 = event.title.find(m_ukColonHyphen)) != -1) &&
     393            (event.description.find(":") == -1))
     394        {
     395            if ((uint)position1 < SUBTITLE_MAX_LEN)
     396            {
     397                event.subtitle = event.title.mid(position1 + 1);
     398                event.title = event.title.left(position1);
     399            }
     400        }
     401        else
     402            SetUKSubtitle(event);
    376403    }
    377404
    378405    QRegExp tmpStarring = m_ukStarring;
     
    381408        // If the "Starring..." string got promoted to subtitle move it back.
    382409        event.description.prepend(". ");
    383410        event.description.prepend(tmpStarring.cap(0));
    384         event.subtitle.replace(tmpStarring.cap(0), "");
     411        event.subtitle.remove(tmpStarring.cap(0));
    385412    }
    386413    tmpStarring = m_ukStarring;
    387414    if (tmpStarring.search(event.description) != -1)
     
    398425                event.originalairdate = QDate(y, 1, 1);
    399426        }
    400427    }
    401     // Trim trailing '.'
    402     event.title.replace(m_ukPEnd, "");
    403     event.subtitle.replace(m_ukPEnd, "");
    404428
    405429    // Work out the episode numbers (if any)
    406430    bool    series  = false;
    407431    QRegExp tmpExp1 = m_ukSeries1;
    408432    QRegExp tmpExp2 = m_ukSeries2;
    409     if ((position = tmpExp1.search(event.title)) != -1)
     433    QRegExp tmpExp3 = m_ukSeries3;
     434    if ((position1 = tmpExp1.search(event.title)) != -1)
    410435    {
    411436        event.partnumber = tmpExp1.cap(1).toUInt();
    412437        event.parttotal  = tmpExp1.cap(2).toUInt();
    413438        // Remove from the title
    414439        event.title =
    415             event.title.mid(position + tmpExp1.cap(0).length());
    416         // but add it to the description
    417         event.description += tmpExp1.cap(0);
     440            event.title.mid(position1 + tmpExp1.cap(0).length());
    418441        series = true;
    419442    }
    420     else if ((position = tmpExp1.search(event.subtitle)) != -1)
     443    else if ((position1 = tmpExp1.search(event.subtitle)) != -1)
    421444    {
    422445        event.partnumber = tmpExp1.cap(1).toUInt();
    423446        event.parttotal  = tmpExp1.cap(2).toUInt();
    424447        // Remove from the sub title
    425448        event.subtitle =
    426             event.subtitle.mid(position + tmpExp1.cap(0).length());
    427         // but add it to the description
    428         event.description += tmpExp1.cap(0);
     449            event.subtitle.mid(position1 + tmpExp1.cap(0).length());
    429450        series = true;
    430451    }
    431     else if ((position = tmpExp1.search(event.description)) != -1)
     452    else if ((position1 = tmpExp1.search(event.description)) != -1)
    432453    {
    433454        event.partnumber = tmpExp1.cap(1).toUInt();
    434455        event.parttotal  = tmpExp1.cap(2).toUInt();
    435         // Don't cut it from the description
    436         //event.description = event.description.left(position) +
    437         //    event.description.mid(position + tmpExp1.cap(0).length());
     456        event.description = event.description.left(position1) +
     457            event.description.mid(position1 + tmpExp1.cap(0).length());
    438458        series = true;
    439459    }
    440     else if ((position = tmpExp2.search(event.description)) != -1)
     460    else if ((position1 = tmpExp2.search(event.description)) != -1)
    441461    {
    442462        event.partnumber = tmpExp2.cap(2).toUInt();
    443463        event.parttotal  = tmpExp2.cap(3).toUInt();
    444         // Don't cut it from the description
    445         //event.description = event.description.left(position) +
    446         //    event.description.mid(position + tmpExp2.cap(0).length());
     464        event.description = event.description.left(position1) +
     465            event.description.mid(position1 + tmpExp2.cap(0).length());
    447466        series = true;
    448467    }
     468    else if ((position1 = tmpExp3.search(event.description)) != -1)
     469    {
     470        event.partnumber = tmpExp3.cap(1).toUInt();
     471        event.parttotal  = tmpExp3.cap(2).toUInt();
     472        event.description = event.description.left(position1) +
     473            event.description.mid(position1 + tmpExp3.cap(0).length());
     474        series = true;
     475    }
    449476    if (series)
    450477        event.category_type = kCategorySeries;
    451478
     
    453480    QStringList captures;
    454481    QStringList::const_iterator it;
    455482    QRegExp tmpUKCC = m_ukCC;
    456     if ((position = tmpUKCC.search(event.description)) != -1)
     483    if ((position1 = tmpUKCC.search(event.description)) != -1)
    457484    {
    458485        // Enumerate through and see if we have subtitles, don't modify
    459486        // the description as we might destroy other useful information
     
    464491                event.flags |= DBEvent::kSubtitled;
    465492        }
    466493    }
    467     else if ((position = tmpUKCC.search(event.subtitle)) != -1)
     494    else if ((position1 = tmpUKCC.search(event.subtitle)) != -1)
    468495    {
    469496        captures = tmpUKCC.capturedTexts();
    470497        for (it = captures.begin(); it != captures.end(); ++it)
     
    475502
    476503        // We remove [AD,S] from the subtitle.
    477504        QString stmp = event.subtitle;
    478         int     itmp = position + tmpUKCC.cap(0).length();
    479         event.subtitle = stmp.left(position) + stmp.mid(itmp);
     505        int     itmp = position1 + tmpUKCC.cap(0).length();
     506        event.subtitle = stmp.left(position1) + stmp.mid(itmp);
    480507    }
    481508
    482509    // Work out the year (if any)
    483510    QRegExp tmpUKYear = m_ukYear;
    484     if ((position = tmpUKYear.search(event.description)) != -1)
     511    if ((position1 = tmpUKYear.search(event.description)) != -1)
    485512    {
    486513        QString stmp = event.description;
    487         int     itmp = position + tmpUKYear.cap(0).length();
    488         event.description = stmp.left(position) + stmp.mid(itmp);
     514        int     itmp = position1 + tmpUKYear.cap(0).length();
     515        event.description = stmp.left(position1) + stmp.mid(itmp);
    489516        event.airdate = tmpUKYear.cap(1);
    490517        bool ok;
    491518        uint y = tmpUKYear.cap(1).toUInt(&ok);
    492519        if (ok)
    493520            event.originalairdate = QDate(y, 1, 1);
    494521    }
     522
     523    // Remove spurious channel names
     524    event.subtitle = event.subtitle.remove(m_ukCBBC);
     525    event.description = event.description.remove(m_ukCBBC);
     526    event.subtitle = event.subtitle.remove(m_ukCBeebies);
     527    event.description = event.description.remove(m_ukCBeebies);
     528
     529    // Trim trailing '.'
     530    event.title.remove(m_ukPEnd);
     531    event.subtitle.remove(m_ukPEnd);
     532
     533    if (event.subtitle.isEmpty() && !event.category.startsWith("Movie",false))
     534    {
     535       strList1 = QStringList::split(".",event.description);
     536       QStringList strList2 = QStringList::split("?",event.description);
     537       if (strList1.count()>1 || strList2.count()>1)
     538       {
     539           QString strEnd;
     540           if (strList1[0].length() > strList2[0].length())
     541           {
     542               strList1=strList2;
     543               strEnd="?";
     544           }
     545           strList2 = QStringList::split(" ",strList1[0]);
     546           if (strList2.count() < kDotToTitle)
     547           {
     548               QStringList strList3 = strList2.grep("Drama",false);
     549               QStringList strList4 = strList2.grep("sitcom",false);
     550               QStringList strList5 = strList2.grep(m_ukStarring1);
     551               if ((strList3.count()==0) && (strList4.count()==0) &&
     552                   (strList5.count()==0))
     553               {
     554                    event.subtitle = strList1[0]+strEnd;
     555                    event.description=
     556                                 event.description.mid(strList1[0].length()+1);
     557                                 
     558               }
     559           }
     560       }
     561    }
    495562}
    496563
    497564/** \fn EITFixUp::FixPBS(DBEvent&) const