Ticket #4327: eitfixup2.patch

File eitfixup2.patch, 17.9 KB (added by john {at} pullan {dot} org, 16 years ago)

Update of the patch

  • libs/libmythtv/eitfixup.h

     
    1414/// EIT Fix Up Functions
    1515class EITFixUp
    1616{
     17  protected:
     18     // max length of subtitle field in db.
     19     static const uint SUBTITLE_MAX_LEN = 128;
     20     // max number of words up to a period or question mark
     21     static const uint kDotToTitle = 8;
     22
    1723  public:
    1824    enum FixUpType
    1925    {
     
    5460
    5561  private:
    5662    void FixBellExpressVu(DBEvent &event) const; // Canada DVB-S
     63    void SetUKSubtitle(DBEvent &event) const;
    5764    void FixUK(DBEvent &event) const;            // UK DVB-T
    5865    void FixPBS(DBEvent &event) const;           // USA ATSC
    5966    void FixComHem(DBEvent &event, bool parse_subtitle) const; // Sweden DVB-C
     
    7582    const QRegExp m_ukSubtitle;
    7683    const QRegExp m_ukThen;
    7784    const QRegExp m_ukNew;
     85    const QRegExp m_ukNew1;
    7886    const QRegExp m_ukT4;
    7987    const QRegExp m_ukEQ;
    8088    const QRegExp m_ukEPQ;
     89    const QRegExp m_ukColonHyphen;
    8190    const QRegExp m_ukPStart;
    8291    const QRegExp m_ukPEnd;
    8392    const QRegExp m_ukSeries1;
    8493    const QRegExp m_ukSeries2;
     94    const QRegExp m_ukSeries3;
    8595    const QRegExp m_ukCC;
    8696    const QRegExp m_ukYear;
    8797    const QRegExp m_uk24ep;
    8898    const QRegExp m_ukStarring;
     99    const QRegExp m_ukBBC7rpt;
     100    const QRegExp m_ukCBBC;
     101    const QRegExp m_ukCBeebies;
     102    const QRegExp m_ukStarring1;
     103    const QRegExp m_ukDoubleDotEnd;
     104    const QRegExp m_ukDoubleDotStart;
    89105    const QRegExp m_comHemCountry;
    90106    const QRegExp m_comHemDirector;
    91107    const QRegExp m_comHemActor;
  • libs/libmythtv/eitfixup.cpp

     
    1717      m_bellPPVDescriptionEventId("\\([0-9]{5}\\)"),
    1818      m_ukSubtitle("\\[.*S\\]"),
    1919      m_ukThen("\\s*(Then|Followed by) 60 Seconds\\.", false),
    20       m_ukNew("\\s*(Brand New|New) Series\\s*[:\\.\\-]"),
     20      m_ukNew("\\s*(Brand New|New)\\s*Series\\s*[:\\.\\-]",false),
     21      m_ukNew1("^New\\."),
    2122      m_ukT4("^[tT]4:"),
    22       m_ukEQ("[\\!\\?]"),
     23      m_ukEQ("[:\\!\\?]"),
    2324      m_ukEPQ("[:\\!\\.\\?]"),
     25      m_ukColonHyphen("[:-]"),
    2426      m_ukPStart("^\\.+"),
    2527      m_ukPEnd("\\.+$"),
    26       m_ukSeries1("^\\s*(\\d{1,2})/(\\d{1,2})\\."),
     28      m_ukSeries1("\\s*(\\d{1,2})/(\\d{1,2})"),
    2729      m_ukSeries2("\\((Part|Pt)\\s+(\\d{1,2})\\s+of\\s+(\\d{1,2})\\)", false),
     30      m_ukSeries3("\\s*Episode\\s+(\\d{1,2})\\s+of\\s+(\\d{1,2})\\.", false),
    2831      m_ukCC("\\[(AD)(,(S)){,1}(,SL){,1}\\]|\\[(S)(,AD){,1}(,SL){,1}\\]|"
    2932             "\\[(SL)(,AD){,1}(,(S)){,1}\\]"),
    3033      m_ukYear("[\\[\\(]([\\d]{4})[\\)\\]]"),
    3134      m_uk24ep("^\\d{1,2}:00[ap]m to \\d{1,2}:00[ap]m: "),
    3235      m_ukStarring("(?:Western\\s)?[Ss]tarring ([\\w\\s\\-']+)[Aa]nd\\s([\\w\\s\\-']+)[\\.|,](?:\\s)*(\\d{4})?(?:\\.\\s)?"),
     36      m_ukBBC7rpt("\\[Rptd?[^]]+\\d{1,2}\\.\\d{1,2}[ap]m\\]\\."),
     37      m_ukCBBC("^CBBC."),
     38      m_ukCBeebies("^CBeebies."),
     39      m_ukStarring1("star(ring)"),
     40      m_ukDoubleDotEnd("\\.\\.+$"),
     41      m_ukDoubleDotStart("^\\.\\.+"),
    3342      m_comHemCountry("^(\\(.+\\))?\\s?([^ ]+)\\s([^\\.0-9]+)"
    3443                      "(?:\\sfrån\\s([0-9]{4}))(?:\\smed\\s([^\\.]+))?\\.?"),
    3544      m_comHemDirector("[Rr]egi"),
     
    283292
    284293}
    285294
     295void EITFixUp::SetUKSubtitle(DBEvent &event) const
     296{
     297    QStringList strList1;
     298    int position1;
     299    int position2;
     300
     301    if ((position1 = event.description.find(m_ukEQ)) >= 0)
     302    {
     303        position2 = event.description.find(":",position1+1);
     304        if (position2 != -1)
     305        {
     306             strList1 = QStringList::split(" ",
     307                            event.description.mid(position1+1,
     308                            position2-position1+1));
     309             if (strList1.count() < kDotToTitle)
     310             {
     311                 event.subtitle = event.description.left(position2);
     312                 event.description = event.description.mid(position2 + 1);
     313             }
     314             else
     315             {
     316                 event.subtitle = event.description.left(position1);
     317                 event.description = event.description.mid(position1 + 1);
     318             }
     319        }
     320        else
     321        {
     322            strList1 = QStringList::split(" ",
     323                            event.description.left(position1));
     324            if (strList1.count() < kDotToTitle)
     325            {
     326                if ((uint)position1 < SUBTITLE_MAX_LEN)
     327                {
     328                     event.subtitle = event.description.left(position1);
     329                     event.description = event.description.mid(position1+1);
     330                }
     331            }
     332        }
     333    }
     334}
     335
    286336/** \fn EITFixUp::FixUK(DBEvent&) const
    287337 *  \brief Use this in the United Kingdom to standardize DVB-T guide.
    288338 */
    289339void EITFixUp::FixUK(DBEvent &event) const
    290340{
    291     const uint SUBTITLE_PCT = 30; //% of description to allow subtitle up to
    292     const uint SUBTITLE_MAX_LEN = 128; // max length of subtitle field in db.
    293     int position = event.description.find("New Series");
    294     if (position != -1)
    295     {
    296         // Do something here
    297     }
     341    int position1;
     342    int position2;
     343    QString strFull;
    298344
    299     position = event.description.find(m_ukSubtitle);
    300     if (position != -1)
     345    position1 = event.description.find(m_ukSubtitle);
     346    if (position1 != -1)
    301347    {
    302348        event.flags |= DBEvent::kSubtitled;
    303         event.description.replace(m_ukSubtitle, "");
     349        event.description.remove(m_ukSubtitle);
    304350    }
    305351
    306352    // BBC three case (could add another record here ?)
    307     event.description = event.description.replace(m_ukThen, "");
    308     event.description = event.description.replace(m_ukNew, "");
    309     event.title  = event.title.replace(m_ukT4, "");
     353    event.description = event.description.remove(m_ukThen);
     354    event.description = event.description.remove(m_ukNew);
    310355
    311     // First join up event data, that's spread across title/desc
    312     // at this point there is no subtitle.
    313     if (event.title.endsWith("...") ||
    314         event.description.startsWith("..") ||
    315         event.description.isEmpty())
    316     {
    317         // try and make the subtitle
    318         QString Full = event.title.replace(m_ukPEnd, "") + " " +
    319             event.description.replace(m_ukPStart, "");
     356    event.description = event.description.remove(m_ukNew1);
     357    event.title  = event.title.remove(m_ukT4);
    320358
    321         if ((position = Full.find(m_ukEPQ)) != -1)
    322         {
    323             if (Full[position] == '!' || Full[position] == '?' ||
    324                 Full[position] == '.')
    325                 position++;
    326             event.title = Full.left(position);
    327             event.description = Full.mid(position + 1);
    328         }
    329         if ((position = event.title.find(m_ukYear)) != -1)
    330         {
    331             // Looks like they are using the airdate as a delimiter
    332             event.description = event.title.mid(position);
    333             event.title = event.title.left(position);
    334         }
    335     }
     359    // BBC 7 [Rpt of ...] case.
     360    event.description = event.description.remove(m_ukBBC7rpt);
    336361
    337     // This is trying to catch the case where the subtitle is in the main title
    338     // but avoid cases where it isn't a subtitle e.g cd:uk
    339     if (((position = event.title.find(":")) != -1) &&
    340         (event.description.find(":") == -1) &&
    341         (event.title[position + 1].upper() == event.title[position + 1]))
    342     {
    343         event.subtitle = event.title.mid(position + 1);
    344         event.title = event.title.left(position);
    345     }
    346 
    347     // Special case for episodes of 24.
    348362    QRegExp tmp24ep = m_uk24ep;
    349     if ((position = tmp24ep.search(event.description)) != -1)
     363    if (!event.title.startsWith("CSI:") && !event.title.startsWith("CD:"))
    350364    {
    351         // -2 from the length cause we don't want ": " on the end
    352         event.subtitle = event.description.mid(position, tmp24ep.cap(0).length() - 2);
    353         event.description = event.description.replace(tmp24ep.cap(0),"");
    354     }
    355     else if ((position = event.description.find(":")) != -1)
    356     {
    357         // if the subtitle is less than 50% of the description use it.
    358         if (((uint)position < SUBTITLE_MAX_LEN) &&
    359             ((position*100)/event.description.length() < SUBTITLE_PCT))
     365        if (((position1=event.title.find(m_ukDoubleDotEnd)) >= 0) &&
     366            ((position2=event.description.find(m_ukDoubleDotStart)) >= 0))
    360367        {
    361             event.subtitle = event.description.left(position);
    362             event.description = event.description.mid(position + 1);
     368            strFull = event.title.remove(m_ukDoubleDotEnd)+" "+
     369                  event.description.remove(m_ukDoubleDotStart);
     370            if ((position1 = strFull.find(m_ukEPQ)) != -1)
     371            {
     372                 if (strFull[position1] == '!' || strFull[position1] == '?' ||
     373                     strFull[position1] == '.')
     374                     position1++;
     375                 event.title = strFull.left(position1);
     376                 event.description = strFull.mid(position1 + 1);
     377                 SetUKSubtitle(event);
     378            }
     379            else if ((position1 = strFull.find(m_ukYear)) >= 0)
     380            {
     381                // Looks like they are using the airdate as a delimiter
     382                if ((uint)position1 < SUBTITLE_MAX_LEN)
     383                {
     384                    event.description = event.title.mid(position1);
     385                    event.title = event.title.left(position1);
     386                }
     387            }
    363388        }
    364     }
    365     else if (!(((position = event.description.find(m_ukYear)) != -1) && (position < 3))
    366             && ((position = event.description.find(m_ukEPQ)) != -1))
    367     {
    368         // only move stuff into the subtitle if the airdate isn't at
    369         // the beginning of the description
    370         if (((uint)position < SUBTITLE_MAX_LEN) &&
    371             ((position*100)/event.description.length() < SUBTITLE_PCT))
     389        else if ((position1 = tmp24ep.search(event.description)) >= 0)
    372390        {
    373             event.subtitle = event.description.left(position + 1);
    374             event.description = event.description.mid(position + 2);
     391            // Special case for episodes of 24.
     392            // -2 from the length cause we don't want ": " on the end
     393            event.subtitle = event.description.mid(position1,
     394                                tmp24ep.cap(0).length() - 2);
     395            event.description = event.description.remove(tmp24ep.cap(0));
    375396        }
     397        else if (((position1 = event.title.find(m_ukColonHyphen)) >= 0) &&
     398            (event.description.find(":") < 0 ))
     399        {
     400            if ((uint)position1 < SUBTITLE_MAX_LEN)
     401            {
     402                event.subtitle = event.title.mid(position1 + 1);
     403                event.title = event.title.left(position1);
     404            }
     405        }
     406        else
     407            SetUKSubtitle(event);
    376408    }
    377409
    378410    QRegExp tmpStarring = m_ukStarring;
    379     if (tmpStarring.search(event.subtitle) != -1)
     411    if (tmpStarring.search(event.subtitle) >= 0)
    380412    {
    381413        // If the "Starring..." string got promoted to subtitle move it back.
    382414        event.description.prepend(". ");
    383415        event.description.prepend(tmpStarring.cap(0));
    384         event.subtitle.replace(tmpStarring.cap(0), "");
     416        event.subtitle.remove(tmpStarring.cap(0));
    385417    }
    386418    tmpStarring = m_ukStarring;
    387     if (tmpStarring.search(event.description) != -1)
     419    if (tmpStarring.search(event.description) >= 0)
    388420    {
    389421        // if we match this we've captured 2 actors and an (optional) airdate
    390422        event.AddPerson(DBPerson::kActor, tmpStarring.cap(1));
     
    398430                event.originalairdate = QDate(y, 1, 1);
    399431        }
    400432    }
    401     // Trim trailing '.'
    402     event.title.replace(m_ukPEnd, "");
    403     event.subtitle.replace(m_ukPEnd, "");
    404433
    405434    // Work out the episode numbers (if any)
    406435    bool    series  = false;
    407436    QRegExp tmpExp1 = m_ukSeries1;
    408437    QRegExp tmpExp2 = m_ukSeries2;
    409     if ((position = tmpExp1.search(event.title)) != -1)
     438    QRegExp tmpExp3 = m_ukSeries3;
     439    if ((position1 = tmpExp1.search(event.title)) >= 0)
    410440    {
    411441        event.partnumber = tmpExp1.cap(1).toUInt();
    412442        event.parttotal  = tmpExp1.cap(2).toUInt();
    413443        // Remove from the title
    414444        event.title =
    415             event.title.mid(position + tmpExp1.cap(0).length());
    416         // but add it to the description
    417         event.description += tmpExp1.cap(0);
     445            event.title.mid(position1 + tmpExp1.cap(0).length());
    418446        series = true;
    419447    }
    420     else if ((position = tmpExp1.search(event.subtitle)) != -1)
     448    else if ((position1 = tmpExp1.search(event.subtitle)) >= 0)
    421449    {
    422450        event.partnumber = tmpExp1.cap(1).toUInt();
    423451        event.parttotal  = tmpExp1.cap(2).toUInt();
    424452        // Remove from the sub title
    425453        event.subtitle =
    426             event.subtitle.mid(position + tmpExp1.cap(0).length());
    427         // but add it to the description
    428         event.description += tmpExp1.cap(0);
     454            event.subtitle.mid(position1 + tmpExp1.cap(0).length());
    429455        series = true;
    430456    }
    431     else if ((position = tmpExp1.search(event.description)) != -1)
     457    else if ((position1 = tmpExp1.search(event.description)) >= 0)
    432458    {
    433459        event.partnumber = tmpExp1.cap(1).toUInt();
    434460        event.parttotal  = tmpExp1.cap(2).toUInt();
    435         // Don't cut it from the description
    436         //event.description = event.description.left(position) +
    437         //    event.description.mid(position + tmpExp1.cap(0).length());
     461        event.description = event.description.left(position1) +
     462            event.description.mid(position1 + tmpExp1.cap(0).length());
    438463        series = true;
    439464    }
    440     else if ((position = tmpExp2.search(event.description)) != -1)
     465    else if ((position1 = tmpExp2.search(event.description)) >= 0)
    441466    {
    442467        event.partnumber = tmpExp2.cap(2).toUInt();
    443468        event.parttotal  = tmpExp2.cap(3).toUInt();
    444         // Don't cut it from the description
    445         //event.description = event.description.left(position) +
    446         //    event.description.mid(position + tmpExp2.cap(0).length());
     469        event.description = event.description.left(position1) +
     470            event.description.mid(position1 + tmpExp2.cap(0).length());
    447471        series = true;
    448472    }
     473    else if ((position1 = tmpExp3.search(event.description)) >= 0)
     474    {
     475        event.partnumber = tmpExp3.cap(1).toUInt();
     476        event.parttotal  = tmpExp3.cap(2).toUInt();
     477        event.description = event.description.left(position1) +
     478            event.description.mid(position1 + tmpExp3.cap(0).length());
     479        series = true;
     480    }
    449481    if (series)
    450482        event.category_type = kCategorySeries;
    451483
     
    453485    QStringList captures;
    454486    QStringList::const_iterator it;
    455487    QRegExp tmpUKCC = m_ukCC;
    456     if ((position = tmpUKCC.search(event.description)) != -1)
     488    if ((position1 = tmpUKCC.search(event.description)) >= 0)
    457489    {
    458490        // Enumerate through and see if we have subtitles, don't modify
    459491        // the description as we might destroy other useful information
     
    464496                event.flags |= DBEvent::kSubtitled;
    465497        }
    466498    }
    467     else if ((position = tmpUKCC.search(event.subtitle)) != -1)
     499    else if ((position1 = tmpUKCC.search(event.subtitle)) >= 0)
    468500    {
    469501        captures = tmpUKCC.capturedTexts();
    470502        for (it = captures.begin(); it != captures.end(); ++it)
     
    475507
    476508        // We remove [AD,S] from the subtitle.
    477509        QString stmp = event.subtitle;
    478         int     itmp = position + tmpUKCC.cap(0).length();
    479         event.subtitle = stmp.left(position) + stmp.mid(itmp);
     510        int     itmp = position1 + tmpUKCC.cap(0).length();
     511        event.subtitle = stmp.left(position1) + stmp.mid(itmp);
    480512    }
    481513
    482514    // Work out the year (if any)
    483515    QRegExp tmpUKYear = m_ukYear;
    484     if ((position = tmpUKYear.search(event.description)) != -1)
     516    if ((position1 = tmpUKYear.search(event.description)) >= 0)
    485517    {
    486518        QString stmp = event.description;
    487         int     itmp = position + tmpUKYear.cap(0).length();
    488         event.description = stmp.left(position) + stmp.mid(itmp);
     519        int     itmp = position1 + tmpUKYear.cap(0).length();
     520        event.description = stmp.left(position1) + stmp.mid(itmp);
    489521        event.airdate = tmpUKYear.cap(1);
    490522        bool ok;
    491523        uint y = tmpUKYear.cap(1).toUInt(&ok);
    492524        if (ok)
    493525            event.originalairdate = QDate(y, 1, 1);
    494526    }
     527
     528    // Remove spurious channel names
     529    event.subtitle = event.subtitle.remove(m_ukCBBC);
     530    event.description = event.description.remove(m_ukCBBC);
     531    event.subtitle = event.subtitle.remove(m_ukCBeebies);
     532    event.description = event.description.remove(m_ukCBeebies);
     533
     534    // Trim trailing '.'
     535    event.title.remove(m_ukPEnd);
     536    event.subtitle.remove(m_ukPEnd);
     537
     538    if (event.subtitle.isEmpty() && !event.category.startsWith("Movie",false))
     539    {
     540       QStringList strList1 = QStringList::split(".",event.description,TRUE);
     541       QStringList strList2 = QStringList::split("?",event.description,TRUE);
     542       if (strList1.count()>1 || strList2.count()>1)
     543       {
     544           QString strEnd;
     545           if (strList1[0].length() > strList2[0].length())
     546           {
     547               strList1=strList2;
     548               strEnd="?";
     549           }
     550           strList2 = QStringList::split(" ",strList1[0]);
     551           if (strList2.count() < kDotToTitle)
     552           {
     553               QStringList strList3 = strList2.grep("Drama",false);
     554               QStringList strList4 = strList2.grep("sitcom",false);
     555               QStringList strList5 = strList2.grep(m_ukStarring1);
     556               if ((strList3.count()==0) && (strList4.count()==0) &&
     557                   (strList5.count()==0))
     558               {
     559                    event.subtitle = strList1[0]+strEnd;
     560                    event.description=
     561                                 event.description.mid(strList1[0].length()+1);
     562                                 
     563               }
     564           }
     565       }
     566    }
    495567}
    496568
    497569/** \fn EITFixUp::FixPBS(DBEvent&) const