Ticket #5191: eitfixup3.patch
File eitfixup3.patch, 9.7 KB (added 16 years ago)
-
libs/libmythtv/eitfixup.cpp
1 /* vim: set expandtab tabstop=4 shiftwidth=4: */ 1 2 // C++ headers 2 3 #include <algorithm> 3 4 … … 25 26 m_ukNew("(New\\.|\\s*(Brand New|New)\\s*(Series|Episode)\\s*[:\\.\\-])",false), 26 27 m_ukCEPQ("[:\\!\\.\\?]"), 27 28 m_ukColonPeriod("[:\\.]"), 28 m_ukDotSpaceStart("^ \\."),29 m_ukDotSpaceStart("^(?:\\. | )"), 29 30 m_ukDotEnd("\\.$"), 30 31 m_ukSpaceColonStart("^[ |:]*"), 31 32 m_ukSpaceStart("^ "), … … 36 37 m_uk24ep("^\\d{1,2}:00[ap]m to \\d{1,2}:00[ap]m: "), 37 38 m_ukStarring("(?:Western\\s)?[Ss]tarring ([\\w\\s\\-']+)[Aa]nd\\s([\\w\\s\\-']+)[\\.|,](?:\\s)*(\\d{4})?(?:\\.\\s)?"), 38 39 m_ukBBC7rpt("\\[Rptd?[^]]+\\d{1,2}\\.\\d{1,2}[ap]m\\]\\."), 39 m_ukDescriptionRemove("^(?:CBBC\\s*\\.|CBeebies\\s*\\.|Class TV\\s*:|BBC Switch \\.)"),40 m_ukDescriptionRemove("^(?:CBBC\\s*\\.|CBeebies\\s*\\.|Class TV\\s*:|BBC Switch(?:\\.|:))"), 40 41 m_ukTitleRemove("^(?:[tT]4:|Schools\\s*:)"), 41 42 m_ukDoubleDotEnd("\\.\\.+$"), 42 43 m_ukDoubleDotStart("^\\.\\.+"), 43 44 m_ukTime("\\d{1,2}[\\.:]\\d{1,2}\\s*(am|pm|)"), 44 45 m_ukBBC34("BBC (?:THREE|FOUR) on BBC (?:ONE|TWO)\\.",false), 45 46 m_ukYearColon("^[\\d]{4}:"), 46 m_ukExclusionFromSubtitle("(starring|stars \\s|drama|series|sitcom)",false),47 m_ukExclusionFromSubtitle("(starring|stars|drama|series|sitcom|serial|^crime)",false), 47 48 m_ukCompleteDots("^\\.\\.+$"), 49 m_uk5xNumberHyphen3xNumber("\\d\\d\\d\\d\\d-\\d\\d\\d [A-Z]"), 48 50 m_comHemCountry("^(\\(.+\\))?\\s?([^ ]+)\\s([^\\.0-9]+)" 49 51 "(?:\\sfrån\\s([0-9]{4}))(?:\\smed\\s([^\\.]+))?\\.?"), 50 52 m_comHemDirector("[Rr]egi"), … … 305 307 */ 306 308 void EITFixUp::SetUKSubtitle(DBEvent &event) const 307 309 { 308 QStringList strListColon = event.description.split(":"); 310 if (!event.subtitle.isEmpty()) 311 return; 312 313 QStringList strListColon = QStringList::split(":",event.description,TRUE); 309 314 QStringList strListEnd; 310 315 311 316 bool fColon = false; … … 329 334 { 330 335 QString strTmp = event.description.mid(nPosition1+1, 331 336 
nLength-nPosition1); 332 333 QStringList tmp = strTmp.split(" "); 334 if (((uint) tmp.size()) < kMaxDotToColon) 337 QStringList tmp = strTmp.split(" ",QString::SkipEmptyParts); 338 if (((uint) tmp.size()) <= kMaxDotToColon) 335 339 fSingleDot = false; 336 340 } 337 341 … … 342 346 } 343 347 else if (!fSingleDot) 344 348 { 345 QStringList strListTmp;346 uint nTitle=0;347 int n TitleMax=-1;349 int nCount=strListColon.count(); 350 int nMaxLength=0; 351 int nMaxTitle=0; 348 352 int i; 349 for (i =0; (i<(int)strListColon.count()) && (nTitleMax==-1);i++)353 for (i=0;i<(nCount-1);i++) 350 354 { 351 const QStringList tmp = strListColon[i].split(" "); 352 353 nTitle += tmp.size(); 354 355 if (nTitle < kMaxToTitle) 356 strListTmp.push_back(strListColon[i]); 357 else 358 nTitleMax=i; 355 QString strTmp = strListColon[i+1].stripWhiteSpace(); 356 QChar aLetter = strTmp.at(0); 357 QChar bLetter = aLetter.lower(); 358 uint nTmp= 359 QStringList::split(" ",strListColon[i],FALSE).count(); 360 if ((nMaxLength+nTmp > kMaxToTitle) || (aLetter==bLetter)) 361 break; 362 nMaxLength = nMaxLength+nTmp; 363 nMaxTitle = i; 359 364 } 360 QString strPartial; 361 for (i=0;i<(nTitleMax-1);i++) 362 strPartial+=strListTmp[i]+":"; 363 if (nTitleMax>0) 365 if (nMaxLength) 364 366 { 365 strPartial+=strListTmp[nTitleMax-1]; 367 QString strPartial; 368 for (i=0;i<=nMaxTitle;i++) 369 { 370 strPartial+=strListColon[i]; 371 if (i<(nMaxTitle)) 372 strPartial+=":"; 373 } 366 374 strListEnd.push_back(strPartial); 367 }368 for (i=nTitleMax+1;i<(int)strListColon.count();i++)369 strListEnd.push_back(strListColon[i]);370 fColon = true;375 for (i=(nMaxTitle+1);i<nCount;i++) 376 strListEnd.push_back(strListColon[i]); 377 fColon=true; 378 } 371 379 } 372 380 } 381 373 382 QStringList strListPeriod; 374 383 QStringList strListQuestion; 375 384 QStringList strListExcl; 376 385 if (!fColon) 377 386 { 378 strListPeriod = event.description.split("."); 387 int nLength=INT_MAX; 388 strListPeriod = 
QStringList::split(".",event.description,TRUE); 389 strListQuestion = QStringList::split("?",event.description,TRUE); 390 strListExcl = QStringList::split("!",event.description,TRUE); 391 379 392 if (strListPeriod.count() >1) 380 393 { 381 394 nPosition1 = event.description.find("."); 382 395 int nPosition2 = event.description.find(".."); 383 396 if ((nPosition1 < nPosition2) || (nPosition2==-1)) 397 { 398 nLength = strListPeriod[0].length(); 384 399 strListEnd = strListPeriod; 400 } 385 401 } 386 402 387 strListQuestion = event.description.split("?"); 388 strListExcl = event.description.split("!"); 389 if ((strListQuestion.size() > 1) && 390 ((uint)strListQuestion.size() <= kMaxQuestionExclamation)) 403 if ((strListQuestion.size()>1) && 404 ((uint)strListQuestion.size()<=kMaxQuestionExclamation) && 405 ((int)(strListQuestion[0].length())<nLength)) 391 406 { 392 407 strListEnd = strListQuestion; 393 408 strEnd = "?"; 394 409 } 395 else if ((strListExcl.size() > 1) && 396 ((uint)strListExcl.size() <= kMaxQuestionExclamation)) 410 else if ((strListExcl.size()>1) && 411 ((uint)strListExcl.size()<=kMaxQuestionExclamation) && 412 ((int)(strListExcl[0].length())<nLength)) 397 413 { 398 414 strListEnd = strListExcl; 399 415 strEnd = "!"; 400 416 } 401 417 else 402 strEnd = QString::null;418 strEnd = ""; 403 419 } 404 420 405 421 if (!strListEnd.empty()) 406 422 { 407 423 QStringList strListSpace = strListEnd[0].split( 408 424 " ", QString::SkipEmptyParts); 409 if ( fColon && ((uint)strListSpace.size() > kMaxToTitle))425 if (strListSpace.count() > kMaxToTitle) 410 426 return; 411 if ((uint)strListSpace.size() > kDotToTitle)412 return;413 427 if (strListSpace.grep(m_ukExclusionFromSubtitle).empty()) 414 428 { 415 429 event.subtitle = strListEnd[0]+strEnd; … … 501 515 } 502 516 503 517 QRegExp tmp24ep = m_uk24ep; 504 if (!event.title.startsWith("CSI:") && !event.title.startsWith("CD:")) 518 if (!event.title.startsWith("CSI:") && 519 !event.title.startsWith("Law & Order:") && 
520 !event.title.startsWith("CD:")) 505 521 { 506 522 if (((position1=event.title.find(m_ukDoubleDotEnd)) != -1) && 507 523 ((position2=event.description.find(m_ukDoubleDotStart)) != -1)) … … 589 605 } 590 606 else 591 607 SetUKSubtitle(event); 608 609 if (event.subtitle.isEmpty() && 610 (position1=event.description.find(m_uk5xNumberHyphen3xNumber)) && 611 (position1 >0) && ((uint)position1<SUBTITLE_MAX_LEN)) 612 { 613 event.subtitle = event.description.left(position1+9); 614 event.description = event.description.mid(position1+9); 615 } 592 616 } 593 617 594 618 // Work out the year (if any) … … 606 630 } 607 631 608 632 // Trim leading/trailing '.' 633 event.description.remove(m_ukDotSpaceStart); 609 634 event.subtitle.remove(m_ukDotSpaceStart); 610 635 if (event.subtitle.findRev("..") != (((int)event.subtitle.length())-2)) 611 636 event.subtitle.remove(m_ukDotEnd); 612 637 638 // Demote the subtitle if it matches the title 639 if (!event.title.isEmpty() && !event.subtitle.isEmpty() && 640 event.title == event.subtitle) 641 { 642 event.description=event.subtitle+" "+event.description; 643 event.subtitle=QString::null; 644 } 645 613 646 // Reverse the subtitle and empty description 614 647 if (event.description.isEmpty() && !event.subtitle.isEmpty()) 615 648 { -
libs/libmythtv/eitfixup.h
1 1 /* 2 * vim: set expandtab tabstop=4 shiftwidth=4: 2 3 * Copyright 2004 - Taylor Jacob (rtjacob at earthlink.net) 3 4 */ 4 5 … … 18 19 // max length of subtitle field in db. 19 20 static const uint SUBTITLE_MAX_LEN = 128; 20 21 // max number of words included in a subtitle 21 static const uint kMaxToTitle = 14; 22 // max number of words up to a period, question mark 23 static const uint kDotToTitle = 9; 22 static const uint kMaxToTitle = 10; 24 23 // max number of question/exclamation marks 25 24 static const uint kMaxQuestionExclamation = 2; 26 25 // max number of difference in words between a period and a colon 27 static const uint kMaxDotToColon = 5;26 static const uint kMaxDotToColon = 6; 28 27 29 28 public: 30 29 enum FixUpType … … 108 107 const QRegExp m_ukYearColon; 109 108 const QRegExp m_ukExclusionFromSubtitle; 110 109 const QRegExp m_ukCompleteDots; 110 const QRegExp m_uk5xNumberHyphen3xNumber; 111 111 const QRegExp m_comHemCountry; 112 112 const QRegExp m_comHemDirector; 113 113 const QRegExp m_comHemActor;