Ticket #5191: eitfixup4.patch
File eitfixup4.patch, 10.3 KB (added by , 16 years ago) |
---|
-
libs/libmythtv/eitfixup.cpp
1 /* vim: set expandtab tabstop=4 shiftwidth=4: */ 1 2 // C++ headers 2 3 #include <algorithm> 3 4 … … 25 26 m_ukNew("(New\\.|\\s*(Brand New|New)\\s*(Series|Episode)\\s*[:\\.\\-])",false), 26 27 m_ukCEPQ("[:\\!\\.\\?]"), 27 28 m_ukColonPeriod("[:\\.]"), 28 m_ukDotSpaceStart("^ \\."),29 m_ukDotSpaceStart("^(?:\\. | )"), 29 30 m_ukDotEnd("\\.$"), 30 31 m_ukSpaceColonStart("^[ |:]*"), 31 32 m_ukSpaceStart("^ "), … … 36 37 m_uk24ep("^\\d{1,2}:00[ap]m to \\d{1,2}:00[ap]m: "), 37 38 m_ukStarring("(?:Western\\s)?[Ss]tarring ([\\w\\s\\-']+)[Aa]nd\\s([\\w\\s\\-']+)[\\.|,](?:\\s)*(\\d{4})?(?:\\.\\s)?"), 38 39 m_ukBBC7rpt("\\[Rptd?[^]]+\\d{1,2}\\.\\d{1,2}[ap]m\\]\\."), 39 m_ukDescriptionRemove("^(?:CBBC\\s*\\.|CBeebies\\s*\\.|Class TV\\s*:|BBC Switch \\.)"),40 m_ukDescriptionRemove("^(?:CBBC\\s*\\.|CBeebies\\s*\\.|Class TV\\s*:|BBC Switch(?:\\.|:))"), 40 41 m_ukTitleRemove("^(?:[tT]4:|Schools\\s*:)"), 41 42 m_ukDoubleDotEnd("\\.\\.+$"), 42 43 m_ukDoubleDotStart("^\\.\\.+"), 43 44 m_ukTime("\\d{1,2}[\\.:]\\d{1,2}\\s*(am|pm|)"), 44 45 m_ukBBC34("BBC (?:THREE|FOUR) on BBC (?:ONE|TWO)\\.",false), 45 46 m_ukYearColon("^[\\d]{4}:"), 46 m_ukExclusionFromSubtitle("(starring|stars \\s|drama|series|sitcom)",false),47 m_ukExclusionFromSubtitle("(starring|stars|drama|series|sitcom|serial|^crime)",false), 47 48 m_ukCompleteDots("^\\.\\.+$"), 49 m_uk5xNumberHyphen3xNumber("\\d\\d\\d\\d\\d-\\d\\d\\d [A-Z]"), 50 m_ukTerminus("\\d"), 48 51 m_comHemCountry("^(\\(.+\\))?\\s?([^ ]+)\\s([^\\.0-9]+)" 49 52 "(?:\\sfrån\\s([0-9]{4}))(?:\\smed\\s([^\\.]+))?\\.?"), 50 53 m_comHemDirector("[Rr]egi"), … … 305 308 */ 306 309 void EITFixUp::SetUKSubtitle(DBEvent &event) const 307 310 { 308 QStringList strListColon = event.description.split(":"); 311 if (!event.subtitle.isEmpty()) 312 return; 313 314 QStringList strListColon = QStringList::split(":",event.description,TRUE); 309 315 QStringList strListEnd; 310 316 311 317 bool fColon = false; … … 329 335 { 330 336 QString strTmp = event.description.mid(nPosition1+1, 331 337 nLength-nPosition1); 332 333 QStringList tmp = strTmp.split(" "); 334 if (((uint) tmp.size()) < kMaxDotToColon) 338 QStringList tmp = strTmp.split(" ",QString::SkipEmptyParts); 339 if (((uint) tmp.size()) <= kMaxDotToColon) 335 340 fSingleDot = false; 336 341 } 337 342 … … 342 347 } 343 348 else if (!fSingleDot) 344 349 { 345 QStringList strListTmp;346 uint nTitle=0;347 int n TitleMax=-1;350 int nCount=strListColon.count(); 351 int nMaxLength=0; 352 int nMaxTitle=0; 348 353 int i; 349 for (i =0; (i<(int)strListColon.count()) && (nTitleMax==-1);i++)354 for (i=0;i<(nCount-1);i++) 350 355 { 351 const QStringList tmp = strListColon[i].split(" "); 352 353 nTitle += tmp.size(); 354 355 if (nTitle < kMaxToTitle) 356 strListTmp.push_back(strListColon[i]); 357 else 358 nTitleMax=i; 356 QString strTmp = strListColon[i+1].stripWhiteSpace(); 357 QChar aLetter = strTmp.at(0); 358 QChar bLetter = aLetter.lower(); 359 uint nTmp= 360 QStringList::split(" ",strListColon[i],FALSE).count(); 361 if ((nMaxLength+nTmp > kMaxToTitle) || (aLetter==bLetter)) 362 break; 363 nMaxLength = nMaxLength+nTmp; 364 nMaxTitle = i; 359 365 } 360 QString strPartial; 361 for (i=0;i<(nTitleMax-1);i++) 362 strPartial+=strListTmp[i]+":"; 363 if (nTitleMax>0) 366 if (nMaxLength) 364 367 { 365 strPartial+=strListTmp[nTitleMax-1]; 368 QString strPartial; 369 for (i=0;i<=nMaxTitle;i++) 370 { 371 strPartial+=strListColon[i]; 372 if (i<(nMaxTitle)) 373 strPartial+=":"; 374 } 366 375 strListEnd.push_back(strPartial); 367 }368 for (i=nTitleMax+1;i<(int)strListColon.count();i++)369 strListEnd.push_back(strListColon[i]);370 fColon = true;376 for (i=(nMaxTitle+1);i<nCount;i++) 377 strListEnd.push_back(strListColon[i]); 378 fColon=true; 379 } 371 380 } 372 381 } 382 373 383 QStringList strListPeriod; 374 384 QStringList strListQuestion; 375 385 QStringList strListExcl; 376 386 if (!fColon) 377 387 { 378 strListPeriod = event.description.split("."); 388 int nLength=INT_MAX; 389 strListPeriod = QStringList::split(".",event.description,TRUE); 390 strListQuestion = QStringList::split("?",event.description,TRUE); 391 strListExcl = QStringList::split("!",event.description,TRUE); 392 379 393 if (strListPeriod.count() >1) 380 394 { 381 395 nPosition1 = event.description.find("."); 382 396 int nPosition2 = event.description.find(".."); 383 397 if ((nPosition1 < nPosition2) || (nPosition2==-1)) 398 { 399 nLength = strListPeriod[0].length(); 384 400 strListEnd = strListPeriod; 401 } 385 402 } 386 403 387 strListQuestion = event.description.split("?"); 388 strListExcl = event.description.split("!"); 389 if ((strListQuestion.size() > 1) && 390 ((uint)strListQuestion.size() <= kMaxQuestionExclamation)) 404 if ((strListQuestion.size()>1) && 405 ((uint)strListQuestion.size()<=kMaxQuestionExclamation) && 406 ((int)(strListQuestion[0].length())<nLength)) 391 407 { 392 408 strListEnd = strListQuestion; 393 409 strEnd = "?"; 394 410 } 395 else if ((strListExcl.size() > 1) && 396 ((uint)strListExcl.size() <= kMaxQuestionExclamation)) 411 else if ((strListExcl.size()>1) && 412 ((uint)strListExcl.size()<=kMaxQuestionExclamation) && 413 ((int)(strListExcl[0].length())<nLength)) 397 414 { 398 415 strListEnd = strListExcl; 399 416 strEnd = "!"; 400 417 } 401 418 else 402 strEnd = QString::null;419 strEnd = ""; 403 420 } 404 421 405 422 if (!strListEnd.empty()) 406 423 { 407 424 QStringList strListSpace = strListEnd[0].split( 408 425 " ", QString::SkipEmptyParts); 409 if ( fColon && ((uint)strListSpace.size() > kMaxToTitle))426 if (strListSpace.count() > (int)kMaxToTitle) 410 427 return; 411 if ((uint)strListSpace.size() > kDotToTitle)412 return;413 428 if (strListSpace.grep(m_ukExclusionFromSubtitle).empty()) 414 429 { 415 430 event.subtitle = strListEnd[0]+strEnd; … … 501 516 } 502 517 503 518 QRegExp tmp24ep = m_uk24ep; 504 if (!event.title.startsWith("CSI:") && !event.title.startsWith("CD:")) 519 if (!event.title.startsWith("CSI:") && 520 !event.title.startsWith("Law & Order:") && 521 !event.title.startsWith("CD:")) 505 522 { 506 523 if (((position1=event.title.find(m_ukDoubleDotEnd)) != -1) && 507 524 ((position2=event.description.find(m_ukDoubleDotStart)) != -1)) … … 558 575 event.title.setLength(position1); 559 576 event.subtitle = strTmp+event.subtitle; 560 577 } 561 else if ((uint)position1 < SUBTITLE_MAX_LEN) 578 else if ( 579 (event.title.mid(position1+1).find(m_ukTerminus)!=0) && 580 ((uint)position1 < SUBTITLE_MAX_LEN)) 562 581 { 563 582 event.subtitle = event.title.mid(position1 + 1); 564 583 event.title = event.title.left(position1); … … 589 608 } 590 609 else 591 610 SetUKSubtitle(event); 611 612 if (event.subtitle.isEmpty() && 613 (position1=event.description.find(m_uk5xNumberHyphen3xNumber)) && 614 (position1 >0) && ((uint)position1<SUBTITLE_MAX_LEN)) 615 { 616 event.subtitle = event.description.left(position1+9); 617 event.description = event.description.mid(position1+9); 618 } 592 619 } 593 620 594 621 // Work out the year (if any) … … 606 633 } 607 634 608 635 // Trim leading/trailing '.' 636 event.description.remove(m_ukDotSpaceStart); 609 637 event.subtitle.remove(m_ukDotSpaceStart); 610 638 if (event.subtitle.findRev("..") != (((int)event.subtitle.length())-2)) 611 639 event.subtitle.remove(m_ukDotEnd); 612 640 641 // Demote the subtitle if it matches the title 642 if (!event.title.isEmpty() && !event.subtitle.isEmpty() && 643 event.title == event.subtitle) 644 { 645 event.description=event.subtitle+" "+event.description; 646 event.subtitle=QString::null; 647 } 648 613 649 // Reverse the subtitle and empty description 614 650 if (event.description.isEmpty() && !event.subtitle.isEmpty()) 615 651 { -
libs/libmythtv/eitfixup.h
1 1 /* 2 * vim: set expandtab tabstop=4 shiftwidth=4: 2 3 * Copyright 2004 - Taylor Jacob (rtjacob at earthlink.net) 3 4 */ 4 5 … … 18 19 // max length of subtitle field in db. 19 20 static const uint SUBTITLE_MAX_LEN = 128; 20 21 // max number of words included in a subtitle 21 static const uint kMaxToTitle = 14; 22 // max number of words up to a period, question mark 23 static const uint kDotToTitle = 9; 22 static const uint kMaxToTitle = 10; 24 23 // max number of question/exclamation marks 25 24 static const uint kMaxQuestionExclamation = 2; 26 25 // max number of difference in words between a period and a colon 27 static const uint kMaxDotToColon = 5;26 static const uint kMaxDotToColon = 6; 28 27 29 28 public: 30 29 enum FixUpType … … 108 107 const QRegExp m_ukYearColon; 109 108 const QRegExp m_ukExclusionFromSubtitle; 110 109 const QRegExp m_ukCompleteDots; 110 const QRegExp m_uk5xNumberHyphen3xNumber; 111 const QRegExp m_ukTerminus; 111 112 const QRegExp m_comHemCountry; 112 113 const QRegExp m_comHemDirector; 113 114 const QRegExp m_comHemActor;