Ticket #4327: eitfixup1.patch
File eitfixup1.patch, 17.6 KB (added by , 16 years ago) |
---|
-
libs/libmythtv/eitfixup.h
14 14 /// EIT Fix Up Functions 15 15 class EITFixUp 16 16 { 17 protected: 18 // max length of subtitle field in db. 19 static const uint SUBTITLE_MAX_LEN = 128; 20 // max number of words up to a period, question mark 21 static const uint kDotToTitle = 6; 22 17 23 public: 18 24 enum FixUpType 19 25 { … … 54 60 55 61 private: 56 62 void FixBellExpressVu(DBEvent &event) const; // Canada DVB-S 63 void SetUKSubtitle(DBEvent &event) const; 57 64 void FixUK(DBEvent &event) const; // UK DVB-T 58 65 void FixPBS(DBEvent &event) const; // USA ATSC 59 66 void FixComHem(DBEvent &event, bool parse_subtitle) const; // Sweden DVB-C … … 75 82 const QRegExp m_ukSubtitle; 76 83 const QRegExp m_ukThen; 77 84 const QRegExp m_ukNew; 85 const QRegExp m_ukNew1; 78 86 const QRegExp m_ukT4; 79 87 const QRegExp m_ukEQ; 80 88 const QRegExp m_ukEPQ; 89 const QRegExp m_ukColonHyphen; 81 90 const QRegExp m_ukPStart; 82 91 const QRegExp m_ukPEnd; 83 92 const QRegExp m_ukSeries1; 84 93 const QRegExp m_ukSeries2; 94 const QRegExp m_ukSeries3; 85 95 const QRegExp m_ukCC; 86 96 const QRegExp m_ukYear; 87 97 const QRegExp m_uk24ep; 88 98 const QRegExp m_ukStarring; 99 const QRegExp m_ukBBC7rpt; 100 const QRegExp m_ukCBBC; 101 const QRegExp m_ukCBeebies; 102 const QRegExp m_ukStarring1; 103 const QRegExp m_ukDoubleDotEnd; 104 const QRegExp m_ukDoubleDotStart; 89 105 const QRegExp m_comHemCountry; 90 106 const QRegExp m_comHemDirector; 91 107 const QRegExp m_comHemActor; -
libs/libmythtv/eitfixup.cpp
17 17 m_bellPPVDescriptionEventId("\\([0-9]{5}\\)"), 18 18 m_ukSubtitle("\\[.*S\\]"), 19 19 m_ukThen("\\s*(Then|Followed by) 60 Seconds\\.", false), 20 m_ukNew("\\s*(Brand New|New) Series\\s*[:\\.\\-]"), 20 m_ukNew("\\s*(Brand New|New)\\s*Series\\s*[:\\.\\-]",false), 21 m_ukNew1("^New\\."), 21 22 m_ukT4("^[tT]4:"), 22 m_ukEQ("[ \\!\\?]"),23 m_ukEQ("[:\\!\\?]"), 23 24 m_ukEPQ("[:\\!\\.\\?]"), 25 m_ukColonHyphen("[:-]"), 24 26 m_ukPStart("^\\.+"), 25 27 m_ukPEnd("\\.+$"), 26 28 m_ukSeries1("^\\s*(\\d{1,2})/(\\d{1,2})\\."), 27 29 m_ukSeries2("\\((Part|Pt)\\s+(\\d{1,2})\\s+of\\s+(\\d{1,2})\\)", false), 30 m_ukSeries3("\\s*Episode\\s+(\\d{1,2})\\s+of\\s+(\\d{1,2})\\.", false), 28 31 m_ukCC("\\[(AD)(,(S)){,1}(,SL){,1}\\]|\\[(S)(,AD){,1}(,SL){,1}\\]|" 29 32 "\\[(SL)(,AD){,1}(,(S)){,1}\\]"), 30 33 m_ukYear("[\\[\\(]([\\d]{4})[\\)\\]]"), 31 34 m_uk24ep("^\\d{1,2}:00[ap]m to \\d{1,2}:00[ap]m: "), 32 35 m_ukStarring("(?:Western\\s)?[Ss]tarring ([\\w\\s\\-']+)[Aa]nd\\s([\\w\\s\\-']+)[\\.|,](?:\\s)*(\\d{4})?(?:\\.\\s)?"), 36 m_ukBBC7rpt("\\[Rptd?[^]]+\\d{1,2}\\.\\d{1,2}[ap]m\\]\\."), 37 m_ukCBBC("^CBBC."), 38 m_ukCBeebies("^CBeebies."), 39 m_ukStarring1("star(ring)"), 40 m_ukDoubleDotEnd("\\.\\.+$"), 41 m_ukDoubleDotStart("^\\.\\.+"), 33 42 m_comHemCountry("^(\\(.+\\))?\\s?([^ ]+)\\s([^\\.0-9]+)" 34 43 "(?:\\sfrån\\s([0-9]{4}))(?:\\smed\\s([^\\.]+))?\\.?"), 35 44 m_comHemDirector("[Rr]egi"), … … 283 292 284 293 } 285 294 295 void EITFixUp::SetUKSubtitle(DBEvent &event) const 296 { 297 QStringList strList1; 298 int position1; 299 int position2; 300 if ((position1 = event.description.find(m_ukEQ)) != -1) 301 { 302 position2 = event.description.find(":",position1+1); 303 if (position2 != -1) 304 { 305 strList1 = QStringList::split(" ", 306 event.description.mid(position1+1, 307 position2-position1+1)); 308 if (strList1.count() < kDotToTitle) 309 { 310 event.subtitle = event.description.left(position2); 311 event.description = event.description.mid(position2 + 1); 312 } 313 else 314 { 315 event.subtitle = event.description.left(position1); 316 event.description = event.description.mid(position1 + 1); 317 } 318 } 319 else 320 { 321 strList1 = QStringList::split(" ", 322 event.description.left(position1)); 323 if (strList1.count() < kDotToTitle) 324 { 325 if ((uint)position1 < SUBTITLE_MAX_LEN) 326 { 327 event.subtitle = event.description.left(position1); 328 event.description = event.description.mid(position1+1); 329 } 330 } 331 } 332 } 333 } 334 286 335 /** \fn EITFixUp::FixUK(DBEvent&) const 287 336 * \brief Use this in the United Kingdom to standardize DVB-T guide. 288 337 */ 289 338 void EITFixUp::FixUK(DBEvent &event) const 290 339 { 291 const uint SUBTITLE_PCT = 30; //% of description to allow subtitle up to 292 const uint SUBTITLE_MAX_LEN = 128; // max length of subtitle field in db. 293 int position = event.description.find("New Series"); 294 if (position != -1) 295 { 296 // Do something here 297 } 340 int position1; 341 int position2; 342 QString strFull; 298 343 299 position = event.description.find(m_ukSubtitle);300 if (position != -1)344 position1 = event.description.find(m_ukSubtitle); 345 if (position1 != -1) 301 346 { 302 347 event.flags |= DBEvent::kSubtitled; 303 event.description.re place(m_ukSubtitle, "");348 event.description.remove(m_ukSubtitle); 304 349 } 305 350 306 351 // BBC three case (could add another record here ?) 307 event.description = event.description.replace(m_ukThen, ""); 308 event.description = event.description.replace(m_ukNew, ""); 309 event.title = event.title.replace(m_ukT4, ""); 352 event.description = event.description.remove(m_ukThen); 353 event.description = event.description.remove(m_ukNew); 310 354 311 // First join up event data, that's spread across title/desc 312 // at this point there is no subtitle. 313 if (event.title.endsWith("...") || 314 event.description.startsWith("..") || 315 event.description.isEmpty()) 316 { 317 // try and make the subtitle 318 QString Full = event.title.replace(m_ukPEnd, "") + " " + 319 event.description.replace(m_ukPStart, ""); 355 event.description = event.description.remove(m_ukNew1); 356 event.title = event.title.remove(m_ukT4); 320 357 321 if ((position = Full.find(m_ukEPQ)) != -1) 322 { 323 if (Full[position] == '!' || Full[position] == '?' || 324 Full[position] == '.') 325 position++; 326 event.title = Full.left(position); 327 event.description = Full.mid(position + 1); 328 } 329 if ((position = event.title.find(m_ukYear)) != -1) 330 { 331 // Looks like they are using the airdate as a delimiter 332 event.description = event.title.mid(position); 333 event.title = event.title.left(position); 334 } 335 } 358 // BBC 7 [Rpt of ...] case. 359 event.description = event.description.remove(m_ukBBC7rpt); 336 360 337 // This is trying to catch the case where the subtitle is in the main title338 // but avoid cases where it isn't a subtitle e.g cd:uk339 if (((position = event.title.find(":")) != -1) &&340 (event.description.find(":") == -1) &&341 (event.title[position + 1].upper() == event.title[position + 1]))342 {343 event.subtitle = event.title.mid(position + 1);344 event.title = event.title.left(position);345 }346 347 // Special case for episodes of 24.348 361 QRegExp tmp24ep = m_uk24ep; 349 if ((position = tmp24ep.search(event.description)) != -1) 362 QStringList strList1; 363 if (!event.title.startsWith("CSI:") && !event.title.startsWith("CD:")) 350 364 { 351 // -2 from the length cause we don't want ": " on the end 352 event.subtitle = event.description.mid(position, tmp24ep.cap(0).length() - 2); 353 event.description = event.description.replace(tmp24ep.cap(0),""); 354 } 355 else if ((position = event.description.find(":")) != -1) 356 { 357 // if the subtitle is less than 50% of the description use it. 358 if (((uint)position < SUBTITLE_MAX_LEN) && 359 ((position*100)/event.description.length() < SUBTITLE_PCT)) 365 if (((position1=event.title.find(m_ukDoubleDotEnd))!=-1) && 366 ((position2=event.description.find(m_ukDoubleDotStart))!=-1)) 360 367 { 361 event.subtitle = event.description.left(position); 362 event.description = event.description.mid(position + 1); 368 strFull = event.title.remove(m_ukDoubleDotEnd)+" "+ 369 event.description.remove(m_ukDoubleDotStart); 370 if ((position1 = strFull.find(m_ukEPQ)) != -1) 371 { 372 if (strFull[position1] == '!' || strFull[position1] == '?' || 373 strFull[position1] == '.') 374 position1++; 375 event.title = strFull.left(position1); 376 event.description = strFull.mid(position1 + 1); 377 SetUKSubtitle(event); 378 } 379 else if ((position1 = strFull.find(m_ukYear)) != -1) 380 { 381 // Looks like they are using the airdate as a delimiter 382 if ((uint)position1 < SUBTITLE_MAX_LEN) 383 { 384 event.description = event.title.mid(position1); 385 event.title = event.title.left(position1); 386 } 387 } 363 388 } 364 } 365 else if (!(((position = event.description.find(m_ukYear)) != -1) && (position < 3)) 366 && ((position = event.description.find(m_ukEPQ)) != -1)) 367 { 368 // only move stuff into the subtitle if the airdate isn't at 369 // the beginning of the description 370 if (((uint)position < SUBTITLE_MAX_LEN) && 371 ((position*100)/event.description.length() < SUBTITLE_PCT)) 389 else if ((position1 = tmp24ep.search(event.description)) != -1) 372 390 { 373 event.subtitle = event.description.left(position + 1); 374 event.description = event.description.mid(position + 2); 391 // Special case for episodes of 24. 392 // -2 from the length cause we don't want ": " on the end 393 event.subtitle = event.description.mid(position1, 394 tmp24ep.cap(0).length() - 2); 395 event.description = event.description.remove(tmp24ep.cap(0)); 375 396 } 397 else if (((position1 = event.title.find(m_ukColonHyphen)) != -1) && 398 (event.description.find(":") == -1)) 399 { 400 if ((uint)position1 < SUBTITLE_MAX_LEN) 401 { 402 event.subtitle = event.title.mid(position1 + 1); 403 event.title = event.title.left(position1); 404 } 405 } 406 else 407 SetUKSubtitle(event); 376 408 } 377 409 378 410 QRegExp tmpStarring = m_ukStarring; … … 381 413 // If the "Starring..." string got promoted to subtitle move it back. 382 414 event.description.prepend(". "); 383 415 event.description.prepend(tmpStarring.cap(0)); 384 event.subtitle.re place(tmpStarring.cap(0), "");416 event.subtitle.remove(tmpStarring.cap(0)); 385 417 } 386 418 tmpStarring = m_ukStarring; 387 419 if (tmpStarring.search(event.description) != -1) … … 398 430 event.originalairdate = QDate(y, 1, 1); 399 431 } 400 432 } 401 // Trim trailing '.'402 event.title.replace(m_ukPEnd, "");403 event.subtitle.replace(m_ukPEnd, "");404 433 405 434 // Work out the episode numbers (if any) 406 435 bool series = false; 407 436 QRegExp tmpExp1 = m_ukSeries1; 408 437 QRegExp tmpExp2 = m_ukSeries2; 409 if ((position = tmpExp1.search(event.title)) != -1) 438 QRegExp tmpExp3 = m_ukSeries3; 439 if ((position1 = tmpExp1.search(event.title)) != -1) 410 440 { 411 441 event.partnumber = tmpExp1.cap(1).toUInt(); 412 442 event.parttotal = tmpExp1.cap(2).toUInt(); 413 443 // Remove from the title 414 444 event.title = 415 event.title.mid(position + tmpExp1.cap(0).length()); 416 // but add it to the description 417 event.description += tmpExp1.cap(0); 445 event.title.mid(position1 + tmpExp1.cap(0).length()); 418 446 series = true; 419 447 } 420 else if ((position = tmpExp1.search(event.subtitle)) != -1)448 else if ((position1 = tmpExp1.search(event.subtitle)) != -1) 421 449 { 422 450 event.partnumber = tmpExp1.cap(1).toUInt(); 423 451 event.parttotal = tmpExp1.cap(2).toUInt(); 424 452 // Remove from the sub title 425 453 event.subtitle = 426 event.subtitle.mid(position + tmpExp1.cap(0).length()); 427 // but add it to the description 428 event.description += tmpExp1.cap(0); 454 event.subtitle.mid(position1 + tmpExp1.cap(0).length()); 429 455 series = true; 430 456 } 431 else if ((position = tmpExp1.search(event.description)) != -1)457 else if ((position1 = tmpExp1.search(event.description)) != -1) 432 458 { 433 459 event.partnumber = tmpExp1.cap(1).toUInt(); 434 460 event.parttotal = tmpExp1.cap(2).toUInt(); 435 // Don't cut it from the description 436 //event.description = event.description.left(position) + 437 // event.description.mid(position + tmpExp1.cap(0).length()); 461 event.description = event.description.left(position1) + 462 event.description.mid(position1 + tmpExp1.cap(0).length()); 438 463 series = true; 439 464 } 440 else if ((position = tmpExp2.search(event.description)) != -1)465 else if ((position1 = tmpExp2.search(event.description)) != -1) 441 466 { 442 467 event.partnumber = tmpExp2.cap(2).toUInt(); 443 468 event.parttotal = tmpExp2.cap(3).toUInt(); 444 // Don't cut it from the description 445 //event.description = event.description.left(position) + 446 // event.description.mid(position + tmpExp2.cap(0).length()); 469 event.description = event.description.left(position1) + 470 event.description.mid(position1 + tmpExp2.cap(0).length()); 447 471 series = true; 448 472 } 473 else if ((position1 = tmpExp3.search(event.description)) != -1) 474 { 475 event.partnumber = tmpExp3.cap(1).toUInt(); 476 event.parttotal = tmpExp3.cap(2).toUInt(); 477 event.description = event.description.left(position1) + 478 event.description.mid(position1 + tmpExp3.cap(0).length()); 479 series = true; 480 } 449 481 if (series) 450 482 event.category_type = kCategorySeries; 451 483 … … 453 485 QStringList captures; 454 486 QStringList::const_iterator it; 455 487 QRegExp tmpUKCC = m_ukCC; 456 if ((position = tmpUKCC.search(event.description)) != -1)488 if ((position1 = tmpUKCC.search(event.description)) != -1) 457 489 { 458 490 // Enumerate throught and see if we have subtitles, don't modify 459 491 // the description as we might destroy other useful information … … 464 496 event.flags |= DBEvent::kSubtitled; 465 497 } 466 498 } 467 else if ((position = tmpUKCC.search(event.subtitle)) != -1)499 else if ((position1 = tmpUKCC.search(event.subtitle)) != -1) 468 500 { 469 501 captures = tmpUKCC.capturedTexts(); 470 502 for (it = captures.begin(); it != captures.end(); ++it) … … 475 507 476 508 // We remove [AD,S] from the subtitle. 477 509 QString stmp = event.subtitle; 478 int itmp = position + tmpUKCC.cap(0).length();479 event.subtitle = stmp.left(position ) + stmp.mid(itmp);510 int itmp = position1 + tmpUKCC.cap(0).length(); 511 event.subtitle = stmp.left(position1) + stmp.mid(itmp); 480 512 } 481 513 482 514 // Work out the year (if any) 483 515 QRegExp tmpUKYear = m_ukYear; 484 if ((position = tmpUKYear.search(event.description)) != -1)516 if ((position1 = tmpUKYear.search(event.description)) != -1) 485 517 { 486 518 QString stmp = event.description; 487 int itmp = position + tmpUKYear.cap(0).length();488 event.description = stmp.left(position ) + stmp.mid(itmp);519 int itmp = position1 + tmpUKYear.cap(0).length(); 520 event.description = stmp.left(position1) + stmp.mid(itmp); 489 521 event.airdate = tmpUKYear.cap(1); 490 522 bool ok; 491 523 uint y = tmpUKYear.cap(1).toUInt(&ok); 492 524 if (ok) 493 525 event.originalairdate = QDate(y, 1, 1); 494 526 } 527 528 // Remove spurious channel names 529 event.subtitle = event.subtitle.remove(m_ukCBBC); 530 event.description = event.description.remove(m_ukCBBC); 531 event.subtitle = event.subtitle.remove(m_ukCBeebies); 532 event.description = event.description.remove(m_ukCBeebies); 533 534 // Trim trailing '.' 535 event.title.remove(m_ukPEnd); 536 event.subtitle.remove(m_ukPEnd); 537 538 if (event.subtitle.isEmpty() && !event.category.startsWith("Movie",false)) 539 { 540 strList1 = QStringList::split(".",event.description); 541 QStringList strList2 = QStringList::split("?",event.description); 542 if (strList1.count()>1 || strList2.count()>1) 543 { 544 QString strEnd; 545 if (strList1[0].length() > strList2[0].length()) 546 { 547 strList1=strList2; 548 strEnd="?"; 549 } 550 strList2 = QStringList::split(" ",strList1[0]); 551 if (strList2.count() < kDotToTitle) 552 { 553 QStringList strList3 = strList2.grep("Drama",false); 554 QStringList strList4 = strList2.grep("sitcom",false); 555 QStringList strList5 = strList2.grep(m_ukStarring1); 556 if ((strList3.count()==0) && (strList4.count()==0) && 557 (strList5.count()==0)) 558 { 559 event.subtitle = strList1[0]+strEnd; 560 event.description= 561 event.description.mid(strList1[0].length()+1); 562 563 } 564 } 565 } 566 } 495 567 } 496 568 497 569 /** \fn EITFixUp::FixPBS(DBEvent&) const