Ticket #12436: eitfixup-greek-diff-as-sent-to-04-25.diff

File eitfixup-greek-diff-as-sent-to-04-25.diff, 8.3 KB (added by Yianni Vidalis <yiannividalis@…>, 5 years ago)

diff between v1 and v2

Line 
1diff -u b/mythtv/libs/libmythtv/eitfixup.cpp b/mythtv/libs/libmythtv/eitfixup.cpp
2--- b/mythtv/libs/libmythtv/eitfixup.cpp
3+++ b/mythtv/libs/libmythtv/eitfixup.cpp
4@@ -187,12 +187,15 @@
5       m_AUFreeviewSYC("(.*) \\((.+)\\) \\(([12][0-9][0-9][0-9])\\) \\((.+)\\)$"),
6       m_grReplay("\\([ΕE]\\)"),
7       m_grDescriptionFinale("\\s*΀ελευταίο\\sΕπεισόΎιο\\.\\s*"),
8-      m_grActors("(?:[Ππ]α[ιί]ζουΜ:|[ΜΌ]ε τους:|ΠρωταγωΜιστο[υύ]Îœ:|ΠρωταγωΜιστε[ιί]:?)(?:\\s+στο ρόλο(?: του| της)?\\s(?:\\w+\\s[οη]\\s))?([-\\w\\s]+(?:,[-\\w\\s]+)*)(?:\\W?)"),
9+      m_grActors("(?:[Ππ]α[ιί]ζουΜ:|[ΜΌ]ε τους:|ΠρωταγωΜιστο[υύ]Îœ:|ΠρωταγωΜιστε[ιί]:?)(?:\\s+στο ρόλο(?: του| της)?\\s(?:\\w+\\s[οη]\\s))?([-\\w\\s']+(?:,[-\\w\\s']+)*)(?:κ\\.[αά])?(?:\\W?)"),
10       // cap(1) actors, just names
11+      m_grFixnofullstopActors("(\\w\\s(ΠαίζουΜ:|ΠρωταγωΜ))"),
12+      m_grFixnofullstopDirectors("((\\w\\s(ΣκηΜοΞ[εέ]))"),
13       m_grPeopleSeparator("(,\\s+)"),
14       m_grDirector("(?:ΣκηΜοΞεσία: |ΣκηΜοΞέτης: )(\\w+\\s\\w+\\s?)(?:\\W?)"),
15       m_grPres("(?:Παρουσ[ιί]αση:(?:\\b)*|Παρουσι[αά]ζ(?:ουΜ|ει)(?::|\\sο|\\sη)|Με τ(?:οΜ |ηΜ )(?:[\\s|:|ο|η])*(?:\\b)*)([-\\w\\s]+(?:,[-\\w\\s]+)*)(?:\\W?)"),
16-      m_grYear("(?:\\W?)(?:\\s?παραγωγ[ηή]ς|\\s?-|,)\\s*([1-2]{1}[0-9]{3})",Qt::CaseInsensitive),
17+      m_grYear("(?:\\W?)(?:\\s?παραγωγ[ηή]ς|\\s?-|,)\\s*([1-2]{1}[0-9]{3})(?:-\\d{1,4})?",Qt::CaseInsensitive),
18+      m_grCountry("(?:\\W|\\b)(?:(ελληΜ|τουρκ|αΌερικ[αά]Îœ|γαλλ|αγγλ|βρεττ?αΜ|γερΌαΜ|ρωσσ?|ιταλ|ελβετ|σουηΎ|ισπαΜ|πορτογαλ|ΌεΟικ[αά]Îœ|κιΜ[εέ]ζικ|ιαπωΜ|καΜαΎ|βραζιλι[αά]Îœ)(ικ[ηή][ςσ]))",Qt::CaseInsensitive),
19       m_grlongEp("\\b(?:Επ.|επεισ[οό]Ύιο:?)\\s*(\\d+)(?:\\W?)",Qt::CaseInsensitive),
20       m_grSeason("(?:-\\s)?\\b((\\D{1,2})(?:')?|(\\d{1,2})(?:ος|ου)?)(?:\\sκ[υύ]κλο(?:[σς]|υ)){1}\\s?",Qt::CaseInsensitive),
21       m_grRealTitleinDescription("(?:^\\()([\\w\\s\\d\\D-]+)(?:\\))(?:\\s*)"),
22@@ -203,6 +206,7 @@
23       // cap0 = real title in parentheses.
24       m_grNotPreviouslyShown("(?:\\b[Α1]['η]?\\s*(?:τηλεοπτικ[ηή]\\s*)?(?:Όετ[αά]Ύοση|προβολ[ηή]))(?:\\W?)"),
25       // Try to exctract Greek categories from keywords in description.
26+      m_grEpisodeAsSubtitle("(?:^Επεισ[οό]Ύιο:\\s?)([\\w\\s-,']+)\\.(?:\\s)?"),
27       m_grCategFood("(?:\\W)?(?:εκποΌπ[ηή]\\W)?(ΓαστροΜοΌ[ιί]α[σς]?|Όαγειρικ[ηή][σς]?|chef|συΜταγ[εέηή]|Ύιατροφ|wine|ÎŒ[αά]γειρα[σς]?)(?:\\W)?",Qt::CaseInsensitive),
28       m_grCategDrama("(?:\\W)?(κοιΜωΜικ[ηήό]|ΎραΌατικ[ηή]|Ύρ[αά]Όα)(?:\\W)(?:(?:εκποΌπ[ηή]|σειρ[αά]|ταιΜ[ιί]α)\\W)?",Qt::CaseInsensitive),
29       m_grCategComedy("(?:\\W)?(κωΌικ[ηήοό]|χιουΌοριστικ[ηήοό]|κωΌωΎ[ιί]α)(?:\\W)(?:(?:εκποΌπ[ηή]|σειρ[αά]|ταιΜ[ιί]α)\\W)?",Qt::CaseInsensitive),
30@@ -2353,6 +2357,8 @@
31     }
32     if (!event.subtitle.isEmpty())
33     {
34+        if (event.subtitle.trimmed().right(1) != ".'" )
35+            event.subtitle = event.subtitle.trimmed() + ".";
36         event.description = event.subtitle.trimmed() + QString(" ") + event.description;
37         event.subtitle = QString("");
38     }
39@@ -2388,6 +2394,21 @@
40         event.title = event.title.replace(tmpRegEx, "");
41     }
42 
43+    tmpRegEx = m_grFixnofullstopActors;
44+    position = event.description.indexOf(tmpRegEx);
45+    if (position != -1)
46+    {
47+        event.description.insert(position + 1, ".");
48+    }
49+
50+    // If they forgot the "." at the end of the sentence before the actors/directors begin, let's insert it.
51+    tmpRegEx = m_grFixnofullstopDirectors;
52+    position = event.description.indexOf(tmpRegEx);
53+    if (position != -1)
54+    {
55+        event.description.insert(position + 1, ".");
56+    }
57+
58     // Find actors and director in description
59     // I am looking for actors first and then for directors/presenters because
60     // sometimes punctuation is missing and the "ΠαίζουΜ:" label is mistaken
61@@ -2501,6 +2522,14 @@
62     event.title       = event.title.trimmed();
63     event.subtitle    = event.subtitle.trimmed();
64 
65+    //find country of origin and remove it from description.
66+    tmpRegEx = m_grCountry;
67+    position = event.description.indexOf(tmpRegEx);
68+    if (position != -1)
69+    {
70+        event.description.replace(tmpRegEx, "");
71+    }
72+
73     // Work out the season and episode numbers (if any)
74     // Matching pattern "Επεισ[όο]Ύιο:?|Επ 3 από 14|3/14" etc
75     bool    series  = false;
76@@ -2591,6 +2620,15 @@
77         }
78     }
79     
80+    // Description field: "^Episode: Lion in the cage. (Description follows)"
81+    tmpRegEx = m_grEpisodeAsSubtitle;
82+    position = event.description.indexOf(tmpRegEx);
83+    if (position != -1)
84+    {
85+        event.subtitle = tmpRegEx.cap(1).trimmed();
86+        event.description.replace(tmpRegEx, "");
87+    }
88+
89     if (series)
90         event.categoryType = ProgramInfo::kCategorySeries;
91 
92@@ -2611,6 +2649,10 @@
93     {
94         event.category = "ΚωΌωΎία";
95     }
96+    else if (event.description.indexOf(m_grCategTeleMag) != -1)
97+    {
98+        event.category = "΀ηλεπεριοΎικό";
99+    }
100     else if (event.description.indexOf(m_grCategNature) != -1)
101     {
102         event.category = "ΕπιστήΌη/Ίύση";
103@@ -2625,7 +2667,7 @@
104     }
105     else if (event.description.indexOf(m_grCategDrama) != -1)
106     {
107-        event.category = "ΔραΌατικό";
108+        event.category = "ΚοιΜωΜικό";
109     }
110     else if (event.description.indexOf(m_grCategChildren) != -1)
111     {
112@@ -2657,10 +2699,6 @@
113     {
114         event.category = "΀ηλεπωλήσεις";
115     }
116-    else if (event.description.indexOf(m_grCategTeleMag) != -1)
117-    {
118-        event.category = "΀ηλεπεριοΎικό";
119-    }         
120     else if (event.description.indexOf(m_grCategFood) != -1)
121     {
122         event.category = "ΓαστροΜοΌία";
123diff -u b/mythtv/libs/libmythtv/eitfixup.h b/mythtv/libs/libmythtv/eitfixup.h
124--- b/mythtv/libs/libmythtv/eitfixup.h
125+++ b/mythtv/libs/libmythtv/eitfixup.h
126@@ -244,22 +244,26 @@
127     const QRegExp m_grReplay; //Greek rerun
128     const QRegExp m_grDescriptionFinale; //Greek last m_grEpisode
129     const QRegExp m_grActors; //Greek actors
130+    const QRegExp m_grFixnofullstopActors; //bad punctuation makes the "ΠαίζουΜ:" label and the actors' names get parsed as part of the directors.
131+    const QRegExp m_grFixnofullstopDirectors; //bad punctuation merges the "ΣκηΜοΞ...:" label into the previous sentence.
132     const QRegExp m_grPeopleSeparator; // The comma that separates the actors.
133     const QRegExp m_grDirector;
134     const QRegExp m_grPres; // Greek Presenters for shows
135-    const QRegExp m_grYear; // ("[Ππ]αραγωγής\\s(\\d{4})")
136+    const QRegExp m_grYear; // Greek release year.
137+    const QRegExp m_grCountry; // Greek event country of origin.
138     const QRegExp m_grlongEp; // Greek Episode
139     const QRegExp m_grSeason; // Greek Season
140     const QRegExp m_grSeries;
141     const QRegExp m_grRealTitleinDescription; // The original title is often in the descr in parenthesis.
142     const QRegExp m_grRealTitleinTitle; // The original title is often in the title in parenthesis.
143     const QRegExp m_grNotPreviouslyShown; // Not previously shown on TV
144+    const QRegExp m_grEpisodeAsSubtitle; // Description field: "^Episode: Lion in the cage. (Description follows)"
145     const QRegExp m_grCategFood; // Greek category food
146     const QRegExp m_grCategDrama; // Greek category social/drama
147     const QRegExp m_grCategComedy; // Greek category comedy
148     const QRegExp m_grCategChildren; // Greek category for children / cartoons
149-    const QRegExp m_grCategFantasy; // Greek category for fantasy
150     const QRegExp m_grCategMystery; // Greek category for mystery
151+    const QRegExp m_grCategFantasy; // Greek category for fantasy
152     const QRegExp m_grCategHistory; //Greek category for historical movie/series
153     const QRegExp m_grCategTeleMag; //Greek category for Telemagazine show
154     const QRegExp m_grCategTeleShop; //Greek category for teleshopping