| 127 | |
| 128 | // |
| 129 | // This is an implementation of JaroWinkler : http://en.wikipedia.org/wiki/Jaro-Winkler |
| 130 | // |
| 131 | // Jaro computes a similarity score from two quantities: |
| 132 | // Number of matching characters : characters that appear in both t1 & t2 at index positions no more than (max(t1.length, t2.length)/2) - 1 apart |
| 133 | // Number of transposed characters : number of matching characters that are not in the same order in t1 & t2 |
| 134 | // JaroWinkler is an extension of Jaro that gives a bonus to strings sharing a common prefix |
| 135 | // |
| 136 | // The code is adapted from : http://www.cppfrance.com/codes/DISTANCE-JARO-WINKLER_49753.aspx |
| 137 | // |
| 138 | |
| 139 | QString FindMatches(const QString &txt,int bl[]) |
| 140 | { |
| 141 | QString res; |
| 142 | QChar ctmp = 'a'; |
| 143 | |
| 144 | int i = 0; |
| 145 | while (i < txt.length()) |
| 146 | { |
| 147 | ctmp = txt[i]; |
| 148 | if (bl[i] == true) |
| 149 | { |
| 150 | res.append(ctmp); |
| 151 | } |
| 152 | i++; |
| 153 | } |
| 154 | return res; |
| 155 | } |
| 156 | |
| 157 | double JaroWinkler(const QString &t1,const QString &t2) |
| 158 | { |
| 159 | int maxSpace,l1,l2, matchingCount , transposalCount , prefixLength , i, j; |
| 160 | QString t1Matche,t2Matche; |
| 161 | double jaroLength; |
| 162 | |
| 163 | l1 = t1.length(); |
| 164 | l2 = t2.length(); |
| 165 | maxSpace = (int)(((double)max(l1,l2)) / 2.00); |
| 166 | matchingCount = 0; |
| 167 | |
| 168 | VERBOSE(VB_XMLTV, QString("Input (%1) (%2)").arg(t1).arg(t2)); |
| 169 | VERBOSE(VB_XMLTV, QString("matchingCount (%1)").arg(maxSpace)); |
| 170 | |
| 171 | int b1[l1+2],b2[l2+2]; |
| 172 | |
| 173 | for (i = 0; i < l1; i++) |
| 174 | b1[i] = false; |
| 175 | |
| 176 | for (i = 0;i < l2; i++) |
| 177 | b2[i] = false; |
| 178 | |
| 179 | for (i = 0; i <l1; i++) |
| 180 | { |
| 181 | for (j = max(i-maxSpace,0); j < min(i+maxSpace,l2); j++) |
| 182 | { |
| 183 | if (t1[i] == t2[j]) |
| 184 | { |
| 185 | b1[i] = true; |
| 186 | b2[i] = true; |
| 187 | matchingCount++; |
| 188 | break; |
| 189 | } |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | VERBOSE(VB_XMLTV, QString("matchingCount (%1)").arg(matchingCount)); |
| 194 | |
| 195 | if (matchingCount == 0) |
| 196 | return 0.0; |
| 197 | |
| 198 | t1Matche = FindMatches(t1,b1); |
| 199 | t2Matche = FindMatches(t2,b2); |
| 200 | |
| 201 | VERBOSE(VB_XMLTV, QString("t1Matche (%1)").arg(t1Matche)); |
| 202 | VERBOSE(VB_XMLTV, QString("t2Matche (%1)").arg(t2Matche)); |
| 203 | |
| 204 | transposalCount = 0; |
| 205 | if (strcmp(t1Matche,t2Matche) != 0) |
| 206 | { |
| 207 | for (i = 0; i < t1Matche.length(); i++) |
| 208 | if (t1Matche[i] != t2Matche[i]) |
| 209 | transposalCount++; |
| 210 | } |
| 211 | else |
| 212 | transposalCount = 0; |
| 213 | |
| 214 | |
| 215 | VERBOSE(VB_XMLTV, QString("transposalCount (%1)").arg(transposalCount)); |
| 216 | |
| 217 | jaroLength = (((double)matchingCount/l1) + |
| 218 | ((double)matchingCount/l2) + |
| 219 | ((matchingCount - transposalCount / 2.0) / matchingCount)) / 3.0; |
| 220 | |
| 221 | VERBOSE(VB_XMLTV, QString("JARO Length: (%1)").arg(jaroLength)); |
| 222 | |
| 223 | prefixLength = 0; |
| 224 | for (i = 0; i < min(3, min(l1,l2)) + 1; i++) //longueur max : 4 |
| 225 | { |
| 226 | if ( i < t1.length() && |
| 227 | i < t2.length() && |
| 228 | t1[i] == t2[i]) |
| 229 | prefixLength++; |
| 230 | else |
| 231 | break; |
| 232 | } |
| 233 | |
| 234 | VERBOSE(VB_XMLTV, QString("prefixLength (%1)").arg(prefixLength)); |
| 235 | |
| 236 | return jaroLength + ( prefixLength * 0.1 * (1 - jaroLength)); |
| 237 | } |
| 238 | |
| 239 | |
| 240 | ChanInfo* _findChanMatch(QList<ChanInfo> *oldchanlist, ChanInfo chan) |
| 241 | { |
| 242 | QList<ChanInfo>::iterator i = oldchanlist->begin(); |
| 243 | QString channelNameUpper = chan.name.toUpper(); |
| 244 | double jaroMax = 0.0; |
| 245 | ChanInfo *jaroMaxChanInfo; |
| 246 | |
| 247 | VERBOSE(VB_XMLTV,QString("============================================")); |
| 248 | VERBOSE(VB_XMLTV,QString("searching for (%1)").arg(channelNameUpper)); |
| 249 | |
| 250 | for (; i != oldchanlist->end(); i++) |
| 251 | { |
| 252 | double d1 = 0; |
| 253 | double d2 = 0; |
| 254 | d1 = JaroWinkler(i->name, channelNameUpper); |
| 255 | d2 = JaroWinkler(i->callsign, channelNameUpper); |
| 256 | |
| 257 | if (d1 > 0.9 && d2 > 0.9) |
| 258 | { |
| 259 | VERBOSE(VB_XMLTV, |
| 260 | QString("Looking for a match (%1), (%2), (%3) <==> (%4) = d1 (%5), d2 (%6)") |
| 261 | .arg((*i).name).arg((*i).callsign).arg((*i).chanstr).arg(channelNameUpper).arg(d1).arg(d2)); |
| 262 | } |
| 263 | |
| 264 | // Look to the best match |
| 265 | if (d1 > 0.5 && d1 > jaroMax) |
| 266 | { |
| 267 | jaroMax = d1; |
| 268 | jaroMaxChanInfo = &(*i); |
| 269 | } |
| 270 | |
| 271 | if (d2 > 0.5 && d2 > jaroMax) |
| 272 | { |
| 273 | jaroMax = d2; |
| 274 | jaroMaxChanInfo = &(*i); |
| 275 | } |
| 276 | } |
| 277 | |
| 278 | // only keep match with score > 0.93, under may be a false positive |
| 279 | if (jaroMax > 0.93) |
| 280 | { |
| 281 | VERBOSE(VB_XMLTV, |
| 282 | QString("match is (%1), (%2), (%3) <==> (%4) = d1 (%5)") |
| 283 | .arg(jaroMaxChanInfo->name).arg(jaroMaxChanInfo->callsign).arg(jaroMaxChanInfo->chanstr).arg(channelNameUpper).arg(jaroMax)); |
| 284 | return jaroMaxChanInfo; |
| 285 | } |
| 286 | else |
| 287 | { |
| 288 | VERBOSE(VB_XMLTV, QString("no match")); |
| 289 | return NULL; |
| 290 | } |
| 291 | } |
| 292 | |