Title: gh-94808: Cover `PyUnicode_Count` in CAPI by sobolevn · Pull Request #96929 · python/cpython · GitHub
Open Graph Title: gh-94808: Cover `PyUnicode_Count` in CAPI by sobolevn · Pull Request #96929 · python/cpython
X Title: gh-94808: Cover `PyUnicode_Count` in CAPI by sobolevn · Pull Request #96929 · python/cpython
Description: It is heavily inspired by cpython/Lib/test/string_tests.py Lines 99 to 161 in cbdeda8 def test_count(self): self.checkequal(3, 'aaa', 'count', 'a') self.checkequal(0, 'aaa', 'count', 'b') self.checkequal(3, 'aaa', 'count', 'a') self.checkequal(0, 'aaa', 'count', 'b') self.checkequal(3, 'aaa', 'count', 'a') self.checkequal(0, 'aaa', 'count', 'b') self.checkequal(0, 'aaa', 'count', 'b') self.checkequal(2, 'aaa', 'count', 'a', 1) self.checkequal(0, 'aaa', 'count', 'a', 10) self.checkequal(1, 'aaa', 'count', 'a', -1) self.checkequal(3, 'aaa', 'count', 'a', -10) self.checkequal(1, 'aaa', 'count', 'a', 0, 1) self.checkequal(3, 'aaa', 'count', 'a', 0, 10) self.checkequal(2, 'aaa', 'count', 'a', 0, -1) self.checkequal(0, 'aaa', 'count', 'a', 0, -10) self.checkequal(3, 'aaa', 'count', '', 1) self.checkequal(1, 'aaa', 'count', '', 3) self.checkequal(0, 'aaa', 'count', '', 10) self.checkequal(2, 'aaa', 'count', '', -1) self.checkequal(4, 'aaa', 'count', '', -10) self.checkequal(1, '', 'count', '') self.checkequal(0, '', 'count', '', 1, 1) self.checkequal(0, '', 'count', '', sys.maxsize, 0) self.checkequal(0, '', 'count', 'xx') self.checkequal(0, '', 'count', 'xx', 1, 1) self.checkequal(0, '', 'count', 'xx', sys.maxsize, 0) self.checkraises(TypeError, 'hello', 'count') if self.contains_bytes: self.checkequal(0, 'hello', 'count', 42) else: self.checkraises(TypeError, 'hello', 'count', 42) # For a variety of combinations, # verify that str.count() matches an equivalent function # replacing all occurrences and then differencing the string lengths charset = ['', 'a', 'b'] digits = 7 base = len(charset) teststrings = set() for i in range(base ** digits): entry = [] for j in range(digits): i, m = divmod(i, base) entry.append(charset[m]) teststrings.add(''.join(entry)) teststrings = [self.fixtype(ts) for ts in teststrings] for i in teststrings: n = len(i) for j in teststrings: r1 = i.count(j) if j: r2, rem = divmod(n - len(i.replace(j, self.fixtype(''))), len(j)) else: r2, rem = len(i)+1, 0 if rem or r1 != r2: self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i)) self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i)) Question: what is the historical context on why PyUnicode_Count is not reused in unicode_count? They look pretty similar: cpython/Objects/unicodeobject.c Lines 8968 to 9040 in cbdeda8 Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end) { Py_ssize_t result; int kind1, kind2; const void *buf1 = NULL, *buf2 = NULL; Py_ssize_t len1, len2; if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0) return -1; kind1 = PyUnicode_KIND(str); kind2 = PyUnicode_KIND(substr); if (kind1 < kind2) return 0; len1 = PyUnicode_GET_LENGTH(str); len2 = PyUnicode_GET_LENGTH(substr); ADJUST_INDICES(start, end, len1); if (end - start < len2) return 0; buf1 = PyUnicode_DATA(str); buf2 = PyUnicode_DATA(substr); if (kind2 != kind1) { buf2 = unicode_askind(kind2, buf2, len2, kind1); if (!buf2) goto onError; } switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr)) result = asciilib_count( ((const Py_UCS1*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); else result = ucs1lib_count( ((const Py_UCS1*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; case PyUnicode_2BYTE_KIND: result = ucs2lib_count( ((const Py_UCS2*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; case PyUnicode_4BYTE_KIND: result = ucs4lib_count( ((const Py_UCS4*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; default: Py_UNREACHABLE(); } assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); if (kind2 != kind1) PyMem_Free((void *)buf2); return result; onError: assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); if (kind2 != kind1) PyMem_Free((void *)buf2); return -1; } And cpython/Objects/unicodeobject.c Lines 10854 to 10916 in cbdeda8 static PyObject * unicode_count(PyObject *self, PyObject *args) { PyObject *substring = NULL; /* initialize to fix a compiler warning */ Py_ssize_t start = 0; Py_ssize_t end = PY_SSIZE_T_MAX; PyObject *result; int kind1, kind2; const void *buf1, *buf2; Py_ssize_t len1, len2, iresult; if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) return NULL; kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); if (kind1 < kind2) return PyLong_FromLong(0); len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); ADJUST_INDICES(start, end, len1); if (end - start < len2) return PyLong_FromLong(0); buf1 = PyUnicode_DATA(self); buf2 = PyUnicode_DATA(substring); if (kind2 != kind1) { buf2 = unicode_askind(kind2, buf2, len2, kind1); if (!buf2) return NULL; } switch (kind1) { case PyUnicode_1BYTE_KIND: iresult = ucs1lib_count( ((const Py_UCS1*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; case PyUnicode_2BYTE_KIND: iresult = ucs2lib_count( ((const Py_UCS2*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; case PyUnicode_4BYTE_KIND: iresult = ucs4lib_count( ((const Py_UCS4*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX ); break; default: Py_UNREACHABLE(); } result = PyLong_FromSsize_t(iresult); assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring))); if (kind2 != kind1) PyMem_Free((void *)buf2); return result; } Issue: gh-94808
Open Graph Description: It is heavily inspired by cpython/Lib/test/string_tests.py Lines 99 to 161 in cbdeda8 def test_count(sel...
X Description: It is heavily inspired by cpython/Lib/test/string_tests.py Lines 99 to 161 in cbdeda8 def test_count(sel...
Opengraph URL: https://github.com/python/cpython/pull/96929
X: @github
Domain: github.com
| route-pattern | /:user_id/:repository/pull/:id/files(.:format) |
| route-controller | pull_requests |
| route-action | files |
| fetch-nonce | v2:8d74f29a-1959-76c0-7f67-6098fc5677b7 |
| current-catalog-service-hash | ae870bc5e265a340912cde392f23dad3671a0a881730ffdadd82f2f57d81641b |
| request-id | B614:2BBCDD:10342A1:15BBA7B:6969933D |
| html-safe-nonce | 5c1ff4cca43f57e441ca2dd40732e90c4b8e1b4f7754b3a2c9fa71a19395aada |
| visitor-payload | eyJyZWZlcnJlciI6IiIsInJlcXVlc3RfaWQiOiJCNjE0OjJCQkNERDoxMDM0MkExOjE1QkJBN0I6Njk2OTkzM0QiLCJ2aXNpdG9yX2lkIjoiNTk1NzEyOTA5NDU0MzU0NTE0OSIsInJlZ2lvbl9lZGdlIjoiaWFkIiwicmVnaW9uX3JlbmRlciI6ImlhZCJ9 |
| visitor-hmac | c9e2dbc46183015c527bc9d0da27db78f708442c05877f3739c8a07594511e38 |
| hovercard-subject-tag | pull_request:1060045301 |
| github-keyboard-shortcuts | repository,pull-request-list,pull-request-conversation,pull-request-files-changed,copilot |
| google-site-verification | Apib7-x98H0j5cPqHWwSMm6dNU4GmODRoqxLiDzdx9I |
| octolytics-url | https://collector.github.com/github/collect |
| analytics-location | / |
| fb:app_id | 1401488693436528 |
| apple-itunes-app | app-id=1477376905, app-argument=https://github.com/python/cpython/pull/96929/files |
| twitter:image | https://avatars.githubusercontent.com/u/4660275?s=400&v=4 |
| twitter:card | summary_large_image |
| og:image | https://avatars.githubusercontent.com/u/4660275?s=400&v=4 |
| og:image:alt | It is heavily inspired by cpython/Lib/test/string_tests.py Lines 99 to 161 in cbdeda8 def test_count(sel... |
| og:site_name | GitHub |
| og:type | object |
| hostname | github.com |
| expected-hostname | github.com |
| None | 3542e147982176a7ebaa23dfb559c8af16f721c03ec560c68c56b64a0f35e751 |
| turbo-cache-control | no-preview |
| diff-view | unified |
| go-import | github.com/python/cpython git https://github.com/python/cpython.git |
| octolytics-dimension-user_id | 1525981 |
| octolytics-dimension-user_login | python |
| octolytics-dimension-repository_id | 81598961 |
| octolytics-dimension-repository_nwo | python/cpython |
| octolytics-dimension-repository_public | true |
| octolytics-dimension-repository_is_fork | false |
| octolytics-dimension-repository_network_root_id | 81598961 |
| octolytics-dimension-repository_network_root_nwo | python/cpython |
| turbo-body-classes | logged-out env-production page-responsive full-width |
| disable-turbo | true |
| browser-stats-url | https://api.github.com/_private/browser/stats |
| browser-errors-url | https://api.github.com/_private/browser/errors |
| release | af80af7cc9e3de9c336f18b208a600950a3c187c |
| ui-target | full |
| theme-color | #1e2327 |
| color-scheme | light dark |
Links:
Viewport: width=device-width