aboutsummaryrefslogtreecommitdiffstats
path: root/string
diff options
context:
space:
mode:
authorWilco Dijkstra <wdijkstr@arm.com>2018-07-16 17:50:09 +0100
committerWilco Dijkstra <wdijkstr@arm.com>2018-07-16 17:51:52 +0100
commit3ae725dfb6d7f61447d27d00ed83e573bd5454f4 (patch)
tree1a0eada529fc06de4613cb84a20d80e5d69c6d0a /string
parent31e422ca9680bbe412aa56b23f058c10eaee7be8 (diff)
downloadtermbaud-3ae725dfb6d7f61447d27d00ed83e573bd5454f4.tar.gz
termbaud-3ae725dfb6d7f61447d27d00ed83e573bd5454f4.tar.xz
termbaud-3ae725dfb6d7f61447d27d00ed83e573bd5454f4.zip
Improve strstr performance
Improve strstr performance. Strstr tends to be slow because it uses many calls to memchr and a slow byte loop to scan for the next match. Performance is significantly improved by using strnlen on larger blocks and using strchr to search for the next matching character. strcasestr can also use strnlen to scan ahead, and memmem can use memchr to check for the next match. On the GLIBC bench tests the performance gains on Cortex-A72 are: strstr: +25% strcasestr: +4.3% memmem: +18% On a 256KB dataset strstr performance improves by 67%, strcasestr by 47%. Reviewd-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Diffstat (limited to 'string')
-rw-r--r--string/memmem.c1
-rw-r--r--string/str-two-way.h56
-rw-r--r--string/strcasestr.c4
-rw-r--r--string/strstr.c5
-rw-r--r--string/test-strcasestr.c1
-rw-r--r--string/test-strstr.c1
6 files changed, 35 insertions, 33 deletions
diff --git a/string/memmem.c b/string/memmem.c
index c17e1cf6a63..43efaa3fb71 100644
--- a/string/memmem.c
+++ b/string/memmem.c
@@ -31,6 +31,7 @@
#define RETURN_TYPE void *
#define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l))
+#define FASTSEARCH(S,C,N) (void*) memchr ((void *)(S), (C), (N))
#include "str-two-way.h"
#undef memmem
diff --git a/string/str-two-way.h b/string/str-two-way.h
index cd2605857d9..523d946c594 100644
--- a/string/str-two-way.h
+++ b/string/str-two-way.h
@@ -281,50 +281,50 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
}
else
{
- const unsigned char *phaystack = &haystack[suffix];
+ const unsigned char *phaystack;
/* The comparison always starts from needle[suffix], so cache it
and use an optimized first-character loop. */
unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
-#if CHECK_EOL
- /* We start matching from the SUFFIX'th element, so make sure we
- don't hit '\0' before that. */
- if (haystack_len < suffix + 1
- && !AVAILABLE (haystack, haystack_len, 0, suffix + 1))
- return NULL;
-#endif
-
/* The two halves of needle are distinct; no extra memory is
required, and any mismatch results in a maximal shift. */
period = MAX (suffix, needle_len - suffix) + 1;
j = 0;
- while (1
-#if !CHECK_EOL
- && AVAILABLE (haystack, haystack_len, j, needle_len)
-#endif
- )
+ while (AVAILABLE (haystack, haystack_len, j, needle_len))
{
unsigned char haystack_char;
const unsigned char *pneedle;
- /* TODO: The first-character loop can be sped up by adapting
- longword-at-a-time implementation of memchr/strchr. */
- if (needle_suffix
+ phaystack = &haystack[suffix + j];
+
+#ifdef FASTSEARCH
+ if (*phaystack++ != needle_suffix)
+ {
+ phaystack = FASTSEARCH (phaystack, needle_suffix,
+ haystack_len - needle_len - j);
+ if (phaystack == NULL)
+ goto ret0;
+ j = phaystack - &haystack[suffix];
+ phaystack++;
+ }
+#else
+ while (needle_suffix
!= (haystack_char = CANON_ELEMENT (*phaystack++)))
{
RET0_IF_0 (haystack_char);
-#if !CHECK_EOL
+# if !CHECK_EOL
++j;
-#endif
- continue;
+ if (!AVAILABLE (haystack, haystack_len, j, needle_len))
+ goto ret0;
+# endif
}
-#if CHECK_EOL
+# if CHECK_EOL
/* Calculate J if it wasn't kept up-to-date in the first-character
loop. */
j = phaystack - &haystack[suffix] - 1;
+# endif
#endif
-
/* Scan for matches in right half. */
i = suffix + 1;
pneedle = &needle[i];
@@ -338,6 +338,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
}
++i;
}
+#if CHECK_EOL
+ /* Update minimal length of haystack. */
+ if (phaystack > haystack + haystack_len)
+ haystack_len = phaystack - haystack;
+#endif
if (needle_len <= i)
{
/* Scan for matches in left half. */
@@ -360,13 +365,6 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
}
else
j += i - suffix + 1;
-
-#if CHECK_EOL
- if (!AVAILABLE (haystack, haystack_len, j, needle_len))
- break;
-#endif
-
- phaystack = &haystack[suffix + j];
}
}
ret0: __attribute__ ((unused))
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 90ba1897904..5909fe3cdba 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -37,8 +37,8 @@
/* Two-Way algorithm. */
#define RETURN_TYPE char *
#define AVAILABLE(h, h_l, j, n_l) \
- (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \
- && ((h_l) = (j) + (n_l)))
+ (((j) + (n_l) <= (h_l)) || ((h_l) += __strnlen ((void*)((h) + (h_l)), 512), \
+ (j) + (n_l) <= (h_l)))
#define CHECK_EOL (1)
#define RET0_IF_0(a) if (!a) goto ret0
#define CANON_ELEMENT(c) TOLOWER (c)
diff --git a/string/strstr.c b/string/strstr.c
index b3b5deb6737..265e9f310ce 100644
--- a/string/strstr.c
+++ b/string/strstr.c
@@ -33,10 +33,11 @@
#define RETURN_TYPE char *
#define AVAILABLE(h, h_l, j, n_l) \
- (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \
- && ((h_l) = (j) + (n_l)))
+ (((j) + (n_l) <= (h_l)) || ((h_l) += __strnlen ((void*)((h) + (h_l)), 512), \
+ (j) + (n_l) <= (h_l)))
#define CHECK_EOL (1)
#define RET0_IF_0(a) if (!a) goto ret0
+#define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C))
#include "str-two-way.h"
#undef strstr
diff --git a/string/test-strcasestr.c b/string/test-strcasestr.c
index 2b0a38ea734..9b1088df54c 100644
--- a/string/test-strcasestr.c
+++ b/string/test-strcasestr.c
@@ -25,6 +25,7 @@
#define STRCASESTR simple_strcasestr
#define NO_ALIAS
#define __strncasecmp strncasecmp
+#define __strnlen strnlen
#include "strcasestr.c"
diff --git a/string/test-strstr.c b/string/test-strstr.c
index acf6ff82246..8d99716ff39 100644
--- a/string/test-strstr.c
+++ b/string/test-strstr.c
@@ -24,6 +24,7 @@
#define STRSTR simple_strstr
#define libc_hidden_builtin_def(arg) /* nothing */
+#define __strnlen strnlen
#include "strstr.c"